#ifndef INVERSEDINDEX
#define INVERSEDINDEX

/**
 *  InversedIndex.h
 *  Author: Miroslav Pospíšil <xpospi73@stud.fit.vutbr.cz>
 *  Created on 30. 4 2018, 16:42
 *
 *  Copyright (C) 2018  Miroslav Pospíšil
 *
 *  OPEN SOURCE LICENCE VUT V BRNĚ
 *  Verze 1.
 *  Copyright (c) 2010, Vysoké učení technické v Brně, Antonínská 548/1, PSČ 601 90
 *  -------------------------------------------------------------------------------
 */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <time.h>
#include <random>
#include <ctime>
#include <climits>
#include <queue>
#include <set>
#include <iterator>

#include <boost/filesystem.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/set.hpp>
#include <boost/archive/text_oarchive.hpp>
#include <boost/archive/text_iarchive.hpp>

// Default number of distinct feature IDs handled by the index (2^24).
#define DEFAULT_FEATURE_COUNT 16777216
// Default number of cache slots: 10 % of the default feature count.
// Fully parenthesized so the macro expands safely inside larger expressions
// (e.g. `x / DEFAULT_MEMORY_SIZE`).
#define DEFAULT_MEMORY_SIZE ((DEFAULT_FEATURE_COUNT / 100) * 10)
// Number of decimal digits of a feature ID stored per directory level.
#define THOUSAND_DIMENSION 3
// Not referenced in this header; presumably used by benchmark code elsewhere.
#define EXPERIMENTS_COUNT 20000

// Set to true to print diagnostic messages to stdout.
#define DEBUG false

// Directory used for the on-disk index when no path is given.
#define DEFAULT_CACHE_PATH "cache/"

// NOTE(review): a using-directive in a header leaks into every includer.
// It is kept because the class below (and possibly client code) relies on
// unqualified standard-library names.
using namespace std;

class InversedIndex {
public:
    struct featureItem {
        int featureId;
        int lruCounter;
        vector<int> documents;
    };

    enum CachePolitics {
        LRU, RANDOM, FIFO, NONE
    };

    /**
     * Constructor with default parameters from constants
     */
    InversedIndex() {
        printWelcomeDebugMsg();

        politic = FIFO;
        cachePath = DEFAULT_CACHE_PATH;
        featureCount = DEFAULT_FEATURE_COUNT;
        memorySize = DEFAULT_MEMORY_SIZE;

        idfs = vector<float>(DEFAULT_FEATURE_COUNT, 0);
        loadDatabaseInfo();

        initializeIndex();
    }

    /**
     * Constructor with specified parameters
     * @param indexPath Path to index to be stored.
     * @param politics Type of cache politics to be used.
     * @param features Number of features
     * @param memory Percentage of Number of features to be used.
     */
    InversedIndex(string indexPath, CachePolitics politics, unsigned int features, unsigned int memory) {
        printWelcomeDebugMsg();


        if (indexPath.back() != '/')
        {
            indexPath.append("/");
        }
        cachePath = indexPath;
        politic = politics;
        featureCount = features;
        memorySize = (features / 100) * memory;

        idfs = vector<float>(DEFAULT_FEATURE_COUNT, 0);
        loadDatabaseInfo();

        initializeIndex();
    }

    /**
     * Initializes inversed index
     */
    void initializeIndex()
    {
        fid_cnt = vector<int>(featureCount, 0);
        featuresIndexMap = vector<int>(featureCount);
        for(int i = 0; i < featuresIndexMap.size(); i++) {
            featuresIndexMap[i] = -1;
        }

        index = vector<featureItem>(memorySize);
        for(int i = 0; i < index.size(); i++) {
            index[i] = featureItem();
            index[i].featureId = 0;
            index[i].lruCounter = UINT_MAX;
        }

        if(politic == FIFO) {
            for(int i = 0; i < memorySize; i++) {
                fifo.push(i);
            }
        }
    }

    void printWelcomeDebugMsg() {
        printDebugMsg("Inverse Index\nMiroslav Pospisil\n");
        printDebugMsg(  "Warning. You are running this Inverse Index in debug mode.\n"
                        "All debug messages will be displayed!\n");
    }

    void printDebugMsg(string msg) {
        if(DEBUG) {
            cout << msg << endl;
        }
    }

    /**
     * Generating random number from (0,FEATURE_COUNT)
     * @return Random number
     */
    int getRandomNumber() {
        return rand() % (featureCount + 1);
    }

    /**
     * Converts character to integer
     * @param c Character to convert
     * @return Integer value of given character
     */
    int convertCharToInt(char c) {
        return static_cast<int>(c) - 48;
    }

    /**
     * Convets string to integer
     * @param stringInt String which contains integer
     * @return integer value of given string
     */
    int convertStringToInt(string stringInt) {
        int asInt;
        stringstream ss;

        ss << stringInt;
        ss >> asInt;
        return asInt;
    }

    /**
     * Computes folder path from feature ID number
     * @param number Feature ID
     */
    void getFolderPath(int featureId) {
        stringstream ss;
        ss << "Resolving path for feature with id: " << featureId;
        printDebugMsg(ss.str());

        stringstream invertedPath;

        path.str("");
        path << cachePath;
        string numberText = to_string(featureId);

        for(int i = numberText.length() - 1; i >= 0; i--) {
            if(i <= (numberText.length() - 1)) {
                invertedPath << numberText[i];
            }
            if(((numberText.length() - i) % THOUSAND_DIMENSION == 0) && (i > 0)) {
                invertedPath << "/";
            }
        }

        string invertedPathString = invertedPath.str();
        int order = (to_string(featureCount).length() / (THOUSAND_DIMENSION + 1)) - (numberText.length() / (THOUSAND_DIMENSION + 1));
        if((to_string(featureCount).length() - numberText.length()) < 2) {
            order -= 1;
        }
        for(int i = 0; i < order; i++) {
            path << "0/";
        }

        for(int i = invertedPathString.length() - 1; i >= 0; i--) {
            path << invertedPathString[i];
        }
//        cout << featureId << "\t" << path.str() << endl;
    }

    /**
     * Creates all directories in given path
     * @param path String with path of directories to be created
     */
    void createDirectory(string dirPath) {
        boost::filesystem::path dir(dirPath);
        if(!boost::filesystem::is_directory(dir)) {
            boost::filesystem::create_directories(dir);
        }
    }

    /**
     * Reads and parses document from feature into vector
     */
    void readFeatureIdDocuments() {
        printDebugMsg("Reading documents from feature");

        ifstream readFileDescriptor;
        string line;
        int lines = 0;

        readFileDescriptor.open(fullPath.str());
        if(readFileDescriptor.is_open()) {

            while(getline(readFileDescriptor, line)) {
                lines++;
            }

            readFileDescriptor.clear();
            readFileDescriptor.seekg(0, readFileDescriptor.beg);
            feature.clear();

            while(getline(readFileDescriptor, line)) {
                feature.push_back(convertStringToInt(line));
            }
        }
        readFileDescriptor.close();
    }

    /**
     * Creates full path string from given feature ID
     * @param fId Feature ID for path create
     */
    void createFullPath(int fId) {
        printDebugMsg("Creating full path");

        fullPath.str("");
        fullPath << path.str() << "/" << fId;
    }

    void printMemory() {
        if(DEBUG) {
            for(int i = 0; i < index.size(); i++) {
                cout << "[" << i << "] Feature ID: " << index[i].featureId << "\tLRU counter: " << index[i].lruCounter << endl;
                if(!index[i].documents.empty()) {
                    for(int j = 0; j < index[i].documents.size(); j++) {
                        cout << "Doc ID: " << index[i].documents[j] << "\t";
                    }
                    cout << endl;
                }
            }
            cout << endl;
        }
    }

    /**
     * Adding document ID to file by feature ID
     * @param fId Feature ID
     * @param n_docId Document ID
     */
    void pushBack(int fId, int n_docId) {
        incremetFeature(fId);
        getFolderPath(fId);
        createDirectory(path.str());
        createFullPath(fId);

        ofstream writeFileDescriptor;

        stringstream ss;
        ss  << "Zapisuji doc ID: " << n_docId << " do indexu " << fullPath.str();
        printDebugMsg(ss.str());

        writeFileDescriptor.open(fullPath.str(), ios_base::app);
        writeFileDescriptor << n_docId << endl;
        writeFileDescriptor.close();
    }

    /**
     * Updates LRU Counter for each feature in memory
     * @param recentlyUsedFeatureId
     */
    void updateLRUCounter(int recentlyUsedFeatureId) {
        if(politic != LRU) {
            return;
        }
        int maxLRUFeatureValue = 0, maxLRUFeatureIndex = 0;
        for(int i = 0; i < index.size(); i++) {
            if(index[i].lruCounter > maxLRUFeatureValue) {
                maxLRUFeatureValue = index[i].lruCounter;
                maxLRUFeatureIndex = i;
            }

            if(i == recentlyUsedFeatureId) {
                index[i].lruCounter = 0;
            }
            else if(index[i].lruCounter < UINT_MAX) {
                index[i].lruCounter++;
            }
        }

        lruIndex = maxLRUFeatureIndex;
    }

    /**
     * Gets LRU Feature ID
     * @return LRU Feature ID
     */
    int getLRUFeature() {
        return lruIndex;
    }

    /**
     * Loads feature from file by given feature ID to features in memory
     * @param fId Feature ID for load from
     * @return T/F if loading feature was success or not
     */
    bool loadFeatureFromFile(int fId) {
        stringstream ss;
        ss << "Loading feature with id: " << fId << " from file";
        printDebugMsg(ss.str());

        getFolderPath(fId);
        createFullPath(fId);
        readFeatureIdDocuments();

        if(feature.size() > 0) {
            int lruFeatureId;
            if(politic == LRU) {
                lruFeatureId = getLRUFeature();
            }
            else if(politic == RANDOM) {
                lruFeatureId = rand() % memorySize;
            }
            else if(politic == FIFO) {
                lruFeatureId = fifo.front();
                fifo.pop();
            }

            featuresIndexMap[index[lruFeatureId].featureId] = -1;

            index[lruFeatureId] = featureItem();
            index[lruFeatureId].featureId = fId;
            index[lruFeatureId].documents = feature;
            updateLRUCounter(lruFeatureId);

            if(politic == FIFO) {
                fifo.push(lruFeatureId);
            }

            featuresIndexMap[fId] = lruFeatureId;

            return true;
        }
        return false;
    }

    /**
     * Gets documents of feature from the memory by given feature ID
     * Otherwise loads it into the memory
     * @param fId Feature ID
     * @return Vector of feature's documents
     */
    vector<int> &getFeature(int fId) {
        stringstream ss;
        ss << "Getting documents from Feature with ID: " << fId;
        printDebugMsg(ss.str());

        if(featuresIndexMap[fId] >= 0) {
            updateLRUCounter(featuresIndexMap[fId]);
            printMemory();
            return index[featuresIndexMap[fId]].documents;
        }

        loadFeatureFromFile(fId);
        overwriteCounter++;
        printMemory();
        return feature;
    }

    /**
     * Returns number of documents of feature by given feature ID
     * (if feature is not in the memory, method loads feature into the memory)
     * @param fId
     * @return
     */
    int getSizeOfFeature(int fId) {
        return getFeature(fId).size();
    }

    /**
     * Removing feature from file and memory (if memory contains it) by given feature ID
     * @param fId Feature ID of removed feature
     */
    void removeFeature(int fId) {
        stringstream ss;
        ss << "Removing feature with id: " << fId;
        printDebugMsg(ss.str());

        getFolderPath(fId);
        createFullPath(fId);
        boost::filesystem::remove(fullPath.str());

        if(featuresIndexMap[fId] >= 0) {
            index[featuresIndexMap[fId]].featureId = 0;
            index[featuresIndexMap[fId]].lruCounter = UINT_MAX;
            index[featuresIndexMap[fId]].documents.clear();
            printMemory();
        }
    }

    /**
     * Check if database path is not empty
     * @return T/F if database path is not empty / database path contains folders and features
     */
    bool isDatabaseCreated() {
        if(!boost::filesystem::is_directory(cachePath)) {
            return false;
        }

        boost::filesystem::directory_iterator end_it;
        boost::filesystem::directory_iterator it(cachePath);
        if(it == end_it) {
            return false;
        }
        else {
            return true;
        }
    }

    template <class T>
    void saveContainer(string outpath, T &vec)
    {
        std::ofstream ofs(outpath);
        boost::archive::text_oarchive ar(ofs);
        // Save the data
        ar & vec;
    }

    template <class T>
    T loadContainer(string inpath)
    {
        T vec;
        std::ifstream ifs(inpath);
        boost::archive::text_iarchive ar(ifs);
        // Save the data
        ar & vec;
        return vec;
    }

    void saveDatabaseInfo()
    {
        //save doc and idfs to file
        string doc_path = cachePath;
        doc_path.append("/docnames.bin");

        saveContainer< vector<string> >(doc_path, doc);

        string idfs_path = cachePath;
        idfs_path.append("/idfs.bin");
        saveContainer< vector<float> >(idfs_path, idfs);

        string stoplist_path = cachePath;
        stoplist_path.append("/stoplist.bin");
        saveContainer< vector<int> >(stoplist_path, fid_cnt);
    }

    void loadDatabaseInfo()
    {
        string doc_path = cachePath;
        doc_path.append("docnames.bin");
        string idfs_path = cachePath;
        idfs_path.append("/idfs.bin");
        string stoplist_path = cachePath;
        stoplist_path.append("/stoplist.bin");

        if (boost::filesystem::is_regular_file(doc_path))
        {
            doc = loadContainer< vector<string> >(doc_path);
        }
        if (boost::filesystem::is_regular_file(idfs_path))
        {
            idfs = loadContainer< vector<float> >(idfs_path);
        }
        if (boost::filesystem::is_regular_file(stoplist_path))
        {
            fid_cnt = loadContainer< vector<int> >(stoplist_path);
        }
    }

    void updateIDFS()
    {
        float size = doc.size() + 1;
        for (int i = 0; i < DEFAULT_FEATURE_COUNT; ++i)
        {
            idfs[i] = log(size/(idfs[i]+1));
        }
    }

    inline size_t docSize()
    {
        return doc.size();
    }

    inline int addDocument(string docname)
    {
        doc.push_back(docname);
        return doc.size() - 1;
    }

    inline string getDocument(int docid)
    {
        return doc[docid];
    }

    inline void incrementIDFS(int fid)
    {
        ++idfs[fid];
    }

    inline float getIDF(int fid)
    {
        return idfs[fid];
    }

    inline void clearIDF(int fid)
    {
        idfs[fid] = 0;
    }


    inline int incremetFeature(int fId)
    {
        fid_cnt[fId] += 1;
    }

    inline int getFeatureCount(int fId)
    {
        return fid_cnt[fId];
    }

private:
    stringstream path;
    stringstream fullPath;

    vector<int> featuresIndexMap;
    vector<featureItem> index;
    vector<int> feature;

    queue<int> fifo;

    default_random_engine generator;

    CachePolitics politic;
    string cachePath;
    unsigned int featureCount, memorySize;

    int overwriteCounter = 0, lruIndex = 0;

    //JB: additional database info
    //names of documents, doc_id is position in this vector
    vector<string> doc;

    //count of feature ids for stop listing.
    vector<int> fid_cnt;

    //inverse document frequencies indexed by doc_id (index to doc)
    vector<float> idfs;




    // Allow serialization to access non-public data members.
    friend class boost::serialization::access;

};


#endif // INVERSEDINDEX
