/**
 *  BOWDatabase.h
 *
 *  Created on: Nov 14, 2014
 *
 *  Author: Jan Brejcha <ibrejcha@fit.vutbr.cz>, <brejchaja@gmail.com>
 *  Copyright (C) 2014  Jan Brejcha
 *
 *  OPEN SOURCE LICENCE VUT V BRNĚ
 *  Verze 1.
 *  Copyright (c) 2010, Vysoké učení technické v Brně, Antonínská 548/1, PSČ 601 90
 *  -------------------------------------------------------------------------------
 */


#ifndef PROJECTS_RELIEF_PANO_SRC_HORIZON_LOCATE_HLOC_BOWDATABASE_H_
#define PROJECTS_RELIEF_PANO_SRC_HORIZON_LOCATE_HLOC_BOWDATABASE_H_

#include <vector>
#include <cmath>
#include <stdexcept>
#include <algorithm>
#include <set>


#include "Persistable.h"
#include "DatabaseEntry.h"
#include "BOWQueryResult.h"
#include "InversedIndex.h"

#include <QMutex>
#include <QtConcurrentRun>
#include <QList>
#include <QFuture>
#include <QFileInfo>
#include <QTime>

#include <libxml/xmlreader.h>

/// Value passed as the third constructor argument of InversedIndex by
/// BOWDatabase(std::string, int). 16777216 == 2^24.
/// NOTE(review): presumably the number of distinct feature IDs the index is
/// sized for -- confirm against InversedIndex.
#define FEATURE_COUNT 16777216

class BOWDatabase : public Persistable {
public:

    /**
     * @brief BOWDatabase
     * Default constructor for BOWDatabase.
     * Default path for the database will be used.
     * Default maximum number of identical features in the database
     * is set to one million (1000 000).
     */
    BOWDatabase()
    {
        max_documents = 1000000;
    }


    /**
     * @brief BOWDatabase
     * Constructs BOWDatabase.
     *
     * @param db_path Path of the database
     * @param maxdocuments Maximum allowable number of identical features in
     * the database. If there are more features than
     * maxdocuments in the whole database, this featureID will be stop listed.
     */
    BOWDatabase(std::string db_path, int maxdocuments)
        :max_documents(maxdocuments)
    {
        index = InversedIndex(db_path, InversedIndex::CachePolitics::FIFO,
                              FEATURE_COUNT, 100);
    }

    /**
     * @brief isDatabaseCreated
     * Asks the underlying inverted index whether the database is created.
     * @return The answer of InversedIndex::isDatabaseCreated(): true if the
     * database exists, false otherwise.
     */
    bool isDatabaseCreated()
    {
        const bool created = index.isDatabaseCreated();
        return created;
    }

    /**
     * @brief ~BOWDatabase
     * Virtual destructor; the class releases nothing by hand, all members
     * clean up after themselves.
     */
    virtual ~BOWDatabase() = default;

    /**
     * @deprecated
     * @brief BOWDatabase
     * Used for initialization of the database from XML.
     * As the inverted index is now stored, there is no need to use this
     * constructor.
     *
     * The stop-list threshold is set to the same default as in the default
     * constructor (one million) before parsing starts: initFromXMLElement()
     * adds entries through addSearchEntry(), which compares feature counts
     * against max_documents, so the member must not be left uninitialized.
     *
     * @param e Element from which to start parsing the database.
     */
    BOWDatabase(QDomElement &e)
        : max_documents(1000000)
    {
        // During construction this resolves to BOWDatabase's own
        // implementation, never to an override in a derived class.
        this->initFromXMLElement(e);
    }

    /**
     * @deprecated
     * @brief addEntry
     * Stores a copy of the entry in memory and then indexes it.
     *
     * The in-memory copies exist only so that the entries can later be
     * written to XML. To index an entry without keeping it in memory call
     * addSearchEntry() directly.
     *
     * NOTE(review): the in-memory list is modified here without taking the
     * mutex that addSearchEntry() locks.
     *
     * @param entry The entry to store and index.
     */
    void addEntry(DatabaseEntry entry)
    {
        // Keep the entry for XML export ...
        dat.push_back(entry);
        // ... and make it searchable.
        this->addSearchEntry(entry);
    }

    /**
     * @brief isStopListed
     * Tests whether a feature occurs more often than the maximum allowable
     * number (max_documents).
     * @param fid The feature ID to test.
     * @return true if the feature count reported by the index is strictly
     * greater than max_documents; false otherwise.
     */
    inline bool isStopListed(int fid)
    {
        const auto occurrences = index.getFeatureCount(fid);
        return occurrences > max_documents;
    }

    /**
     * @brief addSearchEntry
     * Adds a search entry (a set of horizon features) to the inverted index.
     *
     * The mutex is held for the whole call. The entry key is registered as a
     * new document; every feature that is not stop listed then contributes
     * one posting made of the document ID packed together with the direction
     * unpacked from the feature's packed ID. index.incrementIDFS() is called
     * at most once per distinct feature ID of this entry. As soon as a
     * feature count exceeds max_documents, index.clearIDF() and
     * index.removeFeature() are called for it.
     *
     * @param entry DatabaseEntry to be stored in the database.
     */
    void addSearchEntry(DatabaseEntry &entry)
    {
        typedef std::vector< std::shared_ptr<LocalFeature> >::const_iterator FeatureIt;

        QMutexLocker locker(&mutex);

        const int docId = index.addDocument(entry.key.toStdString());

        // Feature IDs for which incrementIDFS() was already called while
        // processing this entry.
        std::set<int> counted;

        for (FeatureIt fit = entry.featuresBegin(); fit != entry.featuresEnd(); ++fit)
        {
            const std::shared_ptr<LocalFeature> &feature = *fit;
            const int fId = feature->featureId();

            if (!isStopListed(fId))
            {
                // Combine the document ID with the direction of the feature.
                const int packedFeatureId = feature->pFeatureId();
                const int alpha_d = Contourlett::unpackDirection(packedFeatureId);
                const int n_docId = Contourlett::packDirection(docId, alpha_d);
                index.pushBack(fId, n_docId);

                // insert() reports whether the ID was new to the set.
                if (counted.insert(fId).second)
                {
                    index.incrementIDFS(fId);
                }

                // Stop list the feature the moment it crosses the threshold.
                if (isStopListed(fId))
                {
                    qDebug() << "Stoplisting fid: " << fId;
                    index.clearIDF(fId);
                    index.removeFeature(fId);
                }
            }
        }
    }

    /**
     * @brief addingFinised
     * Shall be called once the database has been created to update
     * inverse document frequencies.
     */
    void addingFinised()
    {
        index.updateIDFS();
    }

    /**
     * @deprecated Stop listing is now done incrementally inside
     * addSearchEntry(), which makes adding new features faster.
     *
     * @brief clearStopListed
     * Currently a no-op: the former implementation is commented out below and
     * kept for reference only. It used to stop list and remove every feature
     * whose size exceeded one half of the number of documents.
     */
    void clearStopListed()
    {
        /*
        std::cerr << "Warning: Deprecated function clearStopListed()"
                  << std::endl;

        index.stopListClear();
        //clear stop-listed features
        int thr = index.docSize() / 2;
        for (int i = 0; i < FEATURE_COUNT; ++i)
        {
            if ((size_t) index.getSizeOfFeature(i) > thr)
            {
                //this feature id is stop-listed
                index.stopListInsert(i);
                index.removeFeature(i);
            }
        }*/
    }

    /**
     * @brief query
     * Queries the database with a query DatabaseEntry; thin wrapper around
     * cDistanceInv().
     *
     * @param q The query DatabaseEntry.
     * @param dir The voting array, filled by cDistanceInv(). It is written
     * as size() * 360 floats (360 cells per document), so it must be at
     * least that large.
     * @return The vector returned by cDistanceInv(). That vector is
     * currently always empty because cDistanceInv() never fills it; the
     * scores based on Eq. (6) from [Baatz et al. 2012] (Horizon Line
     * Localization) are delivered through dir.
     */
    std::vector< std::pair<int, float> > query(DatabaseEntry q, float *dir)
    {
        std::vector< std::pair<int, float> > distances = cDistanceInv(q, dir);
        return distances;
    }

    /**
     * @brief cDistanceInv
     * Calculates the metric according to Equation (6) from
     * [Baatz et al. 2012] between the query and each document in the
     * database, for each candidate direction.
     *
     * The scores are written to @p dir, not to the return value. dir is
     * addressed as 360 * document + direction:
     *  - every cell is first set to the sum of idf * query count over all
     *    query features that are not stop listed;
     *  - then, for every query feature and every distinct posting of that
     *    feature, idf * min(query count, document count) / (d + 1) is
     *    subtracted from the 120 cells whose direction is a multiple of 3,
     *    where d is the circular distance in degrees between that cell and
     *    the direction difference of posting and query.
     *
     * @param q   The query document.
     * @param dir Caller-owned array of at least index.docSize() * 360
     *            floats; it is completely overwritten.
     * @return Always an empty vector (kept for interface compatibility).
     */
    std::vector< std::pair<int, float> > cDistanceInv(DatabaseEntry q, float *dir)
    {
        typedef std::vector< std::shared_ptr<LocalFeature> >::const_iterator FeatureIt;

        std::vector< std::pair<int, float> > result;

        // Read the document count once so that the scratch buffer, its
        // zeroing and the initialisation of dir all use the same size.
        const size_t doc_count = (size_t)index.docSize();

        // Number of postings of the current feature per document. Owned by a
        // std::vector so that it is released even if an index call throws.
        std::vector<int> doc_v(doc_count);

        // Upper bound of the score: what a document would get if it matched
        // nothing at all.
        float sum = 0;
        for (FeatureIt fit = q.featuresBegin(); fit != q.featuresEnd(); ++fit)
        {
            const int fId = (*fit)->featureId();
            if (isStopListed(fId))
            {
                continue;
            }
            const float idf = index.getIDF(fId);
            const float q_c = q.getCount(fId);

            sum += idf * q_c;
        }

        std::fill(dir, dir + (doc_count * (size_t)360), sum);

        for (FeatureIt fit = q.featuresBegin(); fit != q.featuresEnd(); ++fit)
        {
            const int fId = (*fit)->featureId();
            if (isStopListed(fId))
            {
                continue;
            }
            const int u_fId = (*fit)->pFeatureId();

            // Direction of the query feature, shifted by half of the query
            // field of view.
            int alpha_q = (int)round(Contourlett::unpackDirection(u_fId) - ((q.hFOV - 1) / 2.0));
            // Quantize into a 3-degree bin (integer division truncates
            // toward zero).
            alpha_q = (alpha_q / 3) * 3;

            const float idf = index.getIDF(fId);
            const float q_c = q.getCount(fId);

            std::vector<int> &d = index.getFeature(fId);

            // Distinct postings (document + direction) of this feature and,
            // in doc_v, how many postings each document has.
            std::set<int> docs;
            std::fill(doc_v.begin(), doc_v.end(), 0);
            for (std::vector<int>::iterator it = d.begin(); it != d.end(); ++it)
            {
                const int pDocument = *it;
                const int document = Contourlett::unpackFeatureId(pDocument);
                docs.insert(pDocument);
                ++doc_v[document];
            }

            for (std::set<int>::iterator it = docs.begin(); it != docs.end(); ++it)
            {
                const int pDocument = *it;
                const int document = Contourlett::unpackFeatureId(pDocument);
                const int tf = doc_v[document];
                const int alpha_d = Contourlett::unpackDirection(pDocument);

                // Direction difference, wrapped into [0, 360).
                int alpha = (alpha_d - alpha_q) % 360;
                alpha = alpha < 0 ? alpha + 360 : alpha;

                const float min = q_c < tf ? q_c : tf;
                const float prod = idf * min;

                // Soft binning with inverse distance as the weighting
                // function: every 3-degree bin receives the vote, attenuated
                // by its circular distance to alpha.
                for (int i = 0; i < 120; ++i)
                {
                    const int alpha_1 = i * 3;
                    const int dabs = abs(alpha - alpha_1);
                    const int dist = (dabs > 180 ? 360 - dabs : dabs) + 1;
                    dir[(size_t)360 * (size_t)document + (size_t)alpha_1] -= (prod) / (dist);
                }
            }
        }

        return result;
    }

    /**
     * @brief getDocumentName
     * Looks up the name stored in the index for a document.
     * @param document_id   The ID of the document.
     * @return  The document name as returned by InversedIndex::getDocument().
     */
    inline std::string getDocumentName(int document_id)
    {
        std::string name = index.getDocument(document_id);
        return name;
    }

    /**
     * Converts the object into an XML element of the QtXML library.
     *
     * Only the entries kept in memory by addEntry() are written, each as a
     * child of a "BOWDatabase" element.
     *
     * @param doc The document used to create the elements.
     * @return The created "BOWDatabase" element.
     */
    virtual QDomElement toXMLElement(QDomDocument &doc)
    {
        QDomElement db = doc.createElement("BOWDatabase");
        for (DatabaseEntry &entry : dat)
        {
            db.appendChild(entry.toXMLElement(doc));
        }
        return db;
    }

    /**
     * Initializes the object from an XML element of the QtXML library.
     *
     * Every child of @p e that is an element is parsed as a DatabaseEntry and
     * added through addSearchEntry(). Afterwards addingFinised() and
     * clearStopListed() are called.
     *
     * @param e The element whose children are the database entries.
     */
    virtual void initFromXMLElement(QDomElement &e)
    {
        for (QDomNode node = e.firstChild(); !node.isNull(); node = node.nextSibling())
        {
            QDomElement entryElement = node.toElement();
            if (entryElement.isNull())
            {
                // Not an element node, nothing to parse.
                continue;
            }

            DatabaseEntry de;
            de.initFromXMLElement(entryElement);
            this->addSearchEntry(de);
        }

        addingFinised();
        clearStopListed();
    }


    /**
     * @brief saveToXmlStream
     * Writes a "BOWDatabase" element containing every entry kept in memory
     * by addEntry().
     * @param xml The stream writer to write to.
     */
    virtual void saveToXmlStream(QXmlStreamWriter &xml)
    {
        xml.writeStartElement("BOWDatabase");
        for (DatabaseEntry &entry : dat)
        {
            entry.saveToXmlStream(xml);
        }
        xml.writeEndElement();
    }

    /**
     * @brief addEntriesFromXmlStreamReader
     * In case of one database in multiple xml files this can be used to
     * add the entries from one xml file into the current database.
     *
     * After all entries are added, finalizeDatabase() has to be called!
     *
     * A reader error only ends the loop; nothing is reported here, so a
     * caller that cares has to check xml.hasError() itself.
     *
     * @param xml The reader to take the entries from.
     */
    virtual void addEntriesFromXmlStreamReader(QXmlStreamReader &xml)
    {
        // The current token is examined first and the reader advanced
        // afterwards, until the end of input or the first error.
        for (; !xml.atEnd() && !xml.hasError(); xml.readNext())
        {
            if (xml.name() == "DatabaseEntry")
            {
                DatabaseEntry de;
                de.initWithXmlStreamReader(xml);
                this->addSearchEntry(de);
            }
        }
    }

    /**
     * @brief finalizeDatabase
     * This method has to be called after all elements of the database have
     * been loaded to prepare the database for use.
     */
    virtual void finalizeDatabase()
    {
        addingFinised();
        //clearStopListed(); not needed, we are stop listing during addSearchEntry
        index.saveDatabaseInfo(); //save idfs, stoplist, doc
    }

    /**
     * @brief initWithXmlStreamReader
     * Reads the stream token by token and adds every "DatabaseEntry" that is
     * encountered through addSearchEntry(). If the reader ends up in an
     * error state, its error string is printed to standard output.
     * @param xml The reader to take the entries from.
     */
    virtual void initWithXmlStreamReader(QXmlStreamReader &xml)
    {
        // Here the reader is advanced first and the new token examined
        // afterwards.
        while (!(xml.atEnd() || xml.hasError()))
        {
            xml.readNext();
            if (xml.name() == "DatabaseEntry")
            {
                DatabaseEntry de;
                de.initWithXmlStreamReader(xml);
                this->addSearchEntry(de);
            }
        }

        if (!xml.hasError())
        {
            return;
        }
        std::cout << "XML error: " << xml.errorString().toStdString() << std::endl;
    }

    /**
     * @brief size
     * Returns the number of documents in the database.
     * @return The document count reported by the index.
     */
    int size()
    {
        const int documents = index.docSize();
        return documents;
    }

    static bool loadDatabasePart(const char *dbname, BOWDatabase *d)
    {
		try
		{
        	d->mutex.lock();
        	std::cout << "Opening database: " << std::string(dbname) << "..." << std::endl;
        	d->mutex.unlock();
        	QFile db_xml(dbname);
        	if (!db_xml.open(QIODevice::ReadOnly))
        	{
            	d->mutex.lock();
            	std::cerr << "Cannot open database: " << std::string(dbname) << std::endl;
           	 	d->mutex.unlock();
            	return false;
        	}

        	QXmlStreamReader xml;
        	xml.setDevice(&db_xml);
        	d->addEntriesFromXmlStreamReader(xml);
        	db_xml.close();
        	return true;
		}
		catch(std::runtime_error &re)
        {
            d->mutex.lock();
            fprintf(stderr, "Error during opening: %s, msg: %s\n", dbname, re.what());
            d->mutex.unlock();
            return false;
        }
        catch(...)
        {
            d->mutex.lock();
            fprintf(stderr, "Unknoen error during opening: %s\n", dbname);
            d->mutex.unlock();
            return false;
        }

    }

    /**
     * @brief addEntriesFromXmlStreamReader
     * libxml2 variant: examines the node the reader is currently positioned
     * on and, if it is a "DatabaseEntry", parses it and adds it through
     * addSearchEntry().
     *
     * The string returned by xmlTextReaderLocalName() is owned by the caller,
     * so it is released right after the comparison, on every path.
     *
     * @param reader The libxml2 text reader.
     * @return 1 if the current node is not a "DatabaseEntry". Otherwise the
     * value returned by DatabaseEntry::initWithXmlStreamReader(); if that
     * value is -1 the entry is not added.
     */
    virtual int addEntriesFromXmlStreamReader(xmlTextReaderPtr &reader)
    {
        xmlChar *name = xmlTextReaderLocalName(reader);
        const bool isDatabaseEntry =
                name != NULL && strcmp((const char *)name, "DatabaseEntry") == 0;
        if (name != NULL)
        {
            xmlFree(name);
            name = NULL;
        }

        if (!isDatabaseEntry)
        {
            return 1;
        }

        DatabaseEntry de;
        const int ret = de.initWithXmlStreamReader(reader);
        if (ret == -1)
        {
            return -1;
        }
        this->addSearchEntry(de);
        return ret;
    }

    static bool loadDatabasePartLXML2(const char *dbname, BOWDatabase *d)
    {
        try
        {
            xmlTextReaderPtr reader;
            int ret;

            d->mutex.lock();
            std::cout << "Opening database with libxml2: " << std::string(dbname) << "..." << std::endl;
            d->mutex.unlock();
            reader = xmlReaderForFile(dbname, NULL, 0);
            if (reader != NULL) {
                ret = xmlTextReaderRead(reader);
                while (ret == 1) {
                    ret = d->addEntriesFromXmlStreamReader(reader);
                    if (ret == 1)
                    {
                        ret = xmlTextReaderRead(reader);
                    }
                }
                xmlFreeTextReader(reader);
                if (ret != 0) {
                    d->mutex.lock();
                    fprintf(stderr, "%s : failed to parse\n", dbname);
                    d->mutex.unlock();
                    return false;
                }
            } else {
                d->mutex.lock();
                fprintf(stderr, "Unable to open %s\n", dbname);
                d->mutex.unlock();
                return false;
            }
            return true;
        }
        catch(std::runtime_error &re)
        {
            d->mutex.lock();
            fprintf(stderr, "Error during opening: %s, msg: %s\n", dbname, re.what());
            d->mutex.unlock();
            return false;
        }
        catch(...)
        {
            d->mutex.lock();
            fprintf(stderr, "Unknown error during opening: %s\n", dbname);
            d->mutex.unlock();
            return false;
        }
    }

    /**
     * @brief loadWithLXML2
     * Opens the database whose index path is the first element of
     * @p dbnames. If the index reports that the database is already created
     * it is returned as is. Otherwise every file in @p dbnames is loaded
     * concurrently with loadDatabasePartLXML2() and the database is
     * finalized.
     *
     * @param dbnames XML files to load; the first element is also passed to
     * the constructor as the database path. Must not be empty.
     * @param max_feat_count Stop-list threshold passed to the constructor.
     * @return A newly allocated database; the caller takes ownership.
     * @throws std::invalid_argument if @p dbnames is empty.
     */
    static BOWDatabase* loadWithLXML2(std::vector<std::string> dbnames, int max_feat_count)
    {
        if (dbnames.empty())
        {
            throw std::invalid_argument("BOWDatabase::loadWithLXML2: dbnames must not be empty");
        }

        BOWDatabase *d = new BOWDatabase(dbnames[0], max_feat_count); //fixme add real num of doc
        if (d->isDatabaseCreated())
        {
            return d;
        }

        // The tasks receive pointers into dbnames; they stay valid because
        // all tasks are awaited before this function returns.
        QList< QFuture<void> > resList;
        for (std::string &dbname : dbnames)
        {
            QFuture<void> future = QtConcurrent::run(BOWDatabase::loadDatabasePartLXML2, dbname.c_str(), d);
            resList.push_back(future);
        }
        for (QFuture<void> &f : resList)
        {
            f.waitForFinished();
        }
        std::cout << "Database loaded, finalizing database... " << std::endl;
        d->finalizeDatabase();
        return d;
    }

    /**
     * @deprecated The database is stored in the paged inverted index on HDD
     * instead of XML. Use the constructor with a database path instead.
     * @brief load
     * Creates a default-constructed database. Unless the index reports that
     * the database is already created, every file in @p dbnames is loaded
     * concurrently with loadDatabasePart() and the database is finalized.
     * @param dbnames Paths of the XML files to load.
     * @return A newly allocated database; the caller takes ownership.
     */
    static BOWDatabase* load(std::vector<std::string> dbnames)
    {
        BOWDatabase *database = new BOWDatabase;
        if (!database->isDatabaseCreated())
        {
            // Start one loader per file ...
            QList< QFuture<void> > pending;
            for (std::string &path : dbnames)
            {
                QFuture<void> task = QtConcurrent::run(BOWDatabase::loadDatabasePart, path.c_str(), database);
                pending.push_back(task);
            }
            // ... and wait until every one of them is done.
            for (QFuture<void> &task : pending)
            {
                task.waitForFinished();
            }
            std::cout << "database xml loaded, now running finalization" << std::endl;
            database->finalizeDatabase();
        }

        return database;
    }

    /**
     * @deprecated The database is stored in the paged inverted index on HDD
     * instead of XML. Use the constructor with a database path instead.
     * @brief load
     * Creates a default-constructed database. Unless the index reports that
     * the database is already created, the entries are read from @p xml and
     * the database is finalized.
     * @param xml The reader to take the entries from.
     * @return A newly allocated database; the caller takes ownership.
     **/
    static BOWDatabase* load(QXmlStreamReader &xml)
    {
        BOWDatabase *database = new BOWDatabase;
        if (!database->isDatabaseCreated())
        {
            database->addEntriesFromXmlStreamReader(xml);
            database->finalizeDatabase();
        }
        return database;
    }

private:
    /// The database entries kept in memory by the deprecated addEntry();
    /// only these are written by toXMLElement() and saveToXmlStream().
    typename std::vector< DatabaseEntry > dat;

    /// The paged inverted index to be stored on HDD.
    InversedIndex index = InversedIndex();

    /// Held by addSearchEntry() for its whole body and by the static loaders
    /// around their console output.
    mutable QMutex mutex;

    /// Maximal number of identical features stored in the database; a feature
    /// whose count exceeds it is treated as stop listed.
    int max_documents;

};

#endif /* PROJECTS_RELIEF_PANO_SRC_HORIZON_LOCATE_HLOC_BOWDATABASE_H_ */
