/**
 * @file parser_adapter.h 
 * @brief Header file containing abstract IParserAdapter class and 
 * concrete TSParserAdapter class
 * 
 * @author Vojtěch Dvořák 
 */

#pragma once


#include "types.h"
#include "yaramod_config.h"
#include "json/json.hpp"
#include "tree_sitter/api.h"
#include <charconv>
#include <cstring>
#include <iostream>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>

#include "forward.h"


/**
 * @brief Initial size of the auxiliary stack of nodes
 * 
 * The capacity of the stack is reserved when TSParserAdapter is instantiated,
 * to save time
 */
#define INITIAL_NODE_STACK_SIZE 128


//#define FULL_REPARSE ///< Uncomment to activate full reparse mode (the yara file object is completely rebuilt if it has any changes)


/**
 * @brief Interface through which a concrete ParserAdapter exposes the 
 * internal parser (the "output socket" of the adapter)
 */
class IParserAdapter {
    public:
        /**
         * Stores the shared configuration object in the adapter
         * @param config Shared pointer to the configuration; it is taken by 
         * value and moved into the member, so no extra reference-count 
         * round trip is made
         * @note The constructor is explicit to rule out accidental implicit
         * conversions from std::shared_ptr<YaramodConfig>
         */
        explicit IParserAdapter(std::shared_ptr<YaramodConfig> config) : config_(std::move(config)) {}

        /**
         * Virtual destructor, so that concrete adapters can be destroyed 
         * through a pointer to this interface
         */
        virtual ~IParserAdapter() = default;

        /**
         * Creates YaraSource object from the given string
         * @param string String in YARA language that should be parsed
         * @return Unique ptr to YaraSource
         */
        virtual YaraSourcePtr createYaraSource(const std::string &string) = 0;

        /**
         * Creates YaraSource object from the given string
         * @param string String in YARA language that should be parsed
         * @param entry_file_path Path associated with the given string 
         * (NOTE(review): presumably the path the string is treated as coming 
         * from - confirm against the concrete adapter)
         * @return Unique ptr to YaraSource
         */
        virtual YaraSourcePtr createYaraSource(const std::string &string, const std::string &entry_file_path) = 0;
        
        /**
         * Updates YaraSource object that was created from the string
         * @param string String in YARA language that should be reparsed
         * @param old_src Old YaraSource object that should be updated
         */
        virtual void updateYaraSource(const std::string &string, YaraSource *old_src) = 0; 

        /**
         * Updates YaraSource object that was created from the string
         * @param string String in YARA language that should be reparsed
         * @param entry_file_path Path associated with the given string 
         * (NOTE(review): same meaning as in the two-argument 
         * createYaraSource overload is assumed - confirm)
         * @param old_src Old YaraSource object that should be updated
         */
        virtual void updateYaraSource(const std::string &string, const std::string &entry_file_path, YaraSource *old_src) = 0;

        /**
         * Creates YaraSource object from the file 
         * @param path Path leading to file in YARA language that should be parsed
         * @return Unique ptr to YaraSource
         */
        virtual YaraSourcePtr createYaraSourceFromFile(std::string_view path) = 0;
        
        /**
         * Updates YaraSource object that was created from the file
         * @param path Path leading to file in YARA language that should be 
         * reparsed
         * @param old_src Old YaraSource object that should be updated
         * @note It is not necessary to pass the same path as was passed to
         * IParserAdapter::createYaraSourceFromFile; whether the source file 
         * is allowed to change depends on the concrete ParserAdapter 
         */
        virtual void updateYaraSourceFromFile(std::string_view path, YaraSource *old_src) = 0; 

    protected:
        std::shared_ptr<YaramodConfig> config_; ///< Configuration shared with the owner of the adapter
};


/**
 * @brief Class responsible for parsing of input string and converting TS 
 * structure to high level representation 
 */
class TSParserAdapter : public IParserAdapter {
    public:
        /**
         * Creates the adapter
         * @param config Shared configuration object (NOTE(review): presumably
         * forwarded to IParserAdapter - the definition is not part of this 
         * header)
         */
        TSParserAdapter(std::shared_ptr<YaramodConfig> config);

        /**
         * NOTE(review): the class holds a raw TSParser pointer and declares 
         * a destructor, but no copy/move operations are declared here - 
         * confirm that the adapter is never copied or that copying is safe
         */
        ~TSParserAdapter();

        // Implementations of the IParserAdapter interface (string based)
        YaraSourcePtr createYaraSource(const std::string &string) override;
        YaraSourcePtr createYaraSource(const std::string &string, const std::string &entry_file_path) override;

        void updateYaraSource(const std::string &string, YaraSource *old_src) override; 
        void updateYaraSource(const std::string &string, const std::string &entry_file_path, YaraSource *old_src) override;

        // Implementation of the IParserAdapter interface (file based)
        YaraSourcePtr createYaraSourceFromFile(std::string_view path) override;
        
        /**
         * Updates YaraSource object that was created from the file
         * @note Path can differ from the original path 
         */
        void updateYaraSourceFromFile(std::string_view path, YaraSource *old_src) override; 


        /**
         * Performs parsing of string and creates its high level representation
         * @param yara_src YaraSource object passed along with the string 
         * (NOTE(review): presumably the parent of the created file - confirm)
         * @param string String that should be parsed
         * @param name Optional name, empty by default (NOTE(review): 
         * presumably the name of the created YaraFile - confirm)
         */
        YaraFilePtr parse(YaraSource *yara_src, const std::string &string, const std::string &name = {});
        
        /**
         * Performs reparsing of string and updates its high level representation 
         */
        void reparse(YaraSource *parent_src, const std::string &string, const YaraFilePtr &old_yara_file);

            
        /**
         * Counterpart of parse() that takes a path instead of a string 
         * (NOTE(review): presumably the content of the file at the given path 
         * is parsed - confirm in the definition)
         */
        YaraFilePtr parseFile(YaraSource *parent_src, std::string_view path);


        /**
         * Counterpart of reparse() that takes a path instead of a string 
         * (NOTE(review): presumably the content of the file at the given path 
         * is reparsed - confirm in the definition)
         */
        void reparseFile(YaraSource *parent_src, std::string_view path, const YaraFilePtr &old_yara_file);
        

    private:
        TSParser *ts_parser_; ///< Adaptee

        std::vector<TSNode> node_stack_; ///< Stack for preserving nodes (especially used in case of recursive structures such as expressions)

        /**
         * Performs BFS over the subtree represented by the given node; all 
         * nodes that have the error type are appended to the given vector 
         * @param max_depth Depth limit; the default is the largest value of
         * unsigned int
         */
        static void findErrorNodes_(
            TSNode root, 
            std::vector<TSNode> &err_nodes, 
            size_t max_depth = (unsigned)-1
        );

        /**
         * Finds all nodes that satisfy the given condition  
         * @param start Node where the search starts
         * @param out_vec Output vector for the found nodes
         * @param f Callable representing the condition (its exact signature 
         * is given by the definition, which is not part of this header)
         * @param max_depth Depth limit; the default is the largest value of
         * unsigned int
         */
        template <typename Fn>
        static void findNodes_(
            TSNode start, 
            std::vector<TSNode> &out_vec, 
            const Fn &f, 
            size_t max_depth = (unsigned)-1
        );

        /**
         * Traverses the subtree specified by the given node until the given 
         * depth or leaf nodes are reached
         * @param f Callable used during the traversal (its exact signature 
         * is given by the definition, which is not part of this header)
         */
        template <typename Fn>
        static void traverseSubtree_(
            TSNode start, 
            const Fn &f, 
            size_t max_depth = (unsigned)-1
        );

        /**
         * Creates a string view from the TSNode structure and the given 
         * string view of the source file
         */
        static std::string_view getNodeStringV_(const TSNode &node, std::string_view file_sv);


        /**
         * Creates (allocates) a string from the information stored in the 
         * provided node structure and the given string_view of the whole 
         * source file 
         */
        static std::string getNodeString_(const TSNode &node, std::string_view file_sv);


        /**
         * Creates a string from the given string id node (NOTE(review): how 
         * the identifier is normalized, if at all, is not visible in this 
         * header)
         */
        static std::string getStringId_(const TSNode &string_id_node, std::string_view file_sv);


        /**
         * Auxiliary function for finding a specific child node
         * @return TSNode structure; if the child does not exist, a null node 
         * is returned 
         */
        inline TSNode getNodeChildByFieldName_(const TSNode &parent, std::string_view field_name);


        /**
         * Calculates node length in bytes and returns it as a result 
         */
        inline size_t getNodeLen_(const TSNode &node);
        
        
        /**
         * Returns byte offset where the string represented by given node starts 
         */
        inline offset_t getNodeOffset_(const TSNode &node);

        /**
         * Converts TSPoint -like range to range_t structure 
         */
        inline range_t getNodeRange_(const TSNode &node);

        /**
         * Converts TSPoint of start to yaramodv4 position (point_t structure)
         */
        inline point_t getNodePos_(const TSNode &node);

        /**
         * Checks if node is valid - it must not be null, missing or ERROR 
         */
        inline bool isValidNode_(const TSNode &node);

        /**
         * Checks if node can appear anywhere in the code (for now it is 
         * only comment)
         * @note Presumes that the given node is valid (it is not error, null 
         * node or missing node)
         */
        inline bool isGlobalNode_(const TSNode &node);

        /**
         * Checks if node has specific type
         * @param node Node to be checked
         * @param type Type that is expected
         * @return True if node has given type, otherwise false 
         */
        inline bool isType_(const TSNode &node, const char* type);

        /**
         * Checks whether node represents valid ASCII character or not 
         */
        inline bool isAsciiChar_(const TSNode &node, std::string_view sv);

        /**
         * Rebuilds completely YaraFile without taking edits into consideration
         */
        void forceUpdateYaraFile_(YaraSource *parent_src, const YaraFilePtr &yara_file);

        /**
         * Builds the YaraFile object - object that represents the file (or 
         * string) with yara rules  
         */
        void buildYaraFile_(YaraSource *parent_src, const YaraFilePtr &yara_file);

        /**
         * Builds elements of yara file that can appear anywhere (errors, 
         * missing tokens - recognized by the TS, comments...) 
         */
        void buildSyntaxErrors_(const YaraFilePtr &yara_file, const TSNode &node, std::string_view sv);

        /**
         * Builds global elements of YaraFile (missing nodes and comments)
         * @param max_depth Depth limit; the default is the largest value of
         * unsigned int
         * @param parent Optional parent element, nullptr by default
         */
        void buildGlobals_(
            const YaraFilePtr &yara_file,
            const TSNode &node, 
            std::string_view sv, 
            size_t max_depth = unsigned(-1), 
            YaraFileElementBindable *parent = nullptr
        );

        /**
         * Checks if the id node is valid
         * @return true if node is valid, otherwise false and syntax error is 
         * added 
         */
        bool isValidId_(const YaraFilePtr &yara_file, const TSNode &id_node, std::string_view sv);

        /**
         * Builds Rule object from given TSNode
         * @return Shared pointer to newly created rule object
         */
        RulePtr buildRule_(const YaraFilePtr &yara_file, const TSNode &node, std::string_view sv);

        // Builders of the rule head and its parts (modifier list, tag list)
        void buildRuleHead_(const YaraFilePtr &yara_file, const RulePtr &rule, const TSNode &head_node, std::string_view sv);
        void buildRuleModifierList_(const YaraFilePtr &yara_file, const RulePtr &rule, const TSNode &list_node, std::string_view sv);
        void buildRuleTagList_(const YaraFilePtr &yara_file, const RulePtr &rule, const TSNode &list_node, std::string_view sv);

        // Builder of the rule body
        void buildRuleBody_(const YaraFilePtr &yara_file, const RulePtr &rule, const TSNode &body_node, std::string_view sv);
        
        // Builders of the lists that take a list node (meta, strings, variables)
        void buildMetaList_(const YaraFilePtr &yara_file, const RulePtr &rule, const TSNode &list_node, std::string_view sv);
        void buildStringList_(const YaraFilePtr &yara_file, const RulePtr &rule, const TSNode &list_node, std::string_view sv);
        void buildVarList_(const YaraFilePtr &yara_file, const RulePtr &rule, const TSNode &list_node, std::string_view sv);


        /**
         * Builds Regular expression string
         * @note Performs semantic checks without instantiating any special 
         * object; if any error is found, semantic error is added to yara_file
         * @return std::string that contains the whole regular expression
         */
        std::string buildRegexp_(const YaraFilePtr &yara_file, const TSNode &regexp_node, std::string_view sv);

        // Methods for building elements of regular expressions (for now they are not actually building, but they provide semantics and postprocessing checks)
        void buildRegexpNode_(const YaraFilePtr &yara_file, const TSNode &node, std::string_view sv);
        void buildRegexpCat_(const YaraFilePtr &yara_file, const TSNode &cat_node, std::string_view sv);
        void buildRegexpAlt_(const YaraFilePtr &yara_file, const TSNode &alt_node, std::string_view sv);
        void buildRegexpClassRange_(const YaraFilePtr &yara_file, const TSNode &range_node, std::string_view sv);
        void buildRegexpRep_(const YaraFilePtr &yara_file, const TSNode &range_node, std::string_view sv);

        /**
         * Builds HexString from given TSNode
         * @note Performs semantic checks without instantiating any special 
         * object; if any error is found, semantic error is added to yara_file
         * @return std::string with content of hex string
         */
        std::string buildHexStr_(const YaraFilePtr &yara_file, const TSNode &hex_str_node, std::string_view sv);

        // Methods for processing the parts of a hex string (node, alternative, jump)
        void buildHexStrNode_(const YaraFilePtr &yara_file, const TSNode &node, std::string_view sv);
        void buildHexStrAlt_(const YaraFilePtr &yara_file, const TSNode &hex_str_alt, std::string_view sv);
        void buildHexJump_(const YaraFilePtr &yara_file, const TSNode &jump_node, std::string_view sv);

        /**
         * Builds string modifier list from given TSNode structure 
         */
        void buildStringModifierList_(const YaraFilePtr &yara_file, const std::unique_ptr<String> &string, const TSNode &list_node, std::string_view sv);

        // Builders of string modifier arguments (alphabet and range)
        void buildModAlphaArg_(const YaraFilePtr &yara_file, const std::unique_ptr<StringModifier> &mod, const TSNode &alphabet_node, std::string_view sv);
        void buildModRangeArg_(const YaraFilePtr &yara_file, const std::unique_ptr<StringModifier> &mod, const TSNode &range_node, std::string_view sv);


        /**
         * Builds literal object
         */
        std::unique_ptr<Literal> buildLiteral_(const YaraFilePtr &yara_file, const TSNode &literal_node, std::string_view sv);

        // Builders of the particular kinds of literals (string, integer, boolean, float)
        std::unique_ptr<Literal> buildStrLiteral_(const YaraFilePtr &yara_file, const TSNode &literal_node, std::string_view sv);
        std::unique_ptr<Literal> buildIntLiteral_(const YaraFilePtr &yara_file, const TSNode &literal_node, std::string_view sv);
        std::unique_ptr<Literal> buildBoolLiteral_(const YaraFilePtr &yara_file, const TSNode &literal_node, std::string_view sv);
        std::unique_ptr<Literal> buildFloatLiteral_(const YaraFilePtr &yara_file, const TSNode &literal_node, std::string_view sv);

        /**
         * Builds rule condition 
         */
        void buildCondition_(const YaraFilePtr &yara_file, const RulePtr &rule, const TSNode &condition_node, std::string_view sv);


        /**
         * Holds information about currently parsed object  
         * @note file and rule are stored as references, so the context must 
         * not outlive the objects passed to the constructor
         */
        struct expression_ctx_t {
            const YaraFilePtr &file; ///< Parent file
            const RulePtr &rule; ///< Parent rule
            const json *module_ctx; ///< JSON object with module context
            std::vector<var_def_t> vars; ///< List with variables defined in for loops
            bool is_in_loop = false; ///< Is this expression internal expression of for expression?
            
            /**
             * Binds the context to the given file and rule
             * @param f Parent file (stored as a reference)
             * @param r Parent rule (stored as a reference)
             * @param m JSON object with module context, nullptr by default
             * @param in_loop Initial value of is_in_loop, false by default
             */
            expression_ctx_t(
                const YaraFilePtr &f, 
                const RulePtr &r, 
                const json *m = nullptr, 
                bool in_loop = false
            ) : file(f), rule(r), module_ctx(m), is_in_loop(in_loop) {};

            /**
             * Performs searching in variable list 
             */
            bool hasVariable(std::string_view id);
        
            /**
             * Performs searching in variable list 
             * @return Structure with information about defined variable
             * @note NOTE(review): the behavior for an id that is not in the 
             * list is not visible in this header - presumably hasVariable 
             * should be called first, confirm in the definition
             */
            const var_def_t &getVariable(std::string_view id);
        };


        /**
         * General type of method that converts TS expression node to high 
         * level objects 
         */
        using expression_method_ptr = ExpressionPtr (TSParserAdapter::*)(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);

        /**
         * Maps TSNode type to corresponding function
         */
        const static std::unordered_map<std::string_view, expression_method_ptr> expression_method_tab_;


        //---------------- Methods for processing expressions -----------------

        ExpressionPtr buildExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);

        std::shared_ptr<Range> buildRange_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);

        // Overload of buildVarList_ used in the expression context (the overload above takes a rule and a list node)
        std::shared_ptr<VarListExpression> buildVarList_(expression_ctx_t &ctx, const var_def_t &var_ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildIterable_(expression_ctx_t &ctx, var_def_t &var_ctx, const TSNode &list_node, std::string_view sv);

        std::shared_ptr<StructExpression> buildStructFromCache_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);

        std::shared_ptr<ArrayExpression> buildArrayAccess_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        std::shared_ptr<StructExpression> buildStructureAccess_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        std::shared_ptr<FunctionCallExpression> buildFunctionCall_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        std::shared_ptr<PlainSymbol> buildPlainSymbol_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        
        std::shared_ptr<Symbol> buildSymbol_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);

        ExpressionPtr buildPercentQuantifier_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildQuantifierExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);

        // Templated builders; T is the template parameter chosen by the caller (NOTE(review): presumably the expression class to be created - confirm in the definitions)
        template <typename T>
        ExpressionPtr buildBinaryExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);

        template <typename T>
        ExpressionPtr buildUnaryExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);

        template <typename T>
        ExpressionPtr buildStringPropExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);

        template <typename T>
        ExpressionPtr buildIndexedStringPropExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);

        ExpressionPtr buildSetExpression_(expression_ctx_t &ctx, const TSNode &set_node, std::string_view sv);

        ExpressionPtr buildSymbolExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildEnumExpression_(expression_ctx_t &ctx, const TSNode &enum_node, std::string_view sv);        
        ExpressionPtr buildForIntExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildForExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildThemExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildRuleWildcardExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildStringWildcardExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildExpressionSet_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildStringSet_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildRuleSet_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        
        ExpressionPtr buildOfExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildSizeLiteral_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        // Non-template overloads of buildUnaryExpression_ and buildStringPropExpression_ (the templated variants are declared above)
        ExpressionPtr buildUnaryExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildStringPropExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildRegexpExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildStringCountExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildStringExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildParenthesesExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildLiteralExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);
        ExpressionPtr buildInExpression_(expression_ctx_t &ctx, const TSNode &expr_node, std::string_view sv);

        //---

        /**
         * Builds contextual semantic properties of symbol from the given 
         * source object (var_def_t structure, import, integer variable, 
         * rule or external variable - one overload for each of them)
         */
        void buildSymbolContextFrom(const std::shared_ptr<PlainSymbol> &symbol, const var_def_t &var);
        void buildSymbolContextFrom(const std::shared_ptr<PlainSymbol> &symbol, Import *import, const expression_ctx_t &ctx, const std::vector<YaraFileElementBindable *> &includes);
        void buildSymbolContextFrom(const std::shared_ptr<PlainSymbol> &symbol, const std::unique_ptr<IntVariable> &int_var);
        void buildSymbolContextFrom(const std::shared_ptr<PlainSymbol> &symbol, Rule *rule, const expression_ctx_t &ctx, const std::vector<YaraFileElementBindable *> &includes);
        void buildSymbolContextFrom(const std::shared_ptr<PlainSymbol> &symbol, ExtVariable *ext_var);

        /**
         * Sets semantic properties of symbol (symbol type - struct/array/..., data type, parent module object...)
         */
        void buildSymbolContext(std::shared_ptr<PlainSymbol> identifier, expression_ctx_t &ctx);

        /**
         * Builds string literal content and, if necessary, adds related
         * semantic errors to yara_file 
         */
        std::string buildStr_(const YaraFilePtr &yara_file, const TSNode &str_node, std::string_view sv);

        /**
         * Builds integer literal content from given TSNode and, if 
         * necessary, adds related semantic errors to yara_file 
         * @return integer value or nothing if value cannot be converted
         */
        std::optional<int64_t> buildInt_(const YaraFilePtr &yara_file, const TSNode &literal_node, std::string_view sv);

        /**
         * Builds float literal content from given TSNode 
         * @return float value or nothing if value cannot be converted
         */
        std::optional<float> buildFloat_(const YaraFilePtr &yara_file, const TSNode &literal_node, std::string_view sv);

        /**
         * Determines properties of the integer literal text - base, sign and 
         * start of the integer value - and returns them through the output 
         * arguments
         */
        void getIntProps(const TSNode &int_node, int &base, int &sign, int &start);

        /**
         * Builds FileInclude object from given TSNode structure 
         */
        std::unique_ptr<FileInclude> buildInclude_(YaraSource *parent_src, const YaraFilePtr &yara_file, const TSNode &node, std::string_view sv);

        /**
         * Builds Import object from given TSNode structure 
         */
        std::unique_ptr<Import> buildImport_(const YaraFilePtr &yara_file, const TSNode &node, std::string_view sv);

        
        /**
         * Updates the internal string of YaraFile. Only string is updated!
         * To update structure of high level representation, update_ method
         * must be called after this method. 
         */
        void updateString_(const std::string &string, const YaraFilePtr &old_yara_file);
        
        /**
         * Updates given yara file object 
         */
        void updateYaraFile_(YaraSource *parent_src, const YaraFilePtr &old_yara_file);

        /**
         * Updates structure of high level representation stored in yara file.
         * This update is made according to the edits that were done by 
         * YaraSource::edit and according to the new string.
         * 
         * @warning updateString_ method must be called first to update
         * internal string of YaraFile object  
         * @return NOTE(review): the meaning of the returned bool is not 
         * visible in this header - confirm in the definition
         */
        bool update_(YaraSource *parent_src, const YaraFilePtr &old_yara_file);

        /**
         * Takes ownership of the given FileInclude object (NOTE(review): 
         * presumably it is added to the given yara file - confirm in the 
         * definition)
         */
        void addFileInclude(std::unique_ptr<FileInclude> &&include, YaraSource *parent_src, const YaraFilePtr &yara_file);

        /**
         * Rebuilds a part of the high level representation of YaraFile - 
         * which part depends on the given root node. Caller must ensure that 
         * rebuilding some parts of the high level representation does not 
         * cause any redundancies. 
         */
        void rebuildYaraFileSubtree_(YaraSource *parent_src, const YaraFilePtr &y_file, const TSNode &root);
};


/*
 * Inline function definitions 
 */


/**
 * Returns the child node that is marked by the given field name 
 * @param parent Node whose child is looked up
 * @param field_name Name of the field; it does not have to be 
 * null-terminated
 * @return Result of ts_node_child_by_field_name for the given parent and name
 */
TSNode TSParserAdapter::getNodeChildByFieldName_(const TSNode &parent, std::string_view field_name) {
    // The length is taken from the view itself - std::string_view does not guarantee a terminating '\0', so strlen() on its data could read past the viewed characters
    return ts_node_child_by_field_name(parent, field_name.data(), static_cast<uint32_t>(field_name.size()));
}


/**
 * Computes the number of bytes covered by the given node as the difference 
 * between its end byte and its start byte
 */
size_t TSParserAdapter::getNodeLen_(const TSNode &node) {
    const auto first_byte = ts_node_start_byte(node);
    const auto end_byte = ts_node_end_byte(node);

    return end_byte - first_byte;
}


/**
 * Returns the start byte of the given node as offset_t 
 */
offset_t TSParserAdapter::getNodeOffset_(const TSNode &node) {
    const auto start_byte = ts_node_start_byte(node);

    return start_byte;
}


/**
 * Builds range_t structure from the start point and the end point of the 
 * given node (row and column of each of them)
 */
range_t TSParserAdapter::getNodeRange_(const TSNode &node) {
    const TSPoint from = ts_node_start_point(node);
    const TSPoint to = ts_node_end_point(node);

    return {
        {from.row, from.column},
        {to.row, to.column}
    };
}


/**
 * Builds yaramodv4 position (point_t structure) from the row and the column 
 * of the start point of the given node
 */
point_t TSParserAdapter::getNodePos_(const TSNode &node) {
    const TSPoint origin = ts_node_start_point(node);

    return {origin.row, origin.column};
}


/**
 * Checks if node is valid - it must not be null, missing or ERROR 
 */
bool TSParserAdapter::isValidNode_(const TSNode &node) {
    return !ts_node_is_null(node) &&
        !ts_node_is_missing(node) && 
        strcmp(ts_node_type(node), "ERROR");
}


/**
 * Determines if node is global symbol or not 
 */
bool TSParserAdapter::isGlobalNode_(const TSNode &node) {
    return !strcmp(ts_node_type(node), "comment");
}


/**
 * Compares the type of the given node with the expected type 
 * @return true if both type strings are equal, otherwise false
 */
bool TSParserAdapter::isType_(const TSNode &node, const char* type) {
    const char *actual_type = ts_node_type(node);

    return strcmp(actual_type, type) == 0;
}


/**
 * Checks whether the node represents an ASCII character
 * @param node Node to be checked
 * @param sv String view passed to getNodeStringV_ together with the node
 * @return true if the node is exactly one byte long, or if the first byte of 
 * its text is in the ASCII range (0 - 127)
 * @note The first byte is read with std::string_view::at, so 
 * std::out_of_range is thrown if the view returned by getNodeStringV_ is 
 * empty
 * @note NOTE(review): the original inline comment says that the node should 
 * have one byte AND the byte should be between 0 and 127, but the conditions 
 * are joined by OR - the original logic is preserved here, confirm the 
 * intent against the callers
 */
bool TSParserAdapter::isAsciiChar_(const TSNode &node, std::string_view sv) {
    if (getNodeLen_(node) == 1) {
        return true;
    }

    // Signedness of plain char is implementation-defined (`c >= 0` is always true where char is unsigned), so the byte is compared as unsigned char
    const auto first_byte = static_cast<unsigned char>(getNodeStringV_(node, sv).at(0));

    return first_byte < 0x80;
}
