/**
 * \file pcre2nfa.hpp
 * \brief Structures to transform pcre into nfa
 * \author Andrej Hank <xhanka00@liberouter.org> 
 * \date 2007
 * 
 * $Id: pcre2nfa.hpp,v 1.11 2007/09/27 13:10:43 xhanka00 Exp $
 *
 *
 *
 * NOTE: Unicode not supported!
 */

#ifndef __PCRE2NFA_HPP__
#define __PCRE2NFA_HPP__

#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <vector>
#include <map>
#include <ctype.h>
#include <err.h>
#include <stdlib.h>
#include "compat.h"

using namespace std;

/*
 * Locations of the scratch and output files used by the tool.
 * Every path starts with "./", i.e. it is relative to the current
 * working directory of the running process.
 */

//! scratch file holding the single regex currently being processed
#define TMP_REGEX_FILE                  "./tmp/regex"
//! scratch file for the dot description
#define TMP_DOT_FILE                    "./tmp/nfa.dot"
//! generated NFA in postscript
#define GENERATED_PS_FILE               "./generated/nfa.ps"
//! generated ruleset file
#define GENERATED_RULESET_FILE_SUCCESS  "./generated/ruleset"
//! generated ruleset file for the failed rules
#define GENERATED_RULESET_FILE_FAILED   "./generated/ruleset.failed"
//! generated VHDL file
#define GENERATED_VHDL_FILE             "./generated/nfa.vhd"
//! generated VHDL package file
#define GENERATED_VHDL_PACKAGE_FILE     "./generated/nfa_par.vhd"

//! Max size of buffer for regex
//! NOTE(review): the unit is presumably chars, and it is not visible here
//! whether a terminating '\0' is included - confirm where the buffer is used.
const int REGEX_MAX_SIZE = 1024;

/* complete/partial debug */
//#define ALLDEBUG
/*!
 * \brief C++ debug/verbose macros
 *
 * DEBUG(str) prints `str` to std::cout when the global `debug` is non-zero.
 * Unless ALLDEBUG is defined, the output is prefixed with DEBUG_HEADER and
 * ": "; DEBUG_HEADER has to be provided by each source file that uses DEBUG.
 * DEBUG_CONT(str) prints `str` without any prefix.
 *
 * `str` is deliberately not parenthesized, so a whole `a << b << c` chain can
 * be passed. It is not evaluated at all when `debug` is zero.
 *
 * The macros expand to `if (!debug) {} else ...` rather than a bare
 * `if (debug) ...`: the `if` already owns an `else`, so an `else` written
 * after the macro at the call site binds to the caller's own `if` instead of
 * being captured by the macro (dangling-else problem).
 *
 * Nothing is defined here if DEBUG is already defined by the includer.
 */
#ifndef DEBUG
#ifndef ALLDEBUG
	// separate debug and debug_header in each source file
	#define DEBUG(str)    if (!debug) {} else std::cout << DEBUG_HEADER << ": " << str
#else
	#define DEBUG(str)    if (!debug) {} else std::cout << str
#endif
	#define DEBUG_CONT(str)    if (!debug) {} else std::cout << str
#endif

//! Control debug - global variable debug must exist
//! This is only a declaration; the variable has to be defined in the
//! program that includes this header. The DEBUG macros test it for non-zero.
extern int debug;

/// End state record
struct tEndState {
	/// Ids of the rules kept for this end state
	std::vector<int> ruleId;
	/// Number of the state
	int stateNo;
};

/// One move (transition) of the NFA
struct tMove {
	/// Number of the state the move leaves
	int stateFrom;
	/// Number of the state the move enters
	int stateTo;
	/// Index into the symbol table
	int symbolTableIndex;
	/// Set when the move is an epsilon move
	bool epsMove;
};

/// Set of PCRE modifier flags, one bool per modifier letter
struct tModifiers {
	/// 'i' - case insensitive
	bool i;
	/// 'm' - multiline
	bool m;
	/// 's' - single line
	bool s;
	/// 'x' - extended legibility
	bool x;
};

/// Character classes.
/// The values are assigned implicitly (0, 1, 2, ...) and follow the same
/// order as the CCStrings table, so the order of the enumerators matters.
enum tCharClass {
	// classes listed without a NEG_ counterpart
	CC_ANY,
	CC_EOL,
	CC_BOL,

	// plain classes (the CC_CTTRL spelling is kept as it is, it is part of
	// the public names)
	CC_ALPHA,
	CC_ALNUM,
	CC_ASCII,
	CC_BLANK,
	CC_CTTRL,
	CC_DIGIT,
	CC_GRAPH,
	CC_LOWER,
	CC_PRINT,
	CC_PUNCT,
	CC_SPACE,
	CC_UPPER,
	CC_WORD,
	CC_XDIGIT,

	// negated classes, in the same order as the plain ones above
	NEG_CC_ALPHA,
	NEG_CC_ALNUM,
	NEG_CC_ASCII,
	NEG_CC_BLANK,
	NEG_CC_CTTRL,
	NEG_CC_DIGIT,
	NEG_CC_GRAPH,
	NEG_CC_LOWER,
	NEG_CC_PRINT,
	NEG_CC_PUNCT,
	NEG_CC_SPACE,
	NEG_CC_UPPER,
	NEG_CC_WORD,
	NEG_CC_XDIGIT
};

/// Names of the character classes, listed in the same order as the
/// enumerators of tCharClass
const std::string CCStrings[] = {
	"any", "eol", "bol",
	"alpha", "alnum", "ascii", "blank", "cntrl", "digit", "graph",
	"lower", "print", "punct", "space", "upper", "word", "xdigit",
	"neg_alpha", "neg_alnum", "neg_ascii", "neg_blank", "neg_cntrl",
	"neg_digit", "neg_graph", "neg_lower", "neg_print", "neg_punct",
	"neg_space", "neg_upper", "neg_word", "neg_xdigit"
};

/// Number of classes. Derived from the CCStrings table instead of being a
/// hand-written literal, so the count cannot get out of sync when a name is
/// added or removed. It is still a compile-time constant.
const int CC_COUNT = sizeof(CCStrings) / sizeof(CCStrings[0]);

/// Number of classes that have a negation (the "neg_" entries of CCStrings)
const int CC_NEG_COUNT = 14;

/// One record of the symbol table
struct tSymbolTableRecord {
	/// Explicitly enumerated characters
	std::vector<char> enumerated;
	/// Character classes of the record
	std::vector<tCharClass> classFlags;
};

/// Symbol table: a vector of symbol table records
typedef std::vector<tSymbolTableRecord> tSymbolTable;

/// Representation of the NFA
struct tAutomata {
	/// Number of states
	int stateCount;
	/// All moves
	std::vector<tMove> moves;
	/// All end states
	std::vector<tEndState> endStates;
	/// Answers "is this state an end state?"; the key is an int, the value a bool
	std::map<int, bool> mapEndStates;
	/// Symbol table
	tSymbolTable symbolTable;
};

/// Record describing one group; records of this type are kept on a stack
struct tGroupingRecord {
	/// Number of the start state
	int startState;
	/// Has the end state been created?
	bool endStateCreated;
	/// Number of the end state
	int endState;
};

// init function
// (prototype only; the definition is not in this header)
//   map    - string -> tCharClass map, taken by non-const reference
//   labels - array of strings
//   count  - NOTE(review): presumably the number of entries in labels;
//            CCStrings / CC_COUNT look like the intended arguments - confirm
void initPcreParser(map<string, tCharClass>& map, const string labels[], int count);

// modifiers handling
// (prototypes only; the definitions are not in this header)

// Takes the regex string by value and returns a tModifiers value.
// NOTE(review): how the modifiers are found inside the regex is not visible
// here - see the definition.
tModifiers getModifiers(string regex);
// Takes a tModifiers value by value; the output destination is not visible
// in this header.
void printModifiers(tModifiers modifiers);


// basic FSM creating
// (prototypes only; the definitions are not in this header)

// from / to are state numbers, with points to a symbol table record,
// epsMove marks an epsilon move.
// NOTE(review): the bool result is presumably success/failure - confirm.
bool mkMove(tAutomata *au, int from, int to, tSymbolTableRecord *with, bool epsMove);
// currentState is passed by pointer.
// NOTE(review): presumably updated to the newly added state - confirm.
void addState(tAutomata *au, bool isEndState, int * currentState);
// endState is taken by non-const reference.
// NOTE(review): the meaning of the bool result is not visible here.
bool addEndStateRule(tEndState &endState, int ruleId);
// record is taken by non-const reference, modifiers by value.
// NOTE(review): modifiers presumably influence how ch is stored (e.g. the
// case insensitive flag) - confirm.
void symbolRecordAddEnumerated(tModifiers modifiers, tSymbolTableRecord &record, char ch);
// table is taken by non-const reference, record by value (copied on call).
// NOTE(review): the int result is presumably the index of the record in
// table (compare tMove::symbolTableIndex) - confirm.
int insertSymbolRecord(tSymbolTable &table, tSymbolTableRecord record);

// grouping stack functions
// (prototypes only; the definitions are not in this header)
// The stack is a vector<tGroupingRecord> taken by non-const reference.

// start, endStateCreated and end correspond to the three fields of
// tGroupingRecord.
void addGroupingRecord(vector<tGroupingRecord> &stack, int start, bool endStateCreated, int end);
// lastGroup is taken by non-const reference.
// NOTE(review): presumably receives the record removed from the stack.
void removeGroupingRecord(vector<tGroupingRecord> &stack, tGroupingRecord &lastGroup);
// NOTE(review): groupStart / groupEnd / groupOr presumably handle '(' , ')'
// and '|' of the regex - confirm in the definitions.
void groupStart(tAutomata *au, vector<tGroupingRecord> &groupingStack, int * currentState);
void groupEnd(tAutomata *au, vector<tGroupingRecord> &groupingStack, int * currentState, tGroupingRecord &lastGroup);
void groupOr(tAutomata *au, vector<tGroupingRecord> &groupingStack, int * currentState);

// quantity
// (prototypes only; the definitions are not in this header)
// currentState is passed by value here, lastGroup by non-const reference.
// NOTE(review): the names suggest the "zero times" and "more times" parts of
// the regex quantifiers - confirm in the definitions.
void quantityZero(tAutomata *au, tGroupingRecord &lastGroup, int currentState);
void quantityMore(tAutomata *au, bool checkGroup, tGroupingRecord &lastGroup, int currentState);

// repeating
// (prototypes only; the definitions are not in this header)
// currentState is passed by pointer in both functions.
// NOTE(review): compulsoryTimes / optionalTimes presumably come from a
// {min,max} style quantifier; the meaning of repeatPart's bool result is not
// visible here - confirm in the definitions.
bool repeatPart(tAutomata *au, int stateFrom, int stateTo, int compulsoryTimes, int optionalTimes, int *currentState);
void repeatLast(tAutomata *au, int *currentState, int compulsoryTimes, int optionalTimes);

// generate postscript
// (prototypes only; the definitions are not in this header)

// Writes to the given stream; table is taken by value (copied on call).
void printSymbolTable(ostream &fout, tSymbolTable table);
// regex and psfile are taken by value.
// NOTE(review): psfile is presumably the path of the postscript file to
// create and the bool result success/failure - confirm.
bool automata2ps(string regex, tAutomata *automata, string psfile);
// Writes to the given stream.
// NOTE(review): dot presumably selects output formatted for a dot file -
// confirm.
void printSymbolTableRecord(ostream &fout, tSymbolTableRecord *with, bool epsMove, bool dot);
#endif
