%{
/**
 * \file pcre.y
 * \brief Syntax and semantic analyzer for pcre transformations
 * \author Andrej Hank <xhanka00@liberouter.org> 
 * \date 2007
 * 
 * $Id: pcre.y,v 1.22 2008/01/22 11:55:29 kastil Exp $
 *
 * \note Creating moves must precede creating new states, because creating a new state changes the currentState variable
 */
#include <iostream>
#include <fstream>
#include <sstream>
#include <err.h>
#include "pcre.tab.h"
#include "pcre2nfa.hpp"
#include "pcre2hw.hpp"
#include "nfa2dfa.h"

// revision identification string of this file
__RCSID("$Id: pcre.y,v 1.22 2008/01/22 11:55:29 kastil Exp $");
//! Version string printed by the -V option in main()
#define VERSION "$Id: pcre.y,v 1.22 2008/01/22 11:55:29 kastil Exp $"
//! Acceptable command line arguments (getopt option string; l and c take a value)
#define ARGUMENTS       "dhsVl:c:"

using namespace std;

// debug config ---------------------------------------------------------------
// prefix string - presumably consumed by the DEBUG macro, which is defined
// outside this file
#define DEBUG_HEADER "yacc    "
/// control debug printings (set to 1 by the -d option)
int debug;
/// control verbosity (set to 1 by the -s option; hides per-rule progress output)
int silent = 0;

// interface with lex ---------------------------------------------------------
// functions
// scanner entry point called by the generated parser
int yylex();
// error callback, defined in the epilogue of this file
void yyerror(char *);
// switches the scanner to a new input stream
void yyrestart(FILE *);
// to get flex to INITIAL state
void flexReset();
// global lex input
extern FILE *yyin;

// global variables -----------------------------------------------------------
/// string to character Class map - from pcre2nfa.hpp
map<std::string, tCharClass> CCMap;

/// automaton being created
tAutomata au;	
/// identifier of the current state (updated through addState and the group helpers)
int currentState;
/// symbol record of the bracket class being parsed; allocated in the
/// classStart rule and deleted in the class rule
tSymbolTableRecord * symbolRecord;

/// grouping vector - stores grouping records
vector<tGroupingRecord> groupingStack;
/// last GroupingRecord
tGroupingRecord lastGroup;
/// last after group - used with repeating
/// NOTE(review): not referenced by any code visible in this file
int lastAfterGroup;

/// repeating value - compulsory times (first integer of {x,y})
int repeatCompulsory;
/// repeating value - optional times (second integer of {x,y}; equals the
/// compulsory value for {x} and is 0 for {x,})
int repeatOptionaly;

/// PCRE modifiers of the rule being parsed (filled in main() by getModifiers)
tModifiers modifiers;

%}

/* terminal symbols delivered by yylex(); the grammar below does not declare a
 * %union, so every semantic value is read through the plain yylval */
%token LBRA RBRA INT COMMA LBOX RBOX SLASH LPAR RPAR ANY ZEROONE ONEMORE
%token ZEROMORE OR ASCII NEGATE DASH CHARCLASS2VALUE CHARCLASS SLASHCHARCLASS
%token NEGSLASHCHARCLASS EOL BOL SPACE

%%
/* PCRE grammar */
/* hierarchy - element | grouping -> unit -> extended unit -> exp -> pcre */

/* Start symbol: one whole rule written as m/PATTERN/cgimosx - an optional
 * ASCII token in front, the slash-delimited pattern, then trailing ASCII
 * tokens. The action only emits a debug trace. */
pcre:	modif_front inslash modif_rear { DEBUG("<< pcre" << endl); }
;

/* at most one ASCII token in front of the opening slash; no action attached */
modif_front: 	/* empty */
	|	ASCII
;

/* any number of ASCII tokens after the closing slash (right recursion); no
 * action attached - main() obtains the modifiers through getModifiers() */
modif_rear:	/* empty */
	|	ASCII modif_rear
;

/* Pattern body enclosed in a pair of slashes. The action runs after the
 * closing slash has been read and finishes the automaton part of this rule. */
inslash: SLASH exp SLASH {
	// end global group (tryParse() calls groupStart before parsing begins)
	groupEnd(&au, groupingStack, &currentState, lastGroup);
	// make last end state: the move is created first because addState
	// changes currentState; NULL symbol with the trailing true flag is
	// presumably an epsilon move - confirm against mkMove
	mkMove(&au, currentState, au.stateCount, NULL, true);
	// second argument true here (false everywhere else) - the end-state flag
	addState(&au, true, &currentState);
	// register the newest end state together with the index of the move
	// that was just added
	addEndStateRule(au.endStates.back(), au.moves.size() - 1);
}
;

/* Expression: a sequence of extended units, optionally split into branches by
 * the `or` rule. All three forms are right-recursive and carry no action of
 * their own. */
exp:		ext_unit
	|	ext_unit exp
	|	ext_unit or exp
;

/* * ? + quantity - follows a unit inside ext_unit; the helpers receive the
 * last group record and the current state */
quantity: 	ZEROONE {
	// ZEROONE token: only quantityZero is applied
	quantityZero(&au, lastGroup, currentState);
}
	| 	ZEROMORE {
	// important! quantityMore before quantityZero, quantityMore works with
	// last move
	quantityMore(&au, true, lastGroup, currentState);
	quantityZero(&au, lastGroup, currentState);
}
	|	ONEMORE {
	// ONEMORE token: only quantityMore is applied
	quantityMore(&au, true, lastGroup, currentState);
}
;

/* not greedy - in hw both greedy and not greedy matches are found, because
 * several states are active at a time; the optional ZEROONE token after a
 * quantifier is therefore accepted but changes nothing in the automaton */
not_greedy: /* empty */ 
	  | ZEROONE {
		  DEBUG("not greedy" << endl);
		  /* only reflects possible syntax */
}
;


/* group branch: on the OR token a new branch of the group on top of the
 * grouping stack is started through groupOr (which may change currentState) */
or:	OR { 
	groupOr(&au, groupingStack, &currentState);
};

/* unit - either a single element or a parenthesised group; no action */
unit:	 	element
	|	grouping;

/* extended unit - a unit alone, or a unit followed by a {x,y} repetition or
 * by a ? * + quantifier, each optionally followed by the not-greedy marker;
 * the work is done in the actions of the referenced rules */
ext_unit:	unit
	|	unit repeating not_greedy
	|	unit quantity not_greedy
;

/* basic elements
   ASCII    - a single literal character
   ANY      - . as any character
   EOL, BOL - the "eol" / "bol" character classes
   SPACE    - whitespace, skipped when the x modifier is set
   class    - [class] or a backslash class
   Every alternative that consumes input creates the move first and the
   state second, because addState changes currentState.
 */
element:	ASCII {
	// literal character taken from yylval
	tSymbolTableRecord literalRecord;
	symbolRecordAddEnumerated(modifiers, literalRecord, yylval);
	mkMove(&au, currentState, au.stateCount, &literalRecord, false);
	addState(&au, false, &currentState);

	// NOTE epsilon move into beginning of automata
	//mkMove(&au, au.stateCount - 1, 0, NULL, true);
}
	|	ANY {
	// move labelled with the "any" character class
	tSymbolTableRecord anyRecord;
	anyRecord.classFlags.push_back(CCMap["any"]);
	mkMove(&au, currentState, au.stateCount, &anyRecord, false);
	addState(&au, false, &currentState);
}
	|	EOL {
	// move labelled with the "eol" class; doesn't reflect /s /m modifiers
	tSymbolTableRecord eolRecord;
	eolRecord.classFlags.push_back(CCMap["eol"]);
	mkMove(&au, currentState, au.stateCount, &eolRecord, false);
	addState(&au, false, &currentState);
}
	|	BOL {
	// move labelled with the "bol" class
	tSymbolTableRecord bolRecord;
	bolRecord.classFlags.push_back(CCMap["bol"]);
	mkMove(&au, currentState, au.stateCount, &bolRecord, false);
	addState(&au, false, &currentState);
}
	|	SPACE {
	tSymbolTableRecord spaceRecord;
	// whitespace is part of the pattern only when the x modifier is off;
	// with x enabled the token is accepted and nothing is generated
	if(!modifiers.x) {
		symbolRecordAddEnumerated(modifiers, spaceRecord, yylval);
		DEBUG( "|" << yylval << "|" << endl);
		mkMove(&au, currentState, au.stateCount, &spaceRecord, false);
		addState(&au, false, &currentState);
	}
}
	|	class
;

/* grouping (abc) - the group is opened by the lpar rule; once the closing
 * parenthesis is read the group is finished through groupEnd, which receives
 * lastGroup and a pointer to currentState */
grouping:	lpar exp RPAR {
	groupEnd(&au, groupingStack, &currentState, lastGroup);
}
;

/* mark beginning of grouping - kept as a separate rule so that groupStart
 * runs as soon as LPAR is reduced, before the group body is parsed */
lpar:	LPAR {
   	groupStart(&au, groupingStack, &currentState);
}
;

/* {x,y} number of iterations - the counters come from the interval rule */
repeating:	LBRA interval RBRA {
	// one occurrence already exists in the automaton, so both counters are
	// handed over decremented by one
	const int compulsoryLeft = repeatCompulsory - 1;
	const int optionalLeft = repeatOptionaly - 1;

	if(currentState != lastGroup.endState) {
		// single state - repeat just the last element
		repeatLast(&au, &currentState, compulsoryLeft, optionalLeft);
	} else {
		// the current state closes the last group - repeat the whole group
		repeatPart(&au, lastGroup.startState, currentState, compulsoryLeft, optionalLeft, &currentState);
	}
}
;

/* inside repeating - three accepted forms:
 *   {x}    optional count is set equal to the compulsory count
 *   {x,}   optional count is set to 0
 *   {x,y}  both counts are stored by firstInteger / secondInteger */
interval:	firstInteger {
	repeatOptionaly = repeatCompulsory;
}

	|	firstInteger COMMA {
	repeatOptionaly = 0;
}
	|	firstInteger COMMA secondInteger
;

/* first number of {x,y}; a separate rule so the value is copied out of yylval
 * right after the INT token, before the next token overwrites it */
firstInteger: INT {
       // save first value
       repeatCompulsory = yylval;
}
;

/* second number of {x,y}, stored as the optional repetition count */
secondInteger: INT {
       // save second value
       repeatOptionaly = yylval;
}
;

/* [] or \w\W\s\S\d\D class of characters
 * The bracket forms consume the record collected in symbolRecord (allocated by
 * classStart, filled by inclass_unit) and release it afterwards; the backslash
 * forms take the class identifier directly from yylval. */
class:		classStart inclass RBOX {
	// plain [..] class: the collected record labels the new move
	// (move first, then state - addState changes currentState)
	mkMove(&au, currentState, au.stateCount, symbolRecord, false);
	addState(&au, false, &currentState);
	// the record was allocated in classStart
	delete symbolRecord;
}
	|	classStart NEGATE inclass RBOX {
	// negated class: build the complement in a local record
	tSymbolTableRecord negated;
	if(!symbolRecord->enumerated.empty()) {
		// start with every value 0..255 ...
		for(int code = 0; code < 256; ++code) {
			symbolRecordAddEnumerated(modifiers, negated, code);
		}
		// ... then remove the first occurrence of each listed character
		vector<char>::iterator listed = symbolRecord->enumerated.begin();
		for(; listed != symbolRecord->enumerated.end(); ++listed) {
			vector<char>::iterator hit = negated.enumerated.begin();
			while(hit != negated.enumerated.end() && *hit != *listed) {
				++hit;
			}
			if(hit != negated.enumerated.end()) {
				negated.enumerated.erase(hit);
			}
		}
	}
	// character classes are negated by shifting their identifier by
	// CC_NEG_COUNT
	// NOTE sure that "any" won't come
	vector<tCharClass>::iterator flag = symbolRecord->classFlags.begin();
	for(; flag != symbolRecord->classFlags.end(); ++flag) {
		negated.classFlags.push_back(static_cast<tCharClass>(*flag + CC_NEG_COUNT));
	}

	// negation completed: move first, then state
	mkMove(&au, currentState, au.stateCount, &negated, false);
	addState(&au, false, &currentState);
	// the record was allocated in classStart
	delete symbolRecord;
}
	|	SLASHCHARCLASS {
	// backslash class: yylval carries the class identifier
	tSymbolTableRecord slashRecord;
	slashRecord.classFlags.push_back(static_cast<tCharClass>(yylval));
	mkMove(&au, currentState, au.stateCount, &slashRecord, false);
	addState(&au, false, &currentState);
}
	|	NEGSLASHCHARCLASS {
	// same as SLASHCHARCLASS above; the value in yylval is used as is
	// (the original note says the neg_ prefixed classes are meant here)
	tSymbolTableRecord negSlashRecord;
	negSlashRecord.classFlags.push_back(static_cast<tCharClass>(yylval));
	mkMove(&au, currentState, au.stateCount, &negSlashRecord, false);
	addState(&au, false, &currentState);
}
;

/* opening bracket of a class: allocates the record that the inclass_unit
 * actions fill; the class rule deletes it after the closing bracket */
classStart:	LBOX {
	/* create tSymbolTableRecord structure */
	symbolRecord = new tSymbolTableRecord;
	DEBUG("CC start" << endl);
}
;

/* inside class - one or more class units (right recursion); no action here,
 * each unit adds itself to symbolRecord */
inclass: inclass_unit
       | inclass_unit inclass
;

/* one item inside a bracket class; everything is collected in *symbolRecord
 *   ASCII           - single character, added as an enumerated symbol
 *   CHARCLASS2VALUE - range a-b, both bounds packed into yylval
 *   CHARCLASS       - class identifier in yylval, added to classFlags */
inclass_unit: 	ASCII {
		DEBUG("char class adding " << (char)yylval << endl);
		symbolRecordAddEnumerated(modifiers, *symbolRecord, yylval);
}
	| CHARCLASS2VALUE {
		// extract 2 values from yylval: low bound in bits 0-7, high bound in
		// bits 8-15. They are kept as ints in 0..255: with char bounds and a
		// char loop counter the loop never ended for a range finishing at the
		// largest char value, and (where char is signed) ranges reaching above
		// 0x7f compared as negative and were skipped.
		const int low = yylval & 0xFF;
		const int high = (yylval >> 8) & 0xFF;

		DEBUG("char class 2 values - " << (char)low << "-" << (char)high << endl);
		// TODO convert into classes if possible
		for(int ch = low; ch <= high; ch++) {
			symbolRecordAddEnumerated(modifiers, *symbolRecord, ch);
		}
}
	| CHARCLASS {
		DEBUG("char class " << CCStrings[yylval] << endl);
		symbolRecord->classFlags.push_back((tCharClass)yylval);
}
;

%%
//! Error callback of the generated parser: forwards the message to warnx(),
//! so the problem is reported without terminating the program
void yyerror(char *s)
{
	warnx("Terminating current pcre: %s", s);
}

/* -------------------------------------------------------------------------- */
/** \brief saveParserState Copy the parser state into caller supplied storage
*
* All four source values are taken by value and assigned to the four
* destinations. tryParse() uses the function in both directions: to take a
* snapshot of the globals and, with the arguments swapped, to put the snapshot
* back.
*
* \param au Automaton to copy
* \param currentState Current state to copy
* \param groupingStack Grouping stack to copy
* \param lastGroup Last group to copy
* \param sau Destination of the automaton
* \param scurrentState Destination of the current state
* \param sgroupingStack Destination of the grouping stack
* \param slastGroup Destination of the last group
*/
/* -------------------------------------------------------------------------- */
void saveParserState(tAutomata au, int currentState,
		vector<tGroupingRecord> groupingStack, tGroupingRecord lastGroup,
		tAutomata *sau, int *scurrentState,
		vector<tGroupingRecord> *sgroupingStack, tGroupingRecord *slastGroup)
{
	*sau = au;
	*sgroupingStack = groupingStack;
	*scurrentState = currentState;
	*slastGroup = lastGroup;
}

/* -------------------------------------------------------------------------- */
/** \brief tryParse
* 
* Note: expects initialized parser and works with global variables as whole
* parser
* \param regexp 
* 
* \return 
*	- true - if parsed successfully, parser includes newly parsed rule
*	- false - if failed
*
*/
/* -------------------------------------------------------------------------- */
bool tryParse(char *regexp) {
	tAutomata sau;
	int scurrentState;
	vector<tGroupingRecord> sgroupingStack;
	tGroupingRecord slastGroup;
	// save parser state
	saveParserState(au, currentState, groupingStack, lastGroup, &sau,
	&scurrentState, &sgroupingStack, &slastGroup);
	
	// create initial state and groupStart, link it with zero state
	addState(&au, false, &currentState);
	mkMove(&au, 0, currentState, NULL, true);
	groupStart(&au, groupingStack, &currentState);

	// prepare input
	yyin = fopen(TMP_REGEX_FILE, "w+");
	if(yyin == NULL){
		perror("Open TMP file for writing: ");
		errx(1, "TMP file error1");
}
	fputs(regexp, yyin);
	fclose(yyin);
	// open for reading
	yyin = fopen(TMP_REGEX_FILE, "r");
	if(yyin == NULL)
		errx(1, "TMP file error2");

	// flush flex buffers, reset it
	yyrestart(yyin);
	flexReset();
	//BEGIN(INITIAL);

	// if success add rule
	if(yyparse ()) { // parse failed
		// get back parser before failed rule
		saveParserState(sau, scurrentState, sgroupingStack, slastGroup, &au,
		&currentState, &groupingStack, &lastGroup);
        fclose(yyin);
		return false;
	} else { // parse OK
        fclose(yyin);
		return true;
	}
}

/* -------------------------------------------------------------------------- */
/** \brief usage Display help
*
* Prints the synopsis followed by one line per documented option to stdout.
* NOTE(review): the option string ARGUMENTS also accepts -l and -c, which are
* not listed here.
*/
/* -------------------------------------------------------------------------- */
void usage()
{
	// one entry per option line, printed in this order
	static const char *const optionLines[] = {
		"-d 	Debug mode\n",
		"-h	Show this text\n",
		"-s	Silent, print only summary\n",
		"-V	Show version\n"
	};
	const unsigned lineCount = sizeof(optionLines) / sizeof(optionLines[0]);

	printf ("Usage: %s [-dhsV] ruleset-file\n", getprogname());
	for (unsigned i = 0; i < lineCount; ++i) {
		fputs(optionLines[i], stdout);
	}
}

/* -------------------------------------------------------------------------- */
/** \brief main Launch parser
*
* Processes the command line, then reads the ruleset file line by line. Each
* line is parsed with tryParse(); accepted rules are written to
* GENERATED_RULESET_FILE_SUCCESS, rejected ones to GENERATED_RULESET_FILE_FAILED.
* Every accepted rule is parsed a second time into an emptied automaton, so a
* stand-alone automaton per rule is collected in addition to the cumulative
* one. Finally a summary is printed, the collected automata are saved and the
* first one is exported to postscript.
*
* \param argc Argument count
* \param argv Argument vector
*
* \return
*	0 - Success (fatal errors terminate the program through errx(1, ...))
*/
/* -------------------------------------------------------------------------- */
int main(int argc, char *argv[]) {
	debug = 0;

	char * rulesetFile;
	char tmpRegex[REGEX_MAX_SIZE];
	int noOfSuccessRules = 0, noOfFailedRules = 0;
	// getopt() returns an int; kept in a plain char the comparison with -1
	// never succeeds on platforms where char is unsigned
	int c;
	// values of -l / -c; they used to be left uninitialized, so CharNum
	// reached SaveAutomaton() with an indeterminate value when -c was absent
	// NOTE(review): 0 is a placeholder default - confirm the intended values
	int Limit = 0, CharNum = 0;

	if (argc == 1) {
		printf("Nothing to do.\n");
		return 0;
	}

	// process parameters
	while ((c = getopt(argc, argv, ARGUMENTS)) != -1) {
		switch (c) {

		case 'h':
			usage();
			return 0;
		case 'd':
			debug = 1;
			break;
		case 's':
			silent = 1;
			break;
		case 'V':
			printf("%s\n", VERSION);
			exit(0);
		case 'l':
			Limit = atoi(optarg);
			break;
		case 'c':
			CharNum = atoi(optarg);
			break;
		default:
			errx(1, "Unknown argument -%c", optopt);
		}
	}
	// skip the options that were consumed
	argc -= optind;
	argv += optind;

	// exactly one positional argument must remain
	if (argc != 1) {
		errx(1, "Select ruleset file");
	} else {
		// get source file with snort ruleset
		rulesetFile = *argv;
	}

	// output ruleset
	ofstream rulesetOutSuccess(GENERATED_RULESET_FILE_SUCCESS);
	// output failed ruleset
	ofstream rulesetOutFailed(GENERATED_RULESET_FILE_FAILED);

	// open ruleset file
	ifstream inFile(rulesetFile);
	if(!inFile.is_open())
		errx(1, "file error");

	// init CCMap
	initPcreParser(CCMap, CCStrings, CC_COUNT);
	// make automata structures - zero state
	currentState = 0;
	au.stateCount = 1;

	// for each rule in ruleset file
	// edit to make single automat for each rule by Jan Kastil
	vector<tAutomata> NFA;
	while(inFile.getline(tmpRegex, REGEX_MAX_SIZE)) {
		if(!silent)
			cout << "Converting " << tmpRegex << endl;

		modifiers = getModifiers((string)tmpRegex);
		if(debug)
			printModifiers(modifiers);

		if(tryParse(tmpRegex)) { // if rule successfully parsed, increment number of rules
			// keep the cumulative automaton (it now contains this rule)
			tAutomata Tmp = au;
			if(!silent)
				cout << "Parse OK" << endl;
			rulesetOutSuccess << tmpRegex << endl;
			noOfSuccessRules++;
			// empty the automaton and parse the same rule once more to
			// obtain an automaton holding this rule only
			au.stateCount = 1;
			au.endStates.clear();
			au.symbolTable.clear();
			au.moves.clear();
			au.mapEndStates.clear();
			tryParse(tmpRegex);
			NFA.push_back(au);
			// continue with the cumulative automaton
			au = Tmp;

		} else {
			if(!silent)
				cout << "Parse Failed" << endl;
			noOfFailedRules++;
			rulesetOutFailed << tmpRegex << endl;
		}
	}
	inFile.close();
	rulesetOutSuccess.close();
	rulesetOutFailed.close();

	cout << "--------------------------------------------------------------------------------" << endl;
	cout << "Parsed file           : " << rulesetFile << endl;
	cout << "--------------------------------------------------------------------------------" << endl;
	cout << "Sucessfully generated :" << noOfSuccessRules << " rules" << endl;
	cout << "Failed                :" << noOfFailedRules << " rules" << endl;
	cout << "--------------------------------------------------------------------------------" << endl;

	if(noOfSuccessRules > 0) {
		SaveAutomaton(&NFA,CharNum);
//		Histograms1(&NFA);
		// export into postscript (only the automaton of the first accepted rule)
		if(automata2ps("snort ruleset", &(NFA.at(0)), GENERATED_PS_FILE))
			cout << "Postscript successfully created  -> " << GENERATED_PS_FILE << endl;
		else
			cout << "Can't create postscript file, have you \"dot\" installed?" << endl;
		// generate hw representation
/*		nfa2hw(&au, 1, GENERATED_VHDL_FILE, GENERATED_VHDL_PACKAGE_FILE);
		cout << "VHDL code successfully generated -> " <<
		GENERATED_VHDL_FILE << endl;
		cout << "Final ruleset generated          -> " <<
		GENERATED_RULESET_FILE_SUCCESS << endl;

		#ifndef NDEBUG
	       		printSymbolTable(cout, au.symbolTable); 
		#endif
		#ifdef NDEBUG*/
//                         Histograms(&NFA);
			//Groups(&NFA,Limit,CharNum);
                        //CreateCharastristics(&NFA);
			//cout<<Limit<<CharNum<<endl;
	//		Characteristics(&NFA);
                        //Statistics(&NFA,Limit,CharNum);
	//	#endif
	}

	if(noOfFailedRules > 0)
		cout << "Failed ruleset generated         -> " <<
		GENERATED_RULESET_FILE_FAILED << endl;

	return 0;
}
