%option nounput   
%option outfile="pcre.yy.c"

%{
/**
 * \file pcre.l
 * \brief Lexical analyzer for pcre transformations
 * \author Andrej Hank <xhanka00@liberouter.org> 
 * \date 2007
 * 
 * Copyright (C) 2007 CESNET
 * 
 * $Id: pcre.l,v 1.9 2008/02/04 18:54:37 xkobie00 Exp $
 *
 */
#include <iostream>
#include "pcre.tab.h"
#include "pcre2nfa.hpp"

__RCSID("$Id: pcre.l,v 1.9 2008/02/04 18:54:37 xkobie00 Exp $");
#define DEBUG_HEADER "lex     "

using namespace std;

// String to Character Class map - from pcre2nfa.hpp
extern map<std::string, tCharClass> CCMap;

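/*
	Start condition kinds used below:
	%s - inclusive: rules without a start condition stay active as well
	%x - exclusive: only rules tagged with that start condition are active
*/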
%}

/* Decimal integer without a leading zero; matched in the times state.
   NOTE(review): a bound of 0 does not match this definition - confirm
   whether that is intended. */
INT	[1-9][0-9]*
/* One or more letters, digits, underscores or dashes. */
STRING	[a-zA-Z0-9_-]+
/* A backslash, the letter x and exactly two hexadecimal digits. */
HEX	\\x[a-fA-F0-9][a-fA-F0-9]

/* times: inclusive state entered by the opening curly brace rule. */
%s times
/* class: exclusive state entered by the opening square bracket rule. */
%x class
/* esc: exclusive state entered by the backslash rule. */
%x esc
%%
"{"	{ // start of quantifying: enter the times state
		DEBUG("LBRA" << endl);
		BEGIN(times);
		return LBRA;
	}
"}"	{ // end of quantifying: go back to the INITIAL state
		// This rule carries no start condition, so it is active in
		// INITIAL and in the inclusive times state.
		DEBUG("RBRA" << endl);
		BEGIN(INITIAL);
		return RBRA;
	}
<times>{INT} 	{
			// quantifier bound: the decimal value of the matched
			// text is passed to the parser through yylval
			DEBUG("INT" << endl);
			yylval = atoi(yytext);
			return INT;
		}
<times>,	{
			// comma inside a quantifier; no semantic value is set
			DEBUG("COMMA" << endl);
			return COMMA;
		}
"\\"	{ // start of escape sequence
		// No token is returned here: the scanner only switches to the
		// esc state, whose rules handle the following character(s).
		DEBUG("Escape seqence " << endl);
		BEGIN(esc);
}
<esc>t|n|r|f|a|e	{ // single-letter escapes for control characters
			// yytext[0] is the letter that followed the backslash.
			// Store the control character it denotes in yylval,
			// leave the esc state and return it as an ASCII token.
			switch (yytext[0]) {
				case 't':
					yylval = '\t';
					break;
				case 'n':
					yylval = '\n';
					break;
				case 'r':
					yylval = '\r';
					break;
				case 'f':
					yylval = '\f';
					break;
				case 'a':
					yylval = '\a';
					break;
				case 'e':
					// ESC (decimal 27). Written as an octal
					// escape because backslash-e is a compiler
					// extension and not standard C++.
					yylval = '\033';
					break;
				default:
					break;
			}
			BEGIN(INITIAL);
			return ASCII;
		}
<esc>w|W|s|S|d|D	{ // Perl-style shorthand character classes
			// Lower-case letters look up the class in CCMap and are
			// returned as SLASHCHARCLASS; upper-case letters look up
			// the matching neg_ entry and are returned as
			// NEGSLASHCHARCLASS. The pattern must list upper-case D,
			// otherwise the D case below can never be reached.
			BEGIN(INITIAL);
			switch (yytext[0]) {
				case 'd':
					yylval = CCMap["digit"];
					return SLASHCHARCLASS;
				case 'D':
					yylval = CCMap["neg_digit"];
					return NEGSLASHCHARCLASS;
				case 's':
					// TODO: \s is not exactly [:space:]
					yylval = CCMap["space"];
					return SLASHCHARCLASS;
				case 'S':
					yylval = CCMap["neg_space"];
					return NEGSLASHCHARCLASS;
				case 'w':
					yylval = CCMap["word"];
					return SLASHCHARCLASS;
				case 'W':
					yylval = CCMap["neg_word"];
					return NEGSLASHCHARCLASS;
				default:
					// not reachable: the pattern admits only
					// the six letters handled above
					break;
			}
}
<esc>\/|\"|\'|\\|\?|\*|\+|\.|:|\{|\}|\(|\)|\[|\]|!|%|" "|\$|\^ { // special characters
		// An escaped punctuation character or blank stands for itself:
		// return the character as ASCII and leave the esc state.
		yylval = yytext[0];
		BEGIN(INITIAL);
		return ASCII;
}
<esc>0[0-7]{3} { // escape sequence octal
		// Parse into a local unsigned int, which is the type the o
		// conversion of sscanf stores through, and range-check it
		// before publishing the value in yylval. Three octal digits
		// can encode up to 511, so the check against 256 is needed.
		unsigned int code = 0;
		if(sscanf(yytext, "%o", &code) != 1 || code >= 256)
			errx(1, "Wrong escape character number - %s", yytext);
		yylval = code;

		DEBUG("ASCII as octal escape sequence " << yylval << endl);

		BEGIN(INITIAL);
		return ASCII;
}
<esc>x[0-9a-fA-F]{2} { // escape sequence hexa
		// Parse into a local unsigned int, which is the type the x
		// conversion of sscanf stores through, and publish it in
		// yylval only once it is known to fit into one byte.
		unsigned int code = 0;
		if(sscanf(yytext, "x%x", &code) != 1 || code >= 256) {
			// recoverable error: warn, restart the scanner on
			// yyin, leave the esc state and return 0
			warnx("Wrong escape character number - %s", yytext);
			yyrestart(yyin);
			BEGIN(INITIAL);
			return 0;
		}
		yylval = code;

		DEBUG("ASCII as hexa escape sequence " << yylval << endl);

		BEGIN(INITIAL);
		return ASCII;
}
<esc>. { // incorrect escape sequence
	// Fallback for a character that none of the esc rules above accept:
	// warn, restart the scanner on yyin, leave the esc state and
	// return 0.
	warnx("Incorrect escape sequence \\%s", yytext);
	yyrestart(yyin);
	BEGIN(INITIAL);
	return 0;
}

"["	{ // start of character class: enter the exclusive class state
		DEBUG("LBOX" << endl);
		BEGIN(class);
		return LBOX;
	}
<class>"]"	{ // end of class: go back to the INITIAL state
			DEBUG("RBOX" << endl);
			BEGIN(INITIAL);
			return RBOX;
		}
<class>"^"	{
			// A caret anywhere inside a class is returned as NEGATE;
			// this rule does not check its position in the class.
			DEBUG("INTERAL NEGATE" << endl);
			return NEGATE;
		}
<class>"\\-"	{
			/* A backslashed dash is a literal dash, not a range
			 * operator: return it as plain ASCII */
			yylval = '-';
			DEBUG("ASCII " << yytext << endl);
			return ASCII;
		}
<class>"-"	{
			/* A dash that is not consumed by one of the range rules,
			 * e.g. at the start or the end of the class as in
			 * [-abc] or [abc-], is returned as plain ASCII */
			yylval = '-';
			DEBUG("ASCII " << yytext << endl);
			return ASCII;
		}

<class>{HEX}-{HEX} {
			// Range whose two bounds are both written as hex escapes.
			DEBUG("CHARCLASS2VALUE ");
			// The x conversion of sscanf stores a whole unsigned int,
			// so the bounds must not be char objects. Unsigned values
			// also keep bounds of 0x80 and above from turning negative
			// in the comparison and in the packing below.
			unsigned int low = 0;
			unsigned int high = 0;
			if(sscanf(yytext, "\\x%x-\\x%x", &low, &high) != 2)
				errx(1, "character class error %s", yytext);
			if(low >= high)
				errx(1, "character class error %s", yytext);

			// return 2 values in one: high bound in bits 8-15,
			// low bound in bits 0-7
			yylval = (high << 8) + low;
			DEBUG_CONT((char)yylval << "-" << (char)(yylval >> 8) << endl);

			return CHARCLASS2VALUE;
		}

<class>.-{HEX} {
			// Range from a literal character up to a hex escape.
			DEBUG("CHARCLASS2VALUE ");
			// The c conversion stores one char, the x conversion a
			// whole unsigned int, so each bound gets a matching type.
			char first = 0;
			unsigned int high = 0;
			if(sscanf(yytext, "%c-\\x%x", &first, &high) != 2)
				errx(1, "character class error %s", yytext);
			// go through unsigned char so that bytes of 0x80 and above
			// do not become negative bounds
			const unsigned int low = static_cast<unsigned char>(first);
			if(low >= high)
				errx(1, "character class error %s", yytext);

			// return 2 values in one: high bound in bits 8-15,
			// low bound in bits 0-7
			yylval = (high << 8) + low;
			DEBUG_CONT((char)yylval << "-" << (char)(yylval >> 8) << endl);

			return CHARCLASS2VALUE;
		}

<class>{HEX}-[^-\]]	{
			// Range from a hex escape up to a literal character.
			DEBUG("CHARCLASS2VALUE ");
			// The x conversion stores a whole unsigned int, the c
			// conversion one char, so each bound gets a matching type.
			unsigned int low = 0;
			char last = 0;
			if(sscanf(yytext, "\\x%x-%c", &low, &last) != 2)
				errx(1, "character class error %s", yytext);
			// go through unsigned char so that bytes of 0x80 and above
			// do not become negative bounds
			const unsigned int high = static_cast<unsigned char>(last);
			if(low >= high)
				errx(1, "character class error %s", yytext);

			// return 2 values in one: high bound in bits 8-15,
			// low bound in bits 0-7
			yylval = (high << 8) + low;
			DEBUG_CONT((char)yylval << "-" << (char)(yylval >> 8) << endl);

			return CHARCLASS2VALUE;
		}

<class>.-[^-\]]	{
			// Range between two literal characters.
			DEBUG("CHARCLASS2VALUE ");
			char first = 0;
			char last = 0;
			if(sscanf(yytext, "%c-%c", &first, &last) != 2)
				errx(1, "character class error %s", yytext);
			// Compare and pack the bounds as unsigned byte values;
			// with plain char, bytes of 0x80 and above may be negative
			// and would then order and shift incorrectly.
			const unsigned int low = static_cast<unsigned char>(first);
			const unsigned int high = static_cast<unsigned char>(last);
			if(low >= high)
				errx(1, "character class error %s", yytext);

			// return 2 values in one: high bound in bits 8-15,
			// low bound in bits 0-7
			yylval = (high << 8) + low;
			DEBUG_CONT((char)yylval << "-" << (char)(yylval >> 8) << endl);

			return CHARCLASS2VALUE;
		}




<class>\[\:(alpha|alnum|ascii|blank|cntrl|digit|graph|lower|print|punct|space|upper|word|xdigit)\:\]	{		
			// Named class written as [:name:] inside a character class.
			DEBUG("CHARCLASS " << yytext);
			// Drop the two leading and the two trailing delimiter
			// characters so that only the bare class name is left.
			const string matched(yytext);
			const string className = matched.substr(2, matched.size() - 4);
			// Look the name up in the shared class map; its index is
			// the semantic value of the token.
			const tCharClass classIndex = CCMap[className];
			DEBUG_CONT(" with index " << (int)classIndex << endl);
			yylval = classIndex;
			return CHARCLASS;
		}
<class>.	{		
			// Fallback inside a class: any other single character is
			// returned as ASCII with the character as its value.
			DEBUG("ASCII " << yytext << endl);
			yylval = yytext[0];
			return ASCII;
		}
"/"	{
		// The rules from here to the caret rule carry no start
		// condition and set no semantic value; each one only
		// returns the token for its single character.
		DEBUG("SLASH" << endl);
		return SLASH;
	}
"("	{
		// opening parenthesis
		DEBUG("LPAR" << endl);
		return LPAR;
	}
")"	{
		// closing parenthesis
		DEBUG("RPAR" << endl);
		return RPAR;
	}
"."	{
		// dot
		DEBUG("ANY" << endl);
		return ANY;
	}
"?"	{
		// question mark
		DEBUG("ZEROONE" << endl);
		return ZEROONE;
	}
"+"	{
		// plus sign
		DEBUG("ONEMORE" << endl);
		return ONEMORE;
	}
"*"	{
		// asterisk
		DEBUG("ZEROMORE" << endl);
		return ZEROMORE;
	}
"|"	{
		// vertical bar
		DEBUG("OR" << endl);
		return OR;
	}
"$"	{
		// dollar sign
		DEBUG("EOL" << endl);
		return EOL;
	}
"^"	{
		// caret outside of a character class
		DEBUG("BOL" << endl);
		return BOL;
	}
[[:space:]]	{
		// a single whitespace character, returned as SPACE with the
		// character itself as the semantic value
		DEBUG("ASCII SPACE" << yytext << endl);
		yylval = yytext[0];
		return SPACE;
	}
.	{
		// Fallback: any other single character is returned as ASCII
		// with the character itself as the semantic value.
		DEBUG("ASCII " << yytext << endl);
		yylval = yytext[0];
		return ASCII;
	}
%%

/**
 * \brief Put the scanner back into the INITIAL start condition.
 *
 * NOTE(review): presumably meant to be called between inputs, because a scan
 * can stop while the times, class or esc state is still active - confirm
 * against the callers.
 */
void flexReset() {
	BEGIN(INITIAL);
}
