/*
								+----------------------------------+
								|                                  |
								|  ***   Regular expr tools   ***  |
								|                                  |
								|   Copyright  -tHE SWINe- 2007   |
								|                                  |
								|            RegExp.cpp            |
								|                                  |
								+----------------------------------+
*/

/*
 *	passed code revision at 2007-09-24
 *
 *	structures TTransition and TState were moved from CScanner to TScannerDrivingTable
 *
 *	2007-09-26
 *
 *	rearranged members of TScannerDrivingTable::TState so the structure is smaller
 *
 *	2007-10-25
 *
 *	fixed an error in CRegExp_DFSM::MakeMinimal which prevented merging of
 *	equal states that link to themselves
 *
 *	2008-03-23
 *
 *	fixed couple of memory leaks in CRegExp_NFSM::CollapseIdentityTransitions
 *	and in CRegExp_DFSM::MakeMinimal
 *	improved TScannerDrivingTable::PrintSource so it can be directly used to create header files
 *
 *	2008-04-20
 *
 *	optimized TScannerDrivingTable::TTransition so the whole structure can be packed to 64 bits
 *	(n_char_min and n_state often fit in a 16-bit integer), therefore speeding up lexical
 *	analysis a little bit (experiments on an XML parser showed an improvement of about 9%)
 *
 *	2009-05-23
 *
 *	removed all instances of std::vector::reserve and replaced them by stl_ut::Reserve_*
 *
 *	2009-10-20
 *
 *	fixed some warnings when compiling under VC 2005, implemented "Security
 *	Enhancements in the CRT" for VC 2008. compare against MyProjects_2009-10-19_
 *
 */

#define _SCL_SECURE_NO_WARNINGS

#include "NewFix.h"
#include "CallStack.h"
#include <stdio.h>
#include <ctype.h>
#include <vector>
#include <algorithm>
#include "BitArray.h"
#include "RegExp.h"
#include "StlUtils.h"

#if defined(_MSC_VER) && !defined(_MWERKS)
#define for if(0) {} else for
#endif
// msvc 'for' scoping hack: every 'for' becomes the else-branch of a dead 'if', so that
// variables declared in the for-init statement are confined to the loop itself
// (presumably needed for old msvc versions which leak them to the enclosing scope)

const int n_first_unicode_char = 0, n_last_unicode_char = 0x10ffff;
// min/max expected input chars (0x10ffff is the highest Unicode code point);
// the dotty printers in this file label a transition spanning this whole interval as "any"

/*
 *								--- CRegExp_Lexer::CPrintToken ---
 */

#ifdef REG_EXP_COMPILER_DUMP

/*
 *	static const char *CRegExp_Lexer::CPrintToken::p_s_TokenName(int n_token_type)
 *		- translates token type id n_token_type to its name
 *		- the name table is indexed by offset from token_Unknown, the last
 *		  named token type is token_Quant
 *		- ids outside of the <token_Unknown, token_Quant> interval yield "token_Unknown"
 */
const char *CRegExp_Lexer::CPrintToken::p_s_TokenName(int n_token_type)
{
	__FuncGuard("CRegExp_Lexer::CPrintToken::p_s_TokenName");

	static const char *const p_name_table[] = {
		"token_Unknown",
		"token_Char",
		"token_AnyChar",
		"token_Option",
		"token_LeftPar",
		"token_RightPar",
		"token_CharRange",
		"token_Quant"
	};
	// names, in the order of token type ids

	const bool b_known_type = n_token_type >= token_Unknown && n_token_type <= token_Quant;
	return p_name_table[(b_known_type)? n_token_type - token_Unknown : 0];
	// the first entry doubles as the fallback for ids out of range
}

/*
 *	void CRegExp_Lexer::CPrintToken::operator ()(const TToken &r_token) const
 *		- prints token type name and its data, if any, to stdout (character code
 *		  for char tokens, repeat counts for quantifiers, list of intervals
 *		  for char-range tokens)
 *		- characters are printed as glyphs only if their code is printable 7-bit ASCII,
 *		  any other code (including negative ones) is printed as a hexadecimal number only
 */
void CRegExp_Lexer::CPrintToken::operator ()(const TToken &r_token) const
{
	__FuncGuard("CRegExp_Lexer::CPrintToken::operator ()");

	printf("%s", p_s_TokenName(r_token.n_type));
	switch(r_token.n_type) {
	case token_Unknown:
	case token_AnyChar:
	case token_LeftPar:
	case token_RightPar:
	case token_Option:
		printf("\n");
		// tokens with no data
		break;

	case token_Char:
		{
			const int n_code = r_token.t_data.n_character;
			if(n_code >= 0 && n_code < 128 && isprint(n_code)) {
				// isprint() is only defined for unsigned char values (and EOF),
				// the lower bound check keeps negative codes away from it
				printf("(char=\'%c\', code=0x%02x)\n", n_code, n_code);
			} else
				printf("(code=0x%02x)\n", n_code);
		}
		break;

	case token_Quant:
		if(r_token.t_data.t_quant.n_max == -1) // -1 stands for unlimited repeat count
			printf("(min-count=%d, max-count=inf)\n", r_token.t_data.t_quant.n_min);
		else {
			printf("(min-count=%d, max-count=%d)\n", r_token.t_data.t_quant.n_min,
				r_token.t_data.t_quant.n_max);
		}
		break;

	case token_CharRange:
		{
			printf("(range-count=%d, inverse=%d)\n", r_token.t_data.t_range.n_range_num,
				r_token.t_data.t_range.b_inverse);
			for(int j = 0; j < r_token.t_data.t_range.n_range_num; ++ j) {
				const int n_min = r_token.t_data.t_range.p_range[j].n_char_min;
				const int n_max = r_token.t_data.t_range.p_range[j].n_char_max;
				if(n_min >= 0 && n_min < 128 && n_max >= 0 && n_max < 128 &&
				   isprint(n_min) && isprint(n_max)) {
					printf("\t(min-char=\'%c\', max-char=\'%c\', min-code=0x%02x, "
						"max-code=0x%02x)\n", n_min, n_max, n_min, n_max);
				} else
					printf("\t(min-code=0x%02x, max-code=0x%02x)\n", n_min, n_max);
			}
			// one line per interval
		}
		break;

	default:
		_ASSERTE(0); // token type not handled here
		break;
	}
}

#endif //REG_EXP_COMPILER_DUMP

/*
 *								--- ~CRegExp_Lexer::CPrintToken:: ---
 */

/*
 *								--- CRegExp_Lexer ---
 */

/*
 *	static bool CRegExp_Lexer::Tokenize(const char *p_s_regexp,
 *		std::vector<TToken> &r_token_list)
 *		- fills r_token_list with tokens generated from p_s_regexp regular expression
 *		  (the list is cleared first; on failure it contains the tokens
 *		  generated before the error was encountered)
 *		- recognized syntax: '\' escapes (see Parse_EscapeSequence), '.', '(', ')', '|',
 *		  character sets '[...]' and '[^...]' with single chars or 'a-z' ranges,
 *		  quantifiers '+', '*', '?', '{n}', '{n,}' and '{n,m}'; any other character
 *		  is a literal
 *		- non-escaped literal characters are stored as unsigned char codes (0 - 255),
 *		  regardless of whether plain char is signed on the target platform
 *		- unlimited repeat count is stored as t_quant.n_max = -1
 *		- t_range.p_range of char-range tokens is allocated using new[] and is not
 *		  freed by this function (presumably it is up to the consumer of the tokens)
 *		- returns true on success, false on failure (syntax error or not enough memory)
 *		- note it prints error messages to stderr
 *		  (only if REG_EXP_COMPILER_STDERR_OUTPUT is defined)
 */
bool CRegExp_Lexer::Tokenize(const char *p_s_regexp, std::vector<TToken> &r_token_list)
{
	__FuncGuard("CRegExp_Lexer::Tokenize");

	r_token_list.clear();
	for(const char *p_s_start = p_s_regexp; *p_s_regexp; ++ p_s_regexp) {
		// note each of the branches below leaves p_s_regexp pointing to the last
		// character of the token, the increment above then skips to the next token

		TToken t_token;
		t_token.n_column = (p_s_regexp - p_s_start) + 1;
		t_token.n_type = token_Unknown;
		// prepare token (column is one-based) ...

		if(!stl_ut::Reserve_1More(r_token_list)) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
			fprintf(stderr, "error: not enough memory\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
			return false; // no memory
		}
		// alloc space for the token in advance

		switch(*p_s_regexp) {
		case '\\': // escape the next character
			{
				int n_value;
				if(!Parse_EscapeSequence(++ p_s_regexp, n_value))
					return false;
				t_token.n_type = token_Char;
				t_token.t_data.n_character = n_value;
			}
			break;

		case '.':
			t_token.n_type = token_AnyChar;
			break;
		case '(':
			t_token.n_type = token_LeftPar;
			break;
		case ')':
			t_token.n_type = token_RightPar;
			break;
		case '|':
			t_token.n_type = token_Option;
			break;

		case '[': // set of characters
			{
				t_token.n_type = token_CharRange;
				if(*(++ p_s_regexp) == '^') {
					t_token.t_data.t_range.b_inverse = true;
					++ p_s_regexp;
				} else
					t_token.t_data.t_range.b_inverse = false;
				// the set is inverse if it begins with '^'

				t_token.t_data.t_range.n_range_num = 0;
				t_token.t_data.t_range.p_range = 0;

				std::vector<TInterval> range_list;

				// parse characters
				while(*p_s_regexp != ']') {
					if(!*p_s_regexp) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
						fprintf(stderr, "error: unexpected end of expression while "
							"inside \'[]\' block\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
						return false;
					}

					TInterval t_range;

					if(!stl_ut::Reserve_1More(range_list)) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
						fprintf(stderr, "error: not enough memory\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
						return false;
					}

					if(*p_s_regexp == '\\') {
						if(!Parse_EscapeSequence(++ p_s_regexp, t_range.n_char_min))
							return false;
					} else if(*p_s_regexp == '-' || *p_s_regexp == '[' || *p_s_regexp == ']' ||
					   *p_s_regexp == '(' || *p_s_regexp == ')' || *p_s_regexp == '.' ||
					   *p_s_regexp == '|' || *p_s_regexp == '*' || *p_s_regexp == '+' ||
					   *p_s_regexp == '?' || *p_s_regexp == '^') {
						// special characters must be escaped inside the set
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
						fprintf(stderr, "error: missing \'range-from\'"
							" character before \'%c\'\n", *p_s_regexp);
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
						return false;
					} else {
						t_range.n_char_min = (unsigned char)*p_s_regexp;
						// avoid sign extension (would yield negative character codes)
					}
					++ p_s_regexp;
					// get character

					if(*p_s_regexp == '-') {
						// range of characters

						++ p_s_regexp;

						if(*p_s_regexp == '\\') {
							if(!Parse_EscapeSequence(++ p_s_regexp, t_range.n_char_max))
								return false;
						} else if(*p_s_regexp == '-' || *p_s_regexp == '[' ||
						   *p_s_regexp == ']' || *p_s_regexp == '(' || *p_s_regexp == ')' ||
						   *p_s_regexp == '.' || *p_s_regexp == '|' || *p_s_regexp == '*' ||
						   *p_s_regexp == '+' || *p_s_regexp == '?' || *p_s_regexp == '^') {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
							fprintf(stderr, "error: missing \'range-to\' character"
								" before \'%c\'\n", *p_s_regexp);
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
							return false;
						} else {
							t_range.n_char_max = (unsigned char)*p_s_regexp;
							// avoid sign extension here as well
						}
						++ p_s_regexp;
						// get second character

						if(t_range.n_char_min > t_range.n_char_max) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
							fprintf(stderr, "warning: char range should have characters in "
								"ascending order: \'%c'-'%c\' (0x%02x-0x%02x)\n",
								t_range.n_char_max, t_range.n_char_min, t_range.n_char_max,
								t_range.n_char_min);
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
							std::swap(t_range.n_char_min, t_range.n_char_max);
						}
						// swap range if necessary
					} else
						t_range.n_char_max = t_range.n_char_min; // single character

					range_list.push_back(t_range);
				}

				if(!(t_token.t_data.t_range.p_range = new(std::nothrow) TInterval[range_list.size()])) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
					fprintf(stderr, "error: not enough memory\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
					return false;
				}
				std::copy(range_list.begin(), range_list.end(), t_token.t_data.t_range.p_range);
				t_token.t_data.t_range.n_range_num = range_list.size();
				// copy range list
			}
			break;

		case '+':
			t_token.n_type = token_Quant;
			t_token.t_data.t_quant.n_min = 1;
			t_token.t_data.t_quant.n_max = -1; // inf
			break;
		case '*':
			t_token.n_type = token_Quant;
			t_token.t_data.t_quant.n_min = 0;
			t_token.t_data.t_quant.n_max = -1; // inf
			break;
		case '?':
			t_token.n_type = token_Quant;
			t_token.t_data.t_quant.n_min = 0;
			t_token.t_data.t_quant.n_max = 1;
			break;
		case '{':
			{
				// complex quantifier
				++ p_s_regexp;

				while(isspace((unsigned char)*p_s_regexp))
					++ p_s_regexp;
				// isspace() is not defined for negative values, hence the cast

				if(!*p_s_regexp) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
					fprintf(stderr, "error: unexpected end of expression "
						"while inside \'{}\' block\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
					return false;
				}

				if(*p_s_regexp < '0'|| *p_s_regexp > '9') {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
					fprintf(stderr, "error: mising minimal number of "
						"repeats before \'%c\' (0x%02x)\n", *p_s_regexp,
						(unsigned char)*p_s_regexp);
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
					return false;
				}
				int n_number = 0;
				while(*p_s_regexp >= '0' && *p_s_regexp <= '9') {
					if(n_number > INT_MAX / 10) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
						fprintf(stderr, "error: number out of range\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
						return false;
					}
					n_number *= 10;
					if(n_number > INT_MAX - (*p_s_regexp - '0')) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
						fprintf(stderr, "error: number out of range\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
						return false;
					}
					n_number += *p_s_regexp ++ - '0';
				}
				// parse number (with overflow checks)

				while(isspace((unsigned char)*p_s_regexp))
					++ p_s_regexp;

				if(!*p_s_regexp) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
					fprintf(stderr, "error: unexpected end of expression "
						"while inside \'{}\' block\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
					return false;
				}

				int n_number2 = 0;

				if(*p_s_regexp == '}')
					n_number2 = n_number; // {n} syntax
				else {
					if(*p_s_regexp != ',') {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
						fprintf(stderr, "error: mising \',\' before \'%c\' (0x%02x)\n",
							*p_s_regexp, (unsigned char)*p_s_regexp);
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
						return false;
					}
					++ p_s_regexp;

					while(isspace((unsigned char)*p_s_regexp))
						++ p_s_regexp;

					if(!*p_s_regexp) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
						fprintf(stderr, "error: unexpected end of expression "
							"while inside \'{}\' block\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
						return false;
					}

					if(*p_s_regexp == '}')
						n_number2 = -1; // {n,} syntax
					else {
						if(*p_s_regexp < '0'|| *p_s_regexp > '9') {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
							fprintf(stderr, "error: mising maximal number of repeats "
								"before \'%c\' (0x%02x)\n", *p_s_regexp,
								(unsigned char)*p_s_regexp);
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
							return false;
						}
						while(*p_s_regexp >= '0' && *p_s_regexp <= '9') {
							if(n_number2 > INT_MAX / 10) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
								fprintf(stderr, "error: number out of range\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
								return false;
							}
							n_number2 *= 10;
							if(n_number2 > INT_MAX - (*p_s_regexp - '0')) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
								fprintf(stderr, "error: number out of range\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
								return false;
							}
							n_number2 += *p_s_regexp ++ - '0';
						}
					}
					// parse second number

					while(isspace((unsigned char)*p_s_regexp))
						++ p_s_regexp;

					if(!*p_s_regexp) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
						fprintf(stderr, "error: unexpected end of expression "
							"while inside \'{}\' block\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
						return false;
					}

					if(*p_s_regexp != '}') {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
						fprintf(stderr, "error: mising \'}\' before \'%c\' (0x%02x)\n",
							*p_s_regexp, (unsigned char)*p_s_regexp);
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
						return false;
					}
				}

				if(n_number2 != -1 && n_number > n_number2) {
					// n_number2 == -1 means unlimited ({n,} syntax), that must not be swapped
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
					fprintf(stderr, "warning: repeat count should come in"
						" ascending order: %d - %d\n", n_number2, n_number);
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
					std::swap(n_number, n_number2);
				}
				// swap range if necessary

				t_token.n_type = token_Quant;
				t_token.t_data.t_quant.n_min = n_number;
				t_token.t_data.t_quant.n_max = n_number2;
			}
			break;

		case '}':
		case ']':
		case '^':
			// these are only valid inside of '{}' / '[]' blocks (or escaped)
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
			fprintf(stderr, "error: unexpected character \'%c\'\n", *p_s_regexp);
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
			return false;

		default:
			t_token.n_type = token_Char;
			t_token.t_data.n_character = (unsigned char)*p_s_regexp;
			// simple character (avoid sign extension)
			break;
		}

		r_token_list.push_back(t_token);
	}

	return true;
}

/*
 *	static bool CRegExp_Lexer::Parse_EscapeSequence(const char *&p_s_regexp, int &r_n_value)
 *		- parses escape sequence p_s_regexp (points onto first char, next to '\',
 *		  ie. for "\n", p_s_regexp contains "n")
 *		- supported sequences are 'x' followed by hexadecimal digits and terminated
 *		  by semicolon (character code, eg. "x41;") and escaped special characters
 *		  - ( ) [ ] { } . | * + ? ^ and backslash
 *		- on success writes character code to r_n_value and leaves p_s_regexp pointing
 *		  to the last character of the sequence (the semicolon for hexadecimal codes)
 *		- returns true on success, false on failure (unknown sequence, unexpected
 *		  end of the expression or hexadecimal code that doesn't fit in int);
 *		  r_n_value is not written on failure
 *		- note it prints error messages to stderr
 *		  (only if REG_EXP_COMPILER_STDERR_OUTPUT is defined)
 */
bool CRegExp_Lexer::Parse_EscapeSequence(const char *&p_s_regexp, int &r_n_value)
{
	__FuncGuard("CRegExp_Lexer::Parse_EscapeSequence");

	switch(*p_s_regexp) {
	case 'x':
		{
			int n_value = 0;
			for(++ p_s_regexp; *p_s_regexp != ';'; ++ p_s_regexp) {
				int n_digit;
				if(*p_s_regexp >= '0' && *p_s_regexp <= '9')
					n_digit = *p_s_regexp - '0';
				else if(*p_s_regexp >= 'a' && *p_s_regexp <= 'f')
					n_digit = *p_s_regexp - 'a' + 10;
				else if(*p_s_regexp >= 'A' && *p_s_regexp <= 'F')
					n_digit = *p_s_regexp - 'A' + 10;
				else {
					if(!*p_s_regexp) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
						fprintf(stderr, "error: unexpected end of expression while "
							"parsing escape sequence\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
						return false;
					}

#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
					fprintf(stderr, "error: invalid character in hexedecimal escape "
						"sequence: \'%c\' (0x%02x)\n",
						*p_s_regexp, (unsigned char)*p_s_regexp);
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
					return false;
				}
				// get value of the next hexadecimal digit

				if(n_value > INT_MAX / 16) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
					fprintf(stderr, "error: number out of range\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
					return false;
				}
				// shifting the value would overflow (undefined for signed integers)

				n_value = (n_value << 4) | n_digit;
			}
			r_n_value = n_value;
			// parse escape sequence with hex character code
		}
		return true;

	case '-':
	case '(':
	case ')':
	case '[':
	case ']':
	case '{':
	case '}':
	case '.':
	case '|':
	case '*':
	case '+':
	case '?':
	case '^':
	case '\\':
		r_n_value = *p_s_regexp;
		// parse simple escape sequence (the character stands for itself)
		return true;

	default:
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
		if(!*p_s_regexp) {
			fprintf(stderr, "error: unexpected end of expression"
				" while parsing escape sequence\n");
		} else {
			fprintf(stderr, "error: invalid escape sequence character: \'%c\' (0x%02x)\n",
				*p_s_regexp, (unsigned char)*p_s_regexp);
		}
#endif //REG_EXP_COMPILER_STDERR_OUTPUT

		return false;
	}
}

/*
 *								--- ~CRegExp_Lexer ---
 */

/*
 *								--- finite state machinery ---
 */

/*
 *								--- TNFAState ---
 */

// definition of the TNFAState::n_id_space static member, initialized to -1
// NOTE(review): presumably the counter TNFAState::n_id values are drawn from - confirm in RegExp.h
int TNFAState::n_id_space = -1;

/*
 *								--- ~TNFAState ---
 */

/*
 *								--- TScannerDrivingTable ---
 */

/*
 *	void TScannerDrivingTable::PrintSource(FILE *p_stream,
 *		const char *p_s_type_s = "const TScannerDrivingTable::TState",
 *		const char *p_s_type_t = "const TScannerDrivingTable::TTransition",
 *		const char *p_s_name_s = "p_state",
 *		const char *p_s_name_t = "p_transition",
 *		const char *p_s_name_s_num = "n_state_num",
 *		const char *p_s_name_t_num = "n_transition_num",
 *		const char *p_s_class_name = 0,
 *		int n_max_line_length = 100)
 *		- prints driving table source to file p_stream
 *		- note it doesn't check for any i/o errors
 */
void TScannerDrivingTable::PrintSource(FILE *p_stream,
	const char *p_s_type_s /*= "const TScannerDrivingTable::TState"*/,
	const char *p_s_type_t /*= "const TScannerDrivingTable::TTransition"*/,
	const char *p_s_name_s /* ="p_state"*/,
	const char *p_s_name_t /* ="p_transition"*/,
	const char *p_s_name_s_num /* ="n_state_num"*/,
	const char *p_s_name_t_num /* ="n_transition_num"*/,
	const char *p_s_class_name /*= 0*/,
	int n_max_line_length /*= 100*/)
{
	__FuncGuard("TScannerDrivingTable::PrintSource");

	if(!p_state || !p_transition)
		return;

	/*if(p_s_class_name) {
		fprintf(p_stream, "// header:\n");
		fprintf(p_stream, "static const int %s; // %d\n", p_s_name_t_num, n_transition_num);
		fprintf(p_stream, "static const int %s; // %d\n", p_s_name_s_num, n_state_num);
		// counts

		fprintf(p_stream, "static %s %s[%d];\n\t", p_s_type_t, p_s_name_t, n_transition_num);
		fprintf(p_stream, "static %s %s[%d];\n\t", p_s_type_s, p_s_name_s, n_state_num);
		// array defs

		fprintf(p_stream, "\n// source:\n");
	}*/
	// print header

	if(p_s_class_name) {
		if(p_s_name_t_num) {
			fprintf(p_stream, "const int %s::%s = %d;\n",
				p_s_class_name, p_s_name_t_num, n_transition_num);
		}
		if(p_s_name_s_num) {
			fprintf(p_stream, "const int %s::%s = %d;\n",
				p_s_class_name, p_s_name_s_num, n_state_num);
		}
	} else {
		if(p_s_name_t_num)
			fprintf(p_stream, "const int %s = %d;\n", p_s_name_t_num, n_transition_num);
		if(p_s_name_s_num)
			fprintf(p_stream, "const int %s = %d;\n", p_s_name_s_num, n_state_num);
	}
	// print counts

	std::string s_temp;
	// temporary string buffer

	n_max_line_length -= 4; // tab size

	if(p_s_name_t) {
		if(p_s_class_name) {
			fprintf(p_stream, "%s %s::%s[%d] = {\n\t",
				p_s_type_t, p_s_class_name, p_s_name_t, n_transition_num);
		} else
			fprintf(p_stream, "%s %s[%d] = {\n\t", p_s_type_t, p_s_name_t, n_transition_num);
		{
			int n_line_length = 0;
			for(const TTransition *p_tran = p_transition, *p_end = p_transition +
			   n_transition_num; p_tran != p_end; ++ p_tran) {
				stl_ut::Format(s_temp, (p_tran + 1 != p_end)?
					"{0x%x, 0x%x, %d}," : "{0x%x, 0x%x, %d}",
					p_tran->n_char_max, p_tran->n_char_min, p_tran->n_state);
				int n_length = s_temp.length();
				if(n_line_length + (n_line_length != 0) + n_length >= n_max_line_length) {
					fprintf(p_stream, "\n\t%s", s_temp.c_str());
					n_line_length = n_length;
				} else
					n_line_length += fprintf(p_stream, (n_line_length)? " %s" : "%s", s_temp.c_str());
			}
		}
		fprintf(p_stream, "\n};\n");
	}
	// print transitions

	if(p_s_name_s) {
		if(p_s_class_name) {
			fprintf(p_stream, "%s %s::%s[%d] = {\n\t",
				p_s_type_s, p_s_class_name, p_s_name_s, n_state_num);
		} else
			fprintf(p_stream, "%s %s[%d] = {\n\t", p_s_type_s, p_s_name_s, n_state_num);
		{
			int n_line_length = 0;
			for(const TState *p_state_ = p_state, *p_end = p_state +
			   n_state_num; p_state_ != p_end; ++ p_state_) {
				stl_ut::Format(s_temp, (p_state_ + 1 != p_end)?
					"{%s + %d, %d, %d}," : "{%s + %d, %d, %d}",
					p_s_name_t, p_state_->p_transition - p_transition,
					p_state_->n_regexp_id, p_state_->n_transition_num);
				int n_length = s_temp.length();
				if(n_line_length + (n_line_length != 0) + n_length >= n_max_line_length) {
					fprintf(p_stream, "\n\t%s", s_temp.c_str());
					n_line_length = n_length;
				} else
					n_line_length += fprintf(p_stream, (n_line_length)? " %s" : "%s", s_temp.c_str());
			}
		}
		fprintf(p_stream, "\n};\n");
	}
	// print states
}

/*
 *	void TScannerDrivingTable::Free()
 *		- deletes both the state and the transition array
 *		  and sets the respective pointers to null
 *		- calling it on a table with null pointers has no effect
 */
void TScannerDrivingTable::Free()
{
	__FuncGuard("TScannerDrivingTable::Free");

	delete[] p_state; // deleting null pointer is a no-op
	p_state = 0;

	delete[] p_transition;
	p_transition = 0;
}

/*
 *								--- ~TScannerDrivingTable ---
 */

/*
 *								--- CRegExp_DFSM ---
 */

/*
 *	class CRegExp_DFSM::CFindStatePtr
 *		- predicate for searching containers of TDFAState pointers; matches
 *		  pointers to states which compare equal (operator ==) to the reference state
 *		- the reference state is held by reference, it must outlive the predicate
 */
class CRegExp_DFSM::CFindStatePtr {
protected:
	const TDFAState &m_r_state; // the state being looked for

public:
	CFindStatePtr(const TDFAState &r_state)
		:m_r_state(r_state)
	{}

	bool operator ()(const TDFAState *p_state) const
	{
		const TDFAState &r_candidate = *p_state;
		return m_r_state == r_candidate;
	}
};

/*
 *	class CRegExp_DFSM::CCopyStates
 *		- function object for std::for_each, converting DFA states (along with their
 *		  outgoing transitions) to records of scanner driving table
 *		- states are written sequentially, starting at p_state; transitions
 *		  of each state are stored contiguously, starting at p_transition
 *		- target state indices are position in the state list plus one,
 *		  index 0 belongs to the initial state
 */
class CRegExp_DFSM::CCopyStates {
protected:
	TScannerDrivingTable::TState *m_p_state; // next free state record
	TScannerDrivingTable::TTransition *m_p_transition; // next free transition record
	const std::vector<TDFATransition> &m_r_transition_list; // all transitions of the machine
	const std::vector<TDFAState*> &m_r_state_list; // all states of the machine (but the initial one)
	const TDFAState *m_p_init_state; // the initial state

public:
	/*
	 *	class CCopyStates::CCopyTransitions
	 *		- function object for std::for_each, copying transitions which start
	 *		  in a given state to the driving table
	 *		- converts to unsigned int, containing number of transitions copied
	 */
	class CCopyTransitions {
	protected:
		TScannerDrivingTable::TTransition *m_p_transition; // next free transition record
		const TDFAState *m_p_state; // state whose transitions are copied
		const TDFAState *m_p_init_state; // the initial state
		const std::vector<TDFAState*> &m_r_state_list; // all states of the machine (but the initial one)
		unsigned int m_n_transition_num; // number of transitions copied so far

	public:
		CCopyTransitions(const TDFAState *p_state,
			TScannerDrivingTable::TTransition *p_transition,
			const std::vector<TDFAState*> &r_state_list, const TDFAState *p_init_state)
			:m_p_transition(p_transition), m_p_state(p_state),
			m_p_init_state(p_init_state), m_r_state_list(r_state_list),
			m_n_transition_num(0)
		{}

		void operator ()(const TDFATransition &r_transition)
		{
			if(r_transition.p_start != m_p_state)
				return;
			// transition of another state, skip it

			m_p_transition->n_char_min = r_transition.n_char_min;
			m_p_transition->n_char_max = r_transition.n_char_max;
			if(r_transition.p_end == m_p_init_state)
				m_p_transition->n_state = 0;
			else {
				m_p_transition->n_state = (std::find(m_r_state_list.begin(),
					m_r_state_list.end(), r_transition.p_end) - m_r_state_list.begin()) + 1;
			}
			_ASSERTE(unsigned(m_p_transition->n_state) < m_r_state_list.size() + 1);
			// copy transition (gotta look the target state up in the list to get its index)

			++ m_p_transition;
			++ m_n_transition_num;
		}

		operator unsigned int() const
		{
			return m_n_transition_num;
		}
	};

	CCopyStates(TScannerDrivingTable::TState *p_state,
		TScannerDrivingTable::TTransition *p_transition,
		const std::vector<TDFATransition> &r_transition_list,
		const std::vector<TDFAState*> &r_state_list, const TDFAState *p_init_state)
		:m_p_state(p_state), m_p_transition(p_transition),
		m_r_transition_list(r_transition_list), m_r_state_list(r_state_list),
		m_p_init_state(p_init_state)
	{}

	void operator ()(const TDFAState *p_state)
	{
		m_p_state->n_regexp_id = p_state->n_expression_id;
		m_p_state->p_transition = m_p_transition;
		// copy state data, its transitions begin at the first free transition record

		CCopyTransitions transition_copier(p_state, m_p_transition,
			m_r_state_list, m_p_init_state);
		m_p_state->n_transition_num = std::for_each(m_r_transition_list.begin(),
			m_r_transition_list.end(), transition_copier);
		// copy the transitions (the function object returned converts to their number)

		m_p_transition += m_p_state->n_transition_num;
		++ m_p_state;
		// move onto next free state and transitions
	}
};

#ifdef REG_EXP_MACHINES_DOTTY_CAPABILITY
/*
 *	class CRegExp_DFSM::CPrintAcceptingStateNames
 *		- function object, printing names of accepting states (states with
 *		  n_expression_id other than -1) to a file, each one preceded by a space
 *		- the name is "A<expression-id>_", followed by "_<index>" for every
 *		  raised bit of state_flags, and terminated by "_"
 */
class CRegExp_DFSM::CPrintAcceptingStateNames {
protected:
	FILE *m_p_out; // output stream

public:
	CPrintAcceptingStateNames(FILE *p_out)
		:m_p_out(p_out)
	{}

	void operator ()(const TDFAState *p_state)
	{
		if(p_state->n_expression_id == -1)
			return;
		// not an accepting state, nothing is printed

		fprintf(m_p_out, " A%d_", p_state->n_expression_id);
		for(unsigned int n_bit = 0; n_bit < p_state->state_flags.n_Length(); ++ n_bit) {
			if(!p_state->state_flags[n_bit])
				continue;
			fprintf(m_p_out, "_%d", n_bit);
		}
		fprintf(m_p_out, "_");
	}
};

/*
 *	class CRegExp_DFSM::CPrintTransitionNames
 *		- function object, printing transitions as edges of dotty graph
 *		  ("<start> -> <end> [ label = "..." ];") to a file
 *		- the label is "any" for transitions spanning all the expected input characters,
 *		  quoted characters where both bounds are printable 7-bit ASCII (double quotes
 *		  and backslashes are escaped), hexadecimal character codes otherwise
 */
class CRegExp_DFSM::CPrintTransitionNames {
protected:
	FILE *m_p_out; // output stream

public:
	inline CPrintTransitionNames(FILE *p_out)
		:m_p_out(p_out)
	{}

	inline void operator ()(const TDFATransition &r_transition)
	{
		fprintf(m_p_out, "\t");
		PrintState(r_transition.p_start);
		fprintf(m_p_out, " -> ");
		PrintState(r_transition.p_end);
		// print the edge

		const int n_min = r_transition.n_char_min, n_max = r_transition.n_char_max;
		if(n_min == n_first_unicode_char && n_max == n_last_unicode_char)
			fprintf(m_p_out, " [ label = \"any\" ];\n");
		else if(n_min == n_max) {
			if(b_Printable(n_min)) {
				fprintf(m_p_out, " [ label = \"");
				PrintQuotedChar(n_min);
				fprintf(m_p_out, "\" ];\n");
			} else
				fprintf(m_p_out, " [ label = \"0x%02x\" ];\n", n_min);
			// a single character
		} else {
			if(b_Printable(n_min) && b_Printable(n_max)) {
				fprintf(m_p_out, " [ label = \"");
				PrintQuotedChar(n_min);
				fprintf(m_p_out, " - ");
				PrintQuotedChar(n_max);
				fprintf(m_p_out, "\" ];\n");
			} else
				fprintf(m_p_out, " [ label = \"0x%02x - 0x%02x\" ];\n", n_min, n_max);
			// range of characters
		}
		// print the label
	}

	/*
	 *	prints name of state p_state: "A<expression-id>_" (accepting states only),
	 *	followed by "_<index>" for every raised bit of state_flags, terminated by "_"
	 */
	inline void PrintState(const TDFAState *p_state)
	{
		if(p_state->n_expression_id != -1)
			fprintf(m_p_out, "A%d_", p_state->n_expression_id);

		for(unsigned int i = 0; i < p_state->state_flags.n_Length(); ++ i) {
			if(p_state->state_flags[i])
				fprintf(m_p_out, "_%d", i);
		}

		fprintf(m_p_out, "_");
	}

protected:
	/*
	 *	returns true if n_char is printable 7-bit ASCII character; note isprint()
	 *	is only defined for unsigned char values (and EOF), negative
	 *	codes must not be passed to it
	 */
	static inline bool b_Printable(int n_char)
	{
		return n_char >= 0 && n_char < 128 && isprint(n_char);
	}

	/*
	 *	prints character n_char in single quotes; double quote
	 *	and backslash are escaped for use inside of dotty string
	 */
	inline void PrintQuotedChar(int n_char)
	{
		if(n_char == '\"')
			fprintf(m_p_out, "\'\\\"\'");
		else if(n_char == '\\')
			fprintf(m_p_out, "\'\\\\\'");
		else
			fprintf(m_p_out, "\'%c\'", n_char);
	}
};
#endif //REG_EXP_MACHINES_DOTTY_CAPABILITY

/*
 *	TDFAState *CRegExp_DFSM::p_FindState(const TDFAState &r_state)
 *		- looks for a state of this machine which compares equal to r_state;
 *		  the initial state is tested first, the states in the state list follow
 *		- returns pointer to the state found, or 0 in case there is no such state
 *		- note regular expression id is not compared
 */
TDFAState *CRegExp_DFSM::p_FindState(const TDFAState &r_state)
{
	__FuncGuard("CRegExp_DFSM::p_FindState");

	if(m_t_initial_state == r_state)
		return &m_t_initial_state;
	// the initial state is not stored in the list, it has to be tested separately

	const std::vector<TDFAState*>::iterator p_found_it = std::find_if(m_state_list.begin(),
		m_state_list.end(), CFindStatePtr(r_state));
	return (p_found_it != m_state_list.end())? *p_found_it : 0;
}

/*
 *	TDFAState *CRegExp_DFSM::p_GetState(unsigned int n_array_size)
 *		- creates a new state with bit array of n_array_size bits
 *		  and appends it to the state list
 *		- returns pointer to the new state, or 0 in case there was not enough memory
 *		  (the state list is not changed in such a case)
 */
TDFAState *CRegExp_DFSM::p_GetState(unsigned int n_array_size)
{
	__FuncGuard("CRegExp_DFSM::p_GetState");

	TDFAState *p_new_state = 0;
	if(stl_ut::Reserve_1More(m_state_list))
		p_new_state = new(std::nothrow) TDFAState(n_array_size);
	if(!p_new_state)
		return 0;
	// space in the list is reserved first, the state is allocated only if that succeeded

	if(p_new_state->state_flags.n_Length() == n_array_size) {
		m_state_list.push_back(p_new_state);
		return p_new_state;
	}
	// the bit array has the requested length, put the state to the list

	delete p_new_state;
	return 0;
	// the bit array has different length than requested (treated as low memory condition)
}

#ifdef REG_EXP_MACHINES_DOTTY_CAPABILITY
/*
 *	void CRegExp_DFSM::PrintDotty(FILE *p_out)
 *		- prints dotty format file, containing FSM graph description
 *		- note this doesn't check for any i/o errors
 */
void CRegExp_DFSM::PrintDotty(FILE *p_out)
{
	__FuncGuard("CRegExp_DFSM::PrintDotty");

	fprintf(p_out, "digraph finite_state_machine "
		"{\n" /*"\tsize=\"20,20\"\n"*/ "\tnode [shape = doublecircle];");
	// header

	CPrintAcceptingStateNames x(p_out); x(&m_t_initial_state);
	std::for_each(m_state_list.begin(), m_state_list.end(), CPrintAcceptingStateNames(p_out));
	// accepting states to be marked as doublecircles
	
	fprintf(p_out, ";\n\tnode [shape = circle];\n");
	// say unnamed nodes are circular

	std::for_each(m_transition_list.begin(),
		m_transition_list.end(), CPrintTransitionNames(p_out));
	// print transitions

	fprintf(p_out, "}\n");
	// end of dotty file
}
#endif //REG_EXP_MACHINES_DOTTY_CAPABILITY

/*
 *	bool CRegExp_DFSM::MakeMinimal()
 *		- minimalizes DFSM using a little bit profound and slow algorithm
 *		- todo: implement something better
 *		- returns true on success, false on failure
 */
bool CRegExp_DFSM::MakeMinimal()
{
	__FuncGuard("CRegExp_DFSM::MakeMinimal");

	bool b_change;
	do {
		b_change = false;
		for(size_t i = 0; i < m_state_list.size(); ++ i) {
			TDFAState *p_i_state = m_state_list[i];
			for(size_t j = i + 1; j < m_state_list.size(); ++ j) {
				TDFAState *p_j_state = m_state_list[j];

				if(p_i_state->n_expression_id != p_j_state->n_expression_id/* &&
				   p_i_state->n_expression_id != -1 && p_j_state->n_expression_id != -1*/)
					continue;
				// can't merge two final states

				/*if(!p_i_state->n_expression_id) {
					CPrintAcceptingStateNames printstates(stdout);
					printstates(p_i_state);
					printstates(p_j_state);
					// debug
				}*/

				std::vector<TDFATransition> i_transitions;
				std::vector<TDFATransition> j_transitions;
				for(int k = 0, n = m_transition_list.size(); k < n; ++ k) {
					if(m_transition_list[k].p_start == p_i_state) {
						if(!stl_ut::Reserve_1More(i_transitions))
							return false;
						i_transitions.push_back(m_transition_list[k]);
					} else if(m_transition_list[k].p_start == p_j_state) {
						if(!stl_ut::Reserve_1More(j_transitions))
							return false;
						j_transitions.push_back(m_transition_list[k]);
					}
				}
				// gather transitions coming out of i
				// gather transitions coming out of j

				if(j_transitions.size() != i_transitions.size())
					continue;
				// different transition counts

				bool b_different = false;
				for(int k = 0, n = i_transitions.size(); k < n; ++ k) {
					bool b_found = false;
					for(int l = 0; l < n; ++ l) {
						if(j_transitions[l].n_char_min == i_transitions[k].n_char_min &&
						   j_transitions[l].n_char_max == i_transitions[k].n_char_max &&
						   (j_transitions[l].p_end == i_transitions[k].p_end || // common target
						   (j_transitions[l].p_end == p_j_state &&
						   i_transitions[k].p_end == p_i_state))) { // or both target self
							b_found = true;
							break;
						}
					}
					if(!b_found) {
						b_different = true;
						break;
					}
				}
				if(b_different)
					continue;
				// different transitions

				/*if(p_i_state->n_expression_id == -1)
					p_i_state->n_expression_id = p_j_state->n_expression_id;*/
				// maybe merge final states

				for(size_t k = 0; k < m_transition_list.size(); ++ k) {
					if(m_transition_list[k].p_start == p_j_state) {
						m_transition_list.erase(m_transition_list.begin() + k);
						-- k;
						// delete duplicate transitions
					} else if(m_transition_list[k].p_end == p_j_state) {
						m_transition_list[k].p_end = p_i_state;
						// rearrange incoming transitions to lead to i
					}
				}
				delete m_state_list[j]; // delete state!
				m_state_list.erase(m_state_list.begin() + j);
				-- j;
				// i and j both lead to same states; remove j and rearrange transitions

				b_change = true;
			}
		}
	} while(b_change);
	// make minimal

	return true;
}

/*
 *	bool CRegExp_DFSM::MakeTable(TScannerDrivingTable &r_table) const
 *		- creates driving table for CScanner
 *		- allocates new state and transition arrays in r_table (any pointers
 *		  r_table contained are overwritten, they are not deleted here)
 *		- the initial state is stored at index 0, followed by the states in order
 *		  of the state list; transitions of each state are stored contiguously
 *		- note the transitions come sorted by min-char so binary search can be used in lexer
 *		  (NOTE(review): they are copied in order of the transition list,
 *		  no sorting takes place here - confirm the list is sorted)
 *		- returns true on success, false on failure (not enough memory; r_table
 *		  is left empty in such a case, with null pointers and zero counts)
 */
bool CRegExp_DFSM::MakeTable(TScannerDrivingTable &r_table) const
{
	__FuncGuard("CRegExp_DFSM::MakeTable");

	r_table.n_state_num = m_state_list.size() + 1; // the initial state is not in the list
	r_table.n_transition_num = m_transition_list.size();
	r_table.p_state = new(std::nothrow) TScannerDrivingTable::TState[r_table.n_state_num];
	r_table.p_transition = (r_table.p_state)?
		new(std::nothrow) TScannerDrivingTable::TTransition[r_table.n_transition_num] : 0;
	if(!r_table.p_transition) {
		delete[] r_table.p_state;
		r_table.p_state = 0;
		r_table.n_state_num = 0;
		r_table.n_transition_num = 0;
		// don't leave dangling or uninitialized pointers in the table
		// (TScannerDrivingTable::Free() would delete them)

		return false;
	}
	// alloc

	r_table.p_state->n_regexp_id = m_t_initial_state.n_expression_id;
	r_table.p_state->p_transition = r_table.p_transition;
	r_table.p_state->n_transition_num = std::for_each(m_transition_list.begin(),
		m_transition_list.end(), CCopyStates::CCopyTransitions(&m_t_initial_state,
		r_table.p_transition, m_state_list, &m_t_initial_state));
	// copy first state

	std::for_each(m_state_list.begin(), m_state_list.end(), CCopyStates(r_table.p_state + 1,
		r_table.p_transition + r_table.p_state[0].n_transition_num,
		m_transition_list, m_state_list, &m_t_initial_state));
	// copy states and transitions

	return true;
}

/*
 *								--- ~CRegExp_DFSM ---
 */

/*
 *								--- CRegExp_NFSM ---
 */

#ifdef REG_EXP_MACHINES_DOTTY_CAPABILITY
/*
 *	class CRegExp_NFSM::CPrintAcceptingStateNames
 *		- function object for std::for_each over list of NFA states
 *		- prints names of states which have expression id assigned (accepting states),
 *		  each preceded by a single space; other states are skipped
 */
class CRegExp_NFSM::CPrintAcceptingStateNames {
protected:
	FILE *m_p_stream;

public:
	inline CPrintAcceptingStateNames(FILE *p_out)
		:m_p_stream(p_out)
	{}

	inline void operator ()(const TNFAState *p_state)
	{
		if(p_state->n_expression_id == -1)
			return;
		// no expression id, not an accepting state

		fprintf(m_p_stream, " A%d__%d_", p_state->n_expression_id, p_state->n_id);
	}
};

/*
 *	class CRegExp_NFSM::CPrintTransitionNames
 *		- function object for std::for_each over list of NFA transitions
 *		- prints a single dotty edge "start -> end [ label = "..." ];" per transition
 *		- character transitions are labeled "any" (whole character range), by quoted
 *		  characters (codes below 128 which are printable) or by hexadecimal codes;
 *		  identity transitions are labeled "-i-" and epsilon transitions "-e-"
 */
class CRegExp_NFSM::CPrintTransitionNames {
protected:
	FILE *m_p_out;

public:
	inline CPrintTransitionNames(FILE *p_out)
		:m_p_out(p_out)
	{}

	inline void operator ()(const TNFATransition &r_transition)
	{
		fprintf(m_p_out, "\t");
		PrintState(r_transition.p_start);
		fprintf(m_p_out, " -> ");
		PrintState(r_transition.p_end);
		// print the edge itself

		if(r_transition.n_type != TNFATransition::tran_Char) {
			if(r_transition.n_type == TNFATransition::tran_Identity)
				fprintf(m_p_out, " [ label = \"-i-\" ];\n");
			else /*if(r_transition.n_type == TNFATransition::tran_Epsilon)*/ {
				_ASSERTE(r_transition.n_type == TNFATransition::tran_Epsilon);
				fprintf(m_p_out, " [ label = \"-e-\" ];\n");
			}
			return;
		}
		// transitions without input character have fixed labels

		if(r_transition.n_char_min == n_first_unicode_char &&
		   r_transition.n_char_max == n_last_unicode_char) {
			fprintf(m_p_out, " [ label = \"any\" ];\n");
			return;
		}
		// the whole character range

		if(r_transition.n_char_min == r_transition.n_char_max) {
			if(r_transition.n_char_min < 128 && isprint(r_transition.n_char_min)) {
				fprintf(m_p_out, " [ label = \"");
				PrintCharLiteral(r_transition.n_char_min);
				fprintf(m_p_out, "\" ];\n");
			} else
				fprintf(m_p_out, " [ label = \"0x%02x\" ];\n", r_transition.n_char_min);
			// a single character
		} else {
			if(r_transition.n_char_min < 128 && r_transition.n_char_max < 128 &&
			   isprint(r_transition.n_char_min) && isprint(r_transition.n_char_max)) {
				fprintf(m_p_out, " [ label = \"");
				PrintCharLiteral(r_transition.n_char_min);
				fprintf(m_p_out, " - ");
				PrintCharLiteral(r_transition.n_char_max);
				fprintf(m_p_out, "\" ];\n");
			} else {
				fprintf(m_p_out, " [ label = \"0x%02x - 0x%02x\" ];\n",
					r_transition.n_char_min, r_transition.n_char_max);
			}
			// range of characters
		}
	}

	inline void PrintState(const TNFAState *p_state)
	{
		if(p_state->n_expression_id == -1)
			fprintf(m_p_out, "_%d_", p_state->n_id);
		else
			fprintf(m_p_out, "A%d__%d_", p_state->n_expression_id, p_state->n_id);
		// accepting states carry the expression id in their name
	}

protected:
	inline void PrintCharLiteral(int n_char)
	{
		if(n_char == '\"')
			fprintf(m_p_out, "\'\\\"\'");
		else if(n_char == '\\')
			fprintf(m_p_out, "\'\\\\\'");
		else
			fprintf(m_p_out, "\'%c\'", n_char);
		// character in single quotes; double quote and backslash
		// are escaped by backslash (the label is a double-quoted string)
	}
};
#endif //REG_EXP_MACHINES_DOTTY_CAPABILITY

/*
 *	class CRegExp_NFSM::CReassingState
 *		- function object for std::for_each over list of NFA transitions
 *		- every transition starting or ending in p_remove_state
 *		  is changed to start or end in p_collapse_to instead
 */
class CRegExp_NFSM::CReassingState {
protected:
	TNFAState *m_p_old_state, *m_p_new_state;

	inline TNFAState *p_Remap(TNFAState *p_state) const
	{
		return (p_state == m_p_old_state)? m_p_new_state : p_state;
	}

public:
	inline CReassingState(TNFAState *p_remove_state, TNFAState *p_collapse_to)
		:m_p_old_state(p_remove_state), m_p_new_state(p_collapse_to)
	{}

	inline void operator ()(TNFATransition &r_transition)
	{
		r_transition.p_start = p_Remap(r_transition.p_start);
		r_transition.p_end = p_Remap(r_transition.p_end);
		// pointers to other states are written back unchanged
	}
};

/*
 *	class CRegExp_NFSM::CGatherStates
 *		- function object for std::for_each over list of NFA transitions
 *		- for every epsilon transition leading from a state marked in the DFA state
 *		  to a state which is not marked yet, the end state is marked as well
 *		- if the newly marked state has expression id, the DFA state takes it over
 *		- converts to bool; the value is true if at least one state was newly marked
 */
class CRegExp_NFSM::CGatherStates {
protected:
	TDFAState &m_r_dfa_state;
	bool m_b_changed;

public:
	inline CGatherStates(TDFAState &r_dfa_state)
		:m_r_dfa_state(r_dfa_state), m_b_changed(false)
	{}

	inline void operator ()(const TNFATransition &r_transition)
	{
		if(r_transition.n_type != TNFATransition::tran_Epsilon)
			return;
		// only epsilon transitions are followed

		if(!m_r_dfa_state.state_flags[r_transition.p_start->n_id] ||
		   m_r_dfa_state.state_flags[r_transition.p_end->n_id])
			return;
		// must start in a marked state and must end in a state not marked so far

		m_b_changed = true;
		m_r_dfa_state.state_flags[r_transition.p_end->n_id] = true;
		// mark the end state

		if(r_transition.p_end->n_expression_id == -1)
			return;
		// the end state is not accepting, DFA state expression id is not affected

#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
		if(m_r_dfa_state.n_expression_id != -1 &&
		   m_r_dfa_state.n_expression_id != r_transition.p_end->n_expression_id) {
			fprintf(stderr, "warning: more different accepting states on "
				"epsilon edge: ignoring F%d, keep F%d\n",
				m_r_dfa_state.n_expression_id, r_transition.p_end->n_expression_id);
		}
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
		m_r_dfa_state.n_expression_id = r_transition.p_end->n_expression_id;
		// the last accepting state found wins
	}

	inline operator bool() const
	{
		return m_b_changed;
	}
};

/*
 *	class CRegExp_NFSM::CNumberStates
 *		- function object for std::for_each over list of NFA states
 *		- assigns the states consecutive id's, beginning with n_first_index
 */
class CRegExp_NFSM::CNumberStates {
protected:
	int m_n_next_id;

public:
	inline CNumberStates(int n_first_index = 0)
		:m_n_next_id(n_first_index)
	{}

	inline void operator ()(TNFAState *p_state)
	{
		p_state->n_id = m_n_next_id ++;
		// use the id and advance to the next one
	}
};

/*
 *	class CRegExp_NFSM::CGatherTransitions
 *		- function object for std::for_each over list of NFA transitions
 *		- copies character transitions starting in any of states
 *		  marked in the DFA state to the output list
 *		- converts to bool; the value is false if the output list could not be enlarged
 */
class CRegExp_NFSM::CGatherTransitions {
protected:
	const TDFAState &m_r_dfa_state;
	std::vector<TNFATransition> &m_r_transition_list;
	bool m_b_result;

public:
	inline CGatherTransitions(const TDFAState &r_dfa_state,
		std::vector<TNFATransition> &r_transition_list)
		:m_r_dfa_state(r_dfa_state), m_r_transition_list(r_transition_list), m_b_result(true)
	{}

	inline void operator ()(const TNFATransition &r_transition)
	{
		if(r_transition.n_type != TNFATransition::tran_Char ||
		   !m_r_dfa_state.state_flags[r_transition.p_start->n_id])
			return;
		// not a character transition or does not start in any of marked states

		_ASSERTE(!r_transition.b_Empty());
		if(stl_ut::Reserve_1More(m_r_transition_list))
			m_r_transition_list.push_back(r_transition);
		else
			m_b_result = false;
		// the transition is dropped and the failure remembered if there's no space
	}

	inline operator bool() const
	{
		return m_b_result;
	}
};

/*
 *	class CRegExp_NFSM::CFindConjunct
 *		- predicate for std::find_if over list of intervals (or transitions)
 *		- returns result of b_Conjunct() called on the interval given in constructor,
 *		  the tested interval being the argument
 */
class CRegExp_NFSM::CFindConjunct {
protected:
	TInterval m_t_reference;

public:
	inline CFindConjunct(const TInterval &r_transition)
		:m_t_reference(r_transition)
	{}

	inline bool operator ()(const TInterval &r_transition) const
	{
		const bool b_conjunct = m_t_reference.b_Conjunct(r_transition);
		return b_conjunct;
	}
};

/*
 *	class CRegExp_NFSM::CFindLeft
 *		- predicate for std::find_if over list of intervals (or transitions)
 *		- returns true for intervals whose max-char is greater
 *		  than min-char of the interval given in constructor
 */
class CRegExp_NFSM::CFindLeft {
protected:
	int m_n_threshold;

public:
	inline CFindLeft(const TInterval &r_transition)
		:m_n_threshold(r_transition.n_char_min)
	{}

	inline bool operator ()(const TInterval &r_transition) const
	{
		return m_n_threshold < r_transition.n_char_max;
	}
};

/*
 *	class CRegExp_NFSM::CCompareDFATransitions
 *		- less-than ordering of DFA transitions for std::sort
 *		- transitions are ordered by end state (address of the state is compared),
 *		  transitions with the same end state are ordered by min-char
 */
class CRegExp_NFSM::CCompareDFATransitions {
public:
	inline bool operator ()(const TDFATransition &r_tran1, const TDFATransition &r_tran2) const
	{
		if(r_tran1.p_end != r_tran2.p_end)
			return r_tran1.p_end < r_tran2.p_end;
		// end states decide first; a cheap pointer comparison is used instead
		// of comparing state flags, all that matters is that transitions with
		// the same end state end up next to each other, no matter in which order

		return r_tran1.n_char_min < r_tran2.n_char_min;
		// the same end state, character range decides
	}
};

/*
 *	class CRegExp_NFSM::CGenerateEndState
 *		- function object for std::for_each over list of NFA transitions
 *		- clears all state flags of the end state in constructor, then marks end states
 *		  of all transitions for which b_Conjunct() with the given interval is true
 *		- if the marked state has expression id, the end state takes it over
 */
class CRegExp_NFSM::CGenerateEndState {
protected:
	TDFAState &m_r_end_state;
	TInterval m_t_interval;

public:
	inline CGenerateEndState(TDFAState &r_end_state, const TInterval &r_t_interval)
		:m_r_end_state(r_end_state), m_t_interval(r_t_interval)
	{
		m_r_end_state.state_flags = false;
		// begin with no states marked
	}

	inline void operator ()(const TNFATransition &r_transition)
	{
		if(!r_transition.b_Conjunct(m_t_interval))
			return;
		// this transition is not triggered by characters from the interval

		m_r_end_state.state_flags[r_transition.p_end->n_id] = true;
		// mark the state the transition leads to

		if(r_transition.p_end->n_expression_id == -1)
			return;
		// not an accepting state, expression id is not affected

#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
		if(m_r_end_state.n_expression_id != -1 &&
		   m_r_end_state.n_expression_id != r_transition.p_end->n_expression_id) {
			fprintf(stderr, "warning: more different accepting states on "
				"epsilon edge: ignoring F%d, keep F%d\n",
				m_r_end_state.n_expression_id, r_transition.p_end->n_expression_id);
		}
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
		m_r_end_state.n_expression_id = r_transition.p_end->n_expression_id;
		// the last accepting state found wins
	}
};

/*
 *	class CRegExp_NFSM::CEmitDFATransitions
 *		- function object for std::for_each over sorted list of DFA transitions
 *		- keeps a single pending transition (the first one is given in constructor);
 *		  a transition with the same end state, beginning exactly one character after
 *		  the pending one ends, extends the pending transition. otherwise the pending
 *		  transition is added to the DFSM and replaced by the current one
 *		- the last pending transition is not added to the DFSM, the caller
 *		  gets it by calling t_LastTransition() and adds it itself
 *		- b_Result() returns false if adding any of transitions failed
 */
class CRegExp_NFSM::CEmitDFATransitions {
protected:
	TDFATransition m_t_cur_tran;
	CRegExp_DFSM &m_r_dfsm;
	bool m_b_result;

public:
	CEmitDFATransitions(TDFATransition t_first_tran, CRegExp_DFSM &r_dfsm)
		:m_t_cur_tran(t_first_tran), m_r_dfsm(r_dfsm), m_b_result(true)
	{}

	inline void operator ()(const TDFATransition &r_t_next_tran)
	{
		if(m_t_cur_tran.p_end == r_t_next_tran.p_end &&
		   m_t_cur_tran.n_char_max + 1 == r_t_next_tran.n_char_min)
			m_t_cur_tran.n_char_max = r_t_next_tran.n_char_max; // extend the pending one
		else {
			if(!m_r_dfsm.AddTransition(m_t_cur_tran))
				m_b_result = false;
			m_t_cur_tran = r_t_next_tran;
			// emit the pending one, the current one becomes pending
		}
	}

	inline CEmitDFATransitions &operator =(const CEmitDFATransitions &r_emit)
	{
		_ASSERTE(&m_r_dfsm == &r_emit.m_r_dfsm);
		// reference can't be rebound; assigning to it would copy the whole
		// state machine over the referenced one, which is never wanted here
		// (only objects emitting to the same machine are supposed to be assigned)

		m_t_cur_tran = r_emit.m_t_cur_tran;
		m_b_result = r_emit.m_b_result;
		return *this;
	}

	inline bool b_Result() const
	{
		return m_b_result;
	}

	inline TDFATransition t_LastTransition() const
	{
		return m_t_cur_tran;
	}
};

/*
 *	void CRegExp_NFSM::CollapseIdentityTransitions()
 *		- as a result of state generation policy, sometimes occur more than one
 *		  constructs should end up in the same state, but there is no mechanism
 *		  to specify this state so identity transitions are added (don't confuse
 *		  with epsilon transitions)
 *		- when all states are generated, groups of states connected by identity
 *		  transitions should be collapsed to single state each which is purpose
 *		  of this function
 */
void CRegExp_NFSM::CollapseIdentityTransitions()
{
	__FuncGuard("CRegExp_NFSM::CollapseIdentityTransitions");

	std::vector<TNFATransition>::iterator p_identity_edge;
	while(m_transition_list.size() && (p_identity_edge = std::find_if(
	   m_transition_list.begin(), m_transition_list.end(),
	   b_IsIdentityTransition)) != m_transition_list.end()) {
		TNFAState *p_remove_state = (*p_identity_edge).p_start;
		TNFAState *p_collapse_to = (*p_identity_edge).p_end;
		// remember which state to collapse to another

		m_transition_list.erase(p_identity_edge);
		// remove identity transition from the list

		if(p_remove_state == p_collapse_to)
			continue;
		// degenerate transition, originated propably by collapsing two states where there
		// was more of identity transitions between them

#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
		if(p_remove_state->n_expression_id != -1 && p_collapse_to->n_expression_id != -1 &&
		   p_remove_state->n_expression_id != p_collapse_to->n_expression_id) {
			fprintf(stderr, "warning: more different accepting states on epsilon edge: "
				"ignoring F%d, keep F%d\n", p_remove_state->n_expression_id,
				p_collapse_to->n_expression_id);
		}
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
		// in case both states are accepting states with different expression target

		std::vector<TNFAState*>::iterator p_remove_state_iter;
		p_remove_state_iter = std::find(m_state_list.begin(),
			m_state_list.end(), p_remove_state);
		_ASSERTE(p_remove_state_iter != m_state_list.end());
		delete *p_remove_state_iter; // delete state!
		m_state_list.erase(p_remove_state_iter);
		// erase unwanted state from the list

		std::for_each(m_transition_list.begin(), m_transition_list.end(),
			CReassingState(p_remove_state, p_collapse_to));
		// repair state pointers in transitions
	}
}

#ifdef REG_EXP_MACHINES_DOTTY_CAPABILITY
/*
 *	void CRegExp_NFSM::PrintDotty(FILE *p_out)
 *		- prints description of FSM graph in dotty format to p_out
 *		- states are renumbered first (NumberStates() is called)
 *		- note no i/o errors are checked for
 */
void CRegExp_NFSM::PrintDotty(FILE *p_out)
{
	__FuncGuard("CRegExp_NFSM::PrintDotty");

	NumberStates();
	// so the diagram corresponds with the one for debugging DFSM

	fputs("digraph finite_state_machine {\n\tnode [shape = doublecircle];", p_out);
	std::for_each(m_state_list.begin(), m_state_list.end(), CPrintAcceptingStateNames(p_out));
	// header, followed by list of accepting states (drawn as doublecircles)

	fputs(";\n\tnode [shape = circle];\n", p_out);
	// all the nodes not named so far are circular

	std::for_each(m_transition_list.begin(),
		m_transition_list.end(), CPrintTransitionNames(p_out));
	// edges of the graph

	fputs("}\n", p_out);
	// end of dotty file
}
#endif //REG_EXP_MACHINES_DOTTY_CAPABILITY

/*
 *	bool CRegExp_NFSM::MakeDeterministic(CRegExp_DFSM &r_dfsm)
 *		- converts this NFSM to equivalent DFSM
 *		- identity transitions are collapsed and states are renumbered in the process
 *		- returns true on success, false on failure (r_dfsm might
 *		  be left only partially built in such a case)
 *		- note r_dfsm should be blank
 */
bool CRegExp_NFSM::MakeDeterministic(CRegExp_DFSM &r_dfsm)
{
	__FuncGuard("CRegExp_NFSM::MakeDeterministic");

	/*
	 *	--- algorithm ---
	 *
	 *	- take the initial state and gather all the states connected to it by epsilon
	 *	  transitions (such a set of states forms a single TDFAState)
	 *	- gather all character transitions going out of the gathered states
	 *	- break them to disjunct character intervals; each interval leads to a set of states
	 *	- create TDFAState from each such set and find out whether it already is in the
	 *	  DFSM; if it is not, add it there and to the list of states to be expanded
	 *	- repeat with another state from the list, finish once the list is empty
	 *
	 */

	CollapseIdentityTransitions();
	// there must be no identity transitions (changes number of states, must come first)

	NumberStates();
	// state id's are used as indices to TDFAState bit arrays

	const size_t n_flag_num = m_state_list.size() + 1;
	// a flag for every state in the list, plus one for the initial state

	TDFAState *p_init_state = r_dfsm.p_GetInitState();
	if(!p_init_state->state_flags.Alloc(n_flag_num))
		return false;
	p_init_state->state_flags = false;
	p_init_state->state_flags[m_t_initial_state.n_id] = true;
	p_init_state->n_expression_id = m_t_initial_state.n_expression_id;
	GatherStates(*p_init_state);
	// initial state of DFSM are all the states reachable
	// by epsilon transitions from the initial state of NFSM

	std::vector<TDFAState*> expand_list; // list of states to expand
	_ASSERTE(expand_list.empty());
	if(!stl_ut::Reserve_N(expand_list, 1))
		return false;
	expand_list.push_back(p_init_state);
	// begin with expanding the initial state

	int n_debug_iteration_no = -1; // not used by the algorithm itself
	while(!expand_list.empty()) {
		++ n_debug_iteration_no;

		TDFAState *p_expand_state = expand_list.back();
		expand_list.pop_back();
		// take a state to be expanded from the list

		std::vector<TNFATransition> nfa_tran_list;
		if(!GatherTransitions(*p_expand_state, nfa_tran_list))
			return false;
		// character transitions going out of it

		std::vector<TDFATransition> dfa_tran_list;
		if(!CreateDisjunctTransitions(p_expand_state, nfa_tran_list, dfa_tran_list))
			return false;
		// the same, broken to (possibly lots of) disjunct
		// transitions, their end states are not known yet

		for(size_t i = 0, n = dfa_tran_list.size(); i < n; ++ i) {
			TDFATransition &r_cur_dfa_tran = dfa_tran_list[i];

			TDFAState end_state(n_flag_num);
			if(end_state.state_flags.n_Length() != n_flag_num)
				return false;
			// flags could not be allocated

			std::for_each(nfa_tran_list.begin(), nfa_tran_list.end(),
				CGenerateEndState(end_state, (TInterval)r_cur_dfa_tran));
			// mark all the states the transition leads to (expression id is determined
			// as well; note it might be useful to generate warnings about conflicts here)

			GatherStates(end_state);
			// add the states reachable by epsilon transitions from there

			TDFAState *p_end_state = r_dfsm.p_FindState(end_state);
			if(!p_end_state) {
				p_end_state = r_dfsm.p_GetState(n_flag_num);
				if(!p_end_state)
					return false;
				*p_end_state = end_state;
				// the state is not in the DFSM yet, create it there

				if(!stl_ut::Reserve_1More(expand_list))
					return false;
				expand_list.push_back(p_end_state);
				// a new state needs to be expanded as well
			}
			// either an existing state is used or a new one is created

			r_cur_dfa_tran.p_end = p_end_state;
			// the end state is known now
		}
		// determine end states of the transitions

		std::sort(dfa_tran_list.begin(), dfa_tran_list.end(), CCompareDFATransitions());
		// sort transitions by end state and by input character range

		if(dfa_tran_list.empty())
			continue;
		// nothing to be added to the DFSM (maybe a bit pedantic)

		CEmitDFATransitions t_emit(dfa_tran_list.front(), r_dfsm);
		t_emit = std::for_each(dfa_tran_list.begin() + 1, dfa_tran_list.end(), t_emit);
		if(!t_emit.b_Result())
			return false;
		if(!r_dfsm.AddTransition(t_emit.t_LastTransition()))
			return false;
		// add transitions to the DFSM, transitions with the same end
		// state and with adjacent input character ranges are merged
	}

	return true;
}

/*
 *	void CRegExp_NFSM::GatherStates(TDFAState &r_state) const
 *		- marks all the states which can be reached by epsilon transitions
 *		  from the states already marked in r_state
 *		- note state id's must be equal to their indices in TDFAState bit array,
 *		  which is assured by calling NumberStates()
 */
void CRegExp_NFSM::GatherStates(TDFAState &r_state) const
{
	__FuncGuard("CRegExp_NFSM::GatherStates");

	bool b_marked_more;
	do {
		b_marked_more = std::for_each(m_transition_list.begin(),
			m_transition_list.end(), CGatherStates(r_state));
		// a single pass over all the transitions; the function
		// object converts to true if any state was newly marked
	} while(b_marked_more);
	// newly marked states may lead to more states, repeat until nothing changes
}

/*
 *	void CRegExp_NFSM::NumberStates()
 *		- assign states id's which can be used as indices
 */
void CRegExp_NFSM::NumberStates()
{
	__FuncGuard("CRegExp_NFSM::NumberStates");

	m_t_initial_state.n_id = 0;
	std::for_each(m_state_list.begin(), m_state_list.end(), CNumberStates(1));
}

/*
 *	bool CRegExp_NFSM::GatherTransitions(const TDFAState &r_dfa_state,
 *		std::vector<TNFATransition> &r_transition_list) const
 *		- gathers all character transitions going out of any state marked
 *		  in r_dfa_state and adds them to the end of r_transition_list
 *		- returns true on success or false on failure (not enough memory)
 */
bool CRegExp_NFSM::GatherTransitions(const TDFAState &r_dfa_state,
	std::vector<TNFATransition> &r_transition_list) const
{
	__FuncGuard("CRegExp_NFSM::GatherTransitions");

	const bool b_result = std::for_each(m_transition_list.begin(),
		m_transition_list.end(), CGatherTransitions(r_dfa_state, r_transition_list));
	// the function object converts to false in case the list could not be enlarged

	return b_result;
}

/*
 *	static bool CRegExp_NFSM::CreateDisjunctTransitions(TDFAState *p_start,
 *		const std::vector<TNFATransition> &r_nfa_tran_list,
 *		std::vector<TDFATransition> &r_dfa_tran_list)
 *		- takes set of transitions r_nfa_tran_list whose input character intervals
 *		  may overlap as input and generates set of DFA transitions with disjunct
 *		  input character intervals, but allowing the same input characters
 *		- all transitions in r_nfa_tran_list must be character transitions
 *		  (tran_Char), the function fails if there is any other transition
 *		- r_dfa_tran_list is filled with generated transitions; it is kept sorted
 *		  by input character intervals (checked in debug builds). note the list
 *		  is not cleared here, the generated transitions are merged with
 *		  whatever is already there (MakeDeterministic() passes an empty list)
 *		- state p_start is used as start state in newly generated transitions,
 *		  their end state is set to 0 (yet to be determined)
 *		- returns true on success, false on failure (a transition of
 *		  unexpected type or the list could not be enlarged)
 */
bool CRegExp_NFSM::CreateDisjunctTransitions(TDFAState *p_start,
	const std::vector<TNFATransition> &r_nfa_tran_list,
	std::vector<TDFATransition> &r_dfa_tran_list)
{
	__FuncGuard("CRegExp_NFSM::CreateDisjunctTransitions");

	for(std::vector<TNFATransition>::const_iterator p_nfa_iter = r_nfa_tran_list.begin(),
	   p_nfa_end_iter = r_nfa_tran_list.end(); p_nfa_iter != p_nfa_end_iter; ++ p_nfa_iter) {
		if((*p_nfa_iter).n_type != TNFATransition::tran_Char)
			return false;
		// no epsilons here!

		TInterval t_add_interval = (TInterval)*p_nfa_iter;
		// the interval which remains to be added (it gets shorter as its parts
		// are being added to the list or found to be already present there)

		for(dfa_offset_t n_search_offset = 0;;) {
			// optimized searching utilizing the fact r_dfa_tran_list is sorted
			// by input char interval values (n_search_offset is index of the first
			// DFA transition which needs to be tested against t_add_interval)

			_ASSERTE(!t_add_interval.b_Empty());
			// just check if we aren't creating degenerate transitions

			std::vector<TDFATransition>::iterator p_conjunct;
			if((p_conjunct = std::find_if(r_dfa_tran_list.begin() + n_search_offset,
			   r_dfa_tran_list.end(), CFindConjunct(t_add_interval))) ==
			   r_dfa_tran_list.end()) {
				// NFA transition doesn't intersect any of DFA transitions.

				if(!stl_ut::Reserve_1More(r_dfa_tran_list))
					return false;

				std::vector<TDFATransition>::iterator p_insert_before = std::find_if(
					r_dfa_tran_list.begin(), r_dfa_tran_list.end(), CFindLeft(t_add_interval));
				// find proper insertion point so array remains
				// sorted after inserting the new interval (the first transition
				// with max-char greater than min-char of the new interval)

				r_dfa_tran_list.insert(p_insert_before, TDFATransition(p_start, 0,
					t_add_interval.n_char_min, t_add_interval.n_char_max));
				// just add it and go away

				break;
				// done
			} else {
				// NFA transition char range interval conjuncts DFA transition interval.
				// break it into disjunct intervals (zero, one or two of them) and try again

				if(!(t_add_interval == ((TInterval)*p_conjunct))) {
					TInterval t_conjunct = (TInterval)*p_conjunct;
					dfa_offset_t n_conjunct_index = p_conjunct - r_dfa_tran_list.begin();
					// must keep a copy of the interval and an index instead of the
					// iterator; insert() and reserve() invalidate iterators

					n_search_offset = n_conjunct_index + 1;
					// update search offset (the next search begins behind the conjunct
					// transition; the offset is further advanced by the number
					// of transitions inserted below)

					if(!t_conjunct.b_Contained(t_add_interval)) {
						// NOTE(review): presumably means t_conjunct does not lie entirely
						// inside of t_add_interval - confirm against TInterval

						TInterval t_intersection =
							t_conjunct.t_Intersection(t_add_interval);
						_ASSERTE(!t_intersection.b_Empty());
						TInterval t_complement1 = t_conjunct.t_LeftComplement(t_add_interval);
						TInterval t_complement2 = t_conjunct.t_RightComplement(t_add_interval);
						// three parts of original interval; one of complements might be
						// empty interval

						bool b_cm1 = !t_complement1.b_Empty();
						bool b_cm2 = !t_complement2.b_Empty();

						if(!stl_ut::Reserve_NMore(r_dfa_tran_list, int(b_cm1) + int(b_cm2)))
							return false;
						// make sure there's enough space

						r_dfa_tran_list[n_conjunct_index] = TDFATransition(p_start, 0,
							((b_cm1)? t_complement1 : t_intersection).n_char_min,
							((b_cm1)? t_complement1 : t_intersection).n_char_max);
						// the existing transition is overwritten by the leftmost
						// of the parts (left complement or intersection if it is empty)

						if(b_cm1) {
							r_dfa_tran_list.insert(r_dfa_tran_list.begin() +
								n_conjunct_index + 1, TDFATransition(p_start, 0,
								t_intersection.n_char_min, t_intersection.n_char_max));
						}
						if(b_cm2) {
							r_dfa_tran_list.insert(r_dfa_tran_list.begin() +
								n_conjunct_index + b_cm1 + 1, TDFATransition(p_start, 0,
								t_complement2.n_char_min, t_complement2.n_char_max));
						}
						// add transition parts into the list while keeping
						// transitions sorted by their input character intervals

						n_search_offset += b_cm1 + b_cm2;
						// update search offset
					}
					// subdivide the original transition so no partial intersections occur

					TInterval t_left = t_add_interval.t_LeftComplement(t_conjunct);
					TInterval t_right = t_add_interval.t_RightComplement(t_conjunct);
					// subdivide the transition being added

					if(!t_left.b_Empty()) {
						if(!stl_ut::Reserve_1More(r_dfa_tran_list))
							return false;
						// make sure there's enough space

						r_dfa_tran_list.insert(r_dfa_tran_list.begin() +
							n_conjunct_index, TDFATransition(p_start, 0,
							t_left.n_char_min, t_left.n_char_max));
						// goes right before the (former) conjunct transition

						++ n_search_offset;
						// update search offset
					}
					// can add left part right away

					if(!t_right.b_Empty())
						t_add_interval = t_right;
					else
						break;
					// either continue with right part or we're done
				} else {
					break;
					// transition with the same interval is already in the list, done
				}
			}
		}
	}
	// add all transitions so all characters are covered, but none of transitions overlap

#ifdef _DEBUG
	for(std::vector<TDFATransition>::iterator p_dfa_iter = r_dfa_tran_list.begin(),
	   p_dfa_end_iter = r_dfa_tran_list.end(); p_dfa_iter != p_dfa_end_iter; ++ p_dfa_iter) {
		if(p_dfa_iter != r_dfa_tran_list.begin()) {
			_ASSERTE((*(p_dfa_iter - 1)).n_char_max < (*p_dfa_iter).n_char_min);
			// must come sorted
		}

		_ASSERTE((*p_dfa_iter).n_char_min <= (*p_dfa_iter).n_char_max);
		// no malformed transitions

		_ASSERTE(std::find_if(r_dfa_tran_list.begin(), p_dfa_iter,
			CFindConjunct((*p_dfa_iter))) == p_dfa_iter);
		_ASSERTE(std::find_if(p_dfa_iter + 1, r_dfa_tran_list.end(),
			CFindConjunct((*p_dfa_iter))) == r_dfa_tran_list.end());
		// no overlapping transitions (neither before nor after this one)
	}
	// just check algorithm correctness
#endif

	return true;
}

/*
 *								--- ~CRegExp_NFSM ---
 */

/*
 *								--- ~finite state machinery ---
 */

/*
 *								--- CRegExp_Parser::CNode ---
 */

#ifdef REG_EXP_COMPILER_DUMP
/*
 *	void CRegExp_Parser::CNode::Print(int n_level)
 *		- prints indentation for level n_level and a line saying "unknown-node" to stdout
 */
void CRegExp_Parser::CNode::Print(int n_level)
{
	__FuncGuard("CRegExp_Parser::CNode::Print");

	PrintIndent(n_level);
	fputs("unknown-node\n", stdout);
	// no information about the node is printed here
}

/*
 *	void CRegExp_Parser::CNode::PrintIndent(int n_level)
 *		- prints indentation to stdout, four spaces per level
 *		- nothing is printed if n_level is zero or negative
 */
void CRegExp_Parser::CNode::PrintIndent(int n_level)
{
	__FuncGuard("CRegExp_Parser::CNode::PrintIndent");

	for(int i = 0; i < n_level; ++ i)
		printf("    ");
	// counting up makes sure the loop is finite for negative levels as well
}
#endif //REG_EXP_COMPILER_DUMP

/*
 *								--- ~CRegExp_Parser::CNode ---
 */

/*
 *								--- CRegExp_Parser::CNodeString ---
 */

/*
 *	class CRegExp_Parser::CNodeString::CAddSerialTransitions
 *		- function object for std::for_each over list of nodes
 *		- each node adds its transitions to the FSM, beginning in the state
 *		  where the previous node has ended (the first one begins in p_initial)
 *		- once a node returns null, the remaining nodes are skipped
 *		- converts to pointer to the state where the last node
 *		  has ended (null in case any of nodes failed)
 */
class CRegExp_Parser::CNodeString::CAddSerialTransitions {
protected:
	TNFAState *m_p_last_state;
	CRegExp_NFSM &m_r_fsm;

public:
	inline CAddSerialTransitions(TNFAState *p_initial, CRegExp_NFSM &r_fsm)
		:m_p_last_state(p_initial), m_r_fsm(r_fsm)
	{}

	inline void operator ()(CNode *p_serial_node)
	{
		if(!m_p_last_state)
			return;
		// there was an error before, do nothing

		m_p_last_state = p_serial_node->p_AddTransitions(m_p_last_state, m_r_fsm);
		// the next node continues where this one has ended
	}

	inline operator TNFAState*() const
	{
		return m_p_last_state;
	}
};

/*
 *	class CRegExp_Parser::CNodeRangeChar::CFindConjunct
 *		- predicate for std::find_if over list of intervals
 *		- returns result of b_Conjunct() called on the interval given in constructor,
 *		  the tested interval being the argument
 *		- neither of intervals may be empty (asserted in debug)
 */
class CRegExp_Parser::CNodeRangeChar::CFindConjunct {
protected:
	TInterval m_t_reference;

public:
	inline CFindConjunct(const TInterval &r_range)
		:m_t_reference(r_range)
	{}

	inline bool operator ()(const TInterval &r_t_range) const
	{
		_ASSERTE(!m_t_reference.b_Empty());
		_ASSERTE(!r_t_range.b_Empty());
		// malformed intervals are not expected here

		const bool b_conjunct = m_t_reference.b_Conjunct(r_t_range);
		return b_conjunct;
	}
};

/*
 *	CRegExp_Parser::CNodeString::~CNodeString()
 *		- destructor; DeleteNodes is called with every node in the list
 */
CRegExp_Parser::CNodeString::~CNodeString()
{
	__FuncGuard("CRegExp_Parser::CNodeString::~CNodeString");

	for(size_t i = 0, n = m_node_list.size(); i < n; ++ i)
		DeleteNodes(m_node_list[i]);
	// nodes are passed in list order, the list itself is not modified
}

/*
 *	TNFAState *CRegExp_Parser::CNodeString::p_AddTransitions(TNFAState *p_initial,
 *		CRegExp_NFSM &r_fsm) const
 *		- adds transitions of all the nodes in the list to r_fsm, one after another;
 *		  the first node begins in p_initial, each of the following
 *		  nodes begins in the state where the previous one has ended
 *		- returns the state where the last node has ended or 0 if any of nodes failed
 *		- note the list of nodes is not supposed to be empty (asserted in debug)
 */
TNFAState *CRegExp_Parser::CNodeString::p_AddTransitions(TNFAState *p_initial,
	CRegExp_NFSM &r_fsm) const
{
	__FuncGuard("CRegExp_Parser::CNodeString::p_AddTransitions");

	_ASSERTE(m_node_list.size() > 0);

	CAddSerialTransitions t_chain(p_initial, r_fsm);
	TNFAState *p_last_state = std::for_each(m_node_list.begin(),
		m_node_list.end(), t_chain);
	// add serial transitions generated by the nodes themselves; the function object
	// converts to the last state (that would be p_initial in case there were no nodes)

	return p_last_state;
}

/*
 *	bool CRegExp_Parser::CNodeString::AddNode(CNode *p_node)
 *		- adds p_node to the end of list of nodes
 *		- returns true on success, false on failure (the list
 *		  could not be enlarged, p_node was not added)
 */
bool CRegExp_Parser::CNodeString::AddNode(CNode *p_node)
{
	__FuncGuard("CRegExp_Parser::CNodeString::AddNode");

	if(stl_ut::Reserve_1More(m_node_list)) {
		m_node_list.push_back(p_node);
		return true;
	}
	// space is reserved first

	return false;
}

#ifdef REG_EXP_COMPILER_DUMP
/*
 *	void CRegExp_Parser::CNodeString::Print(int n_level)
 *		- prints a line with number of nodes in the list to stdout, indented
 *		  to level n_level, followed by all the nodes printed one level deeper
 */
void CRegExp_Parser::CNodeString::Print(int n_level)
{
	__FuncGuard("CRegExp_Parser::CNodeString::Print");

	PrintIndent(n_level);
	printf("node-string(length=%d)\n", int(m_node_list.size()));
	// size() is not an int, it must be converted to match the format specifier

	for(size_t i = 0, n = m_node_list.size(); i < n; ++ i)
		m_node_list[i]->Print(n_level + 1);
	// print the nodes
}
#endif //REG_EXP_COMPILER_DUMP

/*
 *								--- ~CRegExp_Parser::CNodeString ---
 */

/*
 *								--- CRegExp_Parser::CNodeChar ---
 */

/*
 *	TNFAState *CRegExp_Parser::CNodeChar::p_AddTransitions(TNFAState *p_initial,
 *		CRegExp_NFSM &r_fsm) const
 *		- gets a new state from r_fsm and adds a character transition, leading
 *		  from p_initial to the new state and triggered by character of this node
 *		- returns the new state on success, 0 on failure
 */
TNFAState *CRegExp_Parser::CNodeChar::p_AddTransitions(TNFAState *p_initial,
	CRegExp_NFSM &r_fsm) const
{
	__FuncGuard("CRegExp_Parser::CNodeChar::p_AddTransitions");

	TNFAState *p_final = r_fsm.p_GetState();
	if(!p_final)
		return 0;
	// the state the transition leads to

	if(!r_fsm.AddTransition(TNFATransition(p_initial, p_final,
	   TNFATransition::tran_Char, m_n_code, m_n_code)))
		return 0;
	// add single transition triggered by the character

	return p_final;
}

#ifdef REG_EXP_COMPILER_DUMP
/*
 *	void CRegExp_Parser::CNodeChar::Print(int n_level)
 *		- prints a line with character code to stdout, indented to level n_level
 *		- the character itself is printed as well if its code
 *		  is below 128 and it is printable
 */
void CRegExp_Parser::CNodeChar::Print(int n_level)
{
	__FuncGuard("CRegExp_Parser::CNodeChar::Print");

	PrintIndent(n_level);

	const bool b_printable = m_n_code < 128 && isprint(m_n_code);
	if(!b_printable) {
		printf("node-char(code=0x%02x)\n", m_n_code);
		return;
	}
	// only the code can be printed

	printf("node-char(char=\'%c\', code=0x%02x)\n", m_n_code, m_n_code);
}
#endif //REG_EXP_COMPILER_DUMP

/*
 *								--- ~CRegExp_Parser::CNodeChar ---
 */

/*
 *								--- CRegExp_Parser::CNodeAnyChar ---
 */

/*
 *	TNFAState *CRegExp_Parser::CNodeAnyChar::p_AddTransitions(TNFAState *p_initial,
 *		CRegExp_NFSM &r_fsm) const
 *		- gets a new state from r_fsm and adds a character transition, leading from
 *		  p_initial to the new state and triggered by any character
 *		  between n_first_unicode_char and n_last_unicode_char
 *		- returns the new state on success, 0 on failure
 */
TNFAState *CRegExp_Parser::CNodeAnyChar::p_AddTransitions(TNFAState *p_initial,
	CRegExp_NFSM &r_fsm) const
{
	__FuncGuard("CRegExp_Parser::CNodeAnyChar::p_AddTransitions");

	TNFAState *p_final = r_fsm.p_GetState();
	if(!p_final)
		return 0;
	// the state the transition leads to

	if(!r_fsm.AddTransition(TNFATransition(p_initial, p_final,
	   TNFATransition::tran_Char, n_first_unicode_char, n_last_unicode_char)))
		return 0;
	// add single transition triggered by the whole range of characters

	return p_final;
}

#ifdef REG_EXP_COMPILER_DUMP
/*
 *	void CRegExp_Parser::CNodeAnyChar::Print(int n_level)
 *		- prints indentation for level n_level and a line saying "node-any-char" to stdout
 */
void CRegExp_Parser::CNodeAnyChar::Print(int n_level)
{
	__FuncGuard("CRegExp_Parser::CNodeAnyChar::Print");

	PrintIndent(n_level);
	fputs("node-any-char\n", stdout);
	// this node has no parameters to be printed
}
#endif //REG_EXP_COMPILER_DUMP

/*
 *								--- ~CRegExp_Parser::CNodeAnyChar ---
 */

/*
 *								--- CRegExp_Parser::CNodeRangeChar ---
 */

/*
 *	TNFAState *CRegExp_Parser::CNodeRangeChar::p_AddTransitions(TNFAState *p_initial,
 *		CRegExp_NFSM &r_fsm) const
 *		- gets a new state from r_fsm and adds character transitions,
 *		  leading from p_initial to the new state
 *		- character ranges of this node are merged first (ranges which overlap or are
 *		  right next to each other are replaced by a single range) and sorted
 *		- in case the node is not inverse, a single transition is added for every merged
 *		  range; otherwise transitions are added for gaps between the ranges (including
 *		  gaps between the ranges and n_first_unicode_char / n_last_unicode_char)
 *		- inverse node with no ranges at all accepts any character
 *		- returns the new state on success, 0 on failure
 */
TNFAState *CRegExp_Parser::CNodeRangeChar::p_AddTransitions(TNFAState *p_initial,
	CRegExp_NFSM &r_fsm) const
{
	__FuncGuard("CRegExp_Parser::CNodeRangeChar::p_AddTransitions");

	TNFAState *p_final;
	if(!(p_final = r_fsm.p_GetState()))
		return 0;

	std::vector<TInterval> range_list;

	{
		if(!stl_ut::Reserve_N(range_list, m_n_range_num + 2))
			return 0;
		// create list of char ranges (never holds more ranges than there's in
		// this node, there is no need to reserve space before adding them below)

		for(const TInterval *p_range = m_p_range,
		   *p_end = m_p_range + m_n_range_num; p_range != p_end; ++ p_range) {
			TInterval t_range = *p_range;
			std::vector<TInterval>::iterator p_collider;
			while((p_collider = std::find_if(range_list.begin(),
			   range_list.end(), CFindConjunct(t_range))) != range_list.end()) {
				t_range = t_range.t_Union(*p_collider);
				// extend to wholly contain self and collider

				range_list.erase(p_collider);
				// erase collider
			}
			// in case it collides with segments, create unions

			range_list.push_back(t_range);
			// push it inside
		}
		// union ranges

		std::sort(range_list.begin(), range_list.end(), CompareIntervals);
		// sort ranges

		for(size_t i = 0; i < range_list.size(); ++ i) {
			_ASSERTE(!range_list[i].b_Empty());
			if(i && range_list[i - 1].n_char_max + 1 == range_list[i].n_char_min) {
				range_list[i - 1].n_char_max = range_list[i].n_char_max;
				range_list.erase(range_list.begin() + i);
				-- i;
				// i is greater than zero here; the extended
				// range is compared with the next one
			}
		}
		// connect coinciding ranges
	}
	// nicely merge ranges

	if(m_b_inverse) {
		if(range_list.empty()) {
			if(!r_fsm.AddTransition(TNFATransition(p_initial, p_final,
			   TNFATransition::tran_Char, n_first_unicode_char, n_last_unicode_char)))
				return 0;
			// nothing is excluded, inverse of that is the whole range of characters
			// (without this transition the final state could never be reached)
		} else {
			if(range_list[0].n_char_min > n_first_unicode_char) {
				if(!r_fsm.AddTransition(TNFATransition(p_initial, p_final,
				   TNFATransition::tran_Char, n_first_unicode_char,
				   range_list[0].n_char_min - 1)))
					return 0;
				// range between zero and lowest char
			}
			for(int i = 1, n = range_list.size(); i < n; ++ i) {
				_ASSERTE(range_list[i - 1].n_char_max + 1 <= range_list[i].n_char_min - 1);
				// would cause empty range
				if(!r_fsm.AddTransition(TNFATransition(p_initial, p_final,
				   TNFATransition::tran_Char, range_list[i - 1].n_char_max + 1,
				   range_list[i].n_char_min - 1)))
					return 0;
				// range between neighbor ranges
			}
			if(range_list[range_list.size() - 1].n_char_max < n_last_unicode_char) {
				if(!r_fsm.AddTransition(TNFATransition(p_initial, p_final,
				   TNFATransition::tran_Char, range_list[range_list.size() - 1].n_char_max + 1,
				   n_last_unicode_char)))
					return 0;
				// range between max-char and highest char
			}
		}
		// add inverse ranges
	} else {
		for(int i = 0, n = range_list.size(); i < n; ++ i) {
			if(!r_fsm.AddTransition(TNFATransition(p_initial, p_final,
			   TNFATransition::tran_Char, range_list[i].n_char_min, range_list[i].n_char_max)))
				return 0;
			// the range itself
		}
		// add ranges
	}

	return p_final;
}

#ifdef REG_EXP_COMPILER_DUMP
void CRegExp_Parser::CNodeRangeChar::Print(int n_level)
{
	__FuncGuard("CRegExp_Parser::CNodeRangeChar::Print");

	PrintIndent(n_level);
	printf("node-range-char(inverse=%d, range-num=%d)\n", (m_b_inverse)? 1 : 0, m_n_range_num);
	for(const TInterval *p_range = m_p_range, *p_end = m_p_range + m_n_range_num;
	   p_range != p_end; ++ p_range) {
		PrintIndent(n_level + 1);
		if(p_range->n_char_min < 128 && isprint(p_range->n_char_min) &&
		   p_range->n_char_max < 128 && isprint(p_range->n_char_max)) {
			printf("range(min-char=\'%c\', max-char=\'%c\', min-code=0x%02x, max-code=0x%02x)\n",
				p_range->n_char_min, p_range->n_char_max,
				p_range->n_char_min, p_range->n_char_max);
		} else {
			printf("range(min-code=0x%02x, max-code=0x%02x)\n",
				p_range->n_char_min, p_range->n_char_max);
		}
	}
}
#endif //REG_EXP_COMPILER_DUMP

/*
 *								--- ~CRegExp_Parser::CNodeRangeChar ---
 */

/*
 *								--- CRegExp_Parser::CNodeQuant ---
 */

/*
 *	CRegExp_Parser::CNodeQuant::~CNodeQuant()
 *		- destructor; deletes the quantified subnode (if any)
 */
CRegExp_Parser::CNodeQuant::~CNodeQuant()
{
	__FuncGuard("CRegExp_Parser::CNodeQuant::~CNodeQuant");

	delete m_p_subnode;
	// deleting null pointer is a no-op, no test needed
}

/*
 *	TNFAState *CRegExp_Parser::CNodeQuant::p_AddTransitions(TNFAState *p_initial,
 *		CRegExp_NFSM &r_fsm) const
 *		- adds transitions for the quantified subnode to r_fsm, starting in p_initial
 *		- subnode is first chained m_n_min_repeat times; m_n_max_repeat == -1 then
 *		  adds a loop (unbounded repetition), otherwise up to
 *		  m_n_max_repeat - m_n_min_repeat optional repetitions are added
 *		- returns state where the quantified expression ends or 0 on failure
 */
TNFAState *CRegExp_Parser::CNodeQuant::p_AddTransitions(TNFAState *p_initial,
	CRegExp_NFSM &r_fsm) const
{
	__FuncGuard("CRegExp_Parser::CNodeQuant::p_AddTransitions");

	TNFAState *p_tail = p_initial;
	for(int n_pass = 0; n_pass < m_n_min_repeat; ++ n_pass) {
		p_tail = m_p_subnode->p_AddTransitions(p_tail, r_fsm);
		if(!p_tail)
			return 0;
	}
	// mandatory repetitions, connected in series; p_tail is the end of the chain

	if(m_n_max_repeat == -1) {
		if(m_n_min_repeat > 0) {
			TNFAState *p_loop_end = m_p_subnode->p_AddTransitions(p_tail, r_fsm);
			if(!p_loop_end)
				return 0;
			// one more pass trough the subnode

			if(!r_fsm.AddTransition(TNFATransition(p_loop_end,
			   p_tail, TNFATransition::tran_Identity)))
				return 0;
			// connect its end back to the chain end, which also becomes the final state

			return p_tail;
		}
		// at least one repetition is already in the chain

		TNFAState *p_closed = m_p_subnode->p_AddTransitions(p_tail, r_fsm);
		if(!p_closed)
			return 0;
		TNFAState *p_cycle_end = m_p_subnode->p_AddTransitions(p_closed, r_fsm);
		if(!p_cycle_end)
			return 0;
		// two passes trough the subnode: chain end -> closed -> cycle end

		if(!r_fsm.AddTransition(TNFATransition(p_tail,
		   p_closed, TNFATransition::tran_Epsilon)))
			return 0;
		// zero repetitions: go from chain end directly to the closed state

		if(!r_fsm.AddTransition(TNFATransition(p_cycle_end,
		   p_closed, TNFATransition::tran_Identity)))
			return 0;
		// connect cycle end back to the closed state

		return p_closed; // need extra epsilon closure
	}
	// unbounded repetition

	if(m_n_min_repeat >= m_n_max_repeat)
		return p_tail;
	// no optional repetitions, last state in the chain is the final state

	TNFAState *p_exit = r_fsm.p_GetState();
	if(!p_exit)
		return 0;
	// common final state for all the optional repetitions

	for(int n_pass = m_n_min_repeat; n_pass < m_n_max_repeat; ++ n_pass) {
		if(!r_fsm.AddTransition(TNFATransition(p_tail, p_exit,
		   TNFATransition::tran_Epsilon)))
			return 0;
		// last resort transition from current chain end to the final state

		p_tail = m_p_subnode->p_AddTransitions(p_tail, r_fsm);
		if(!p_tail)
			return 0;
		// next optional repetition in the chain
	}

	if(!r_fsm.AddTransition(TNFATransition(p_tail, p_exit,
	   TNFATransition::tran_Identity)))
		return 0;
	// connect chain end to the final state

	return p_exit;
}

#ifdef REG_EXP_COMPILER_DUMP
/*
 *	void CRegExp_Parser::CNodeQuant::Print(int n_level)
 *		- debug dump of this node at indentation n_level
 *		- prints repeat limits and then the subnode (or "null-subnode"
 *		  in case there is none) one level deeper
 */
void CRegExp_Parser::CNodeQuant::Print(int n_level)
{
	__FuncGuard("CRegExp_Parser::CNodeQuant::Print");

	PrintIndent(n_level);
	printf("node-quant(min-repeat=%d, max-repeat=%d)\n", m_n_min_repeat, m_n_max_repeat);
	// node header

	if(!m_p_subnode) {
		PrintIndent(n_level + 1);
		printf("null-subnode\n");
		return;
	}
	// nothing to descend to

	m_p_subnode->Print(n_level + 1);
}
#endif //REG_EXP_COMPILER_DUMP

/*
 *								--- ~CRegExp_Parser::CNodeQuant ---
 */

/*
 *								--- CRegExp_Parser::CNodeOption ---
 */

/*
 *	TNFAState *CRegExp_Parser::CNodeOption::p_AddTransitions(TNFAState *p_initial,
 *		CRegExp_NFSM &r_fsm) const
 *		- adds transitions for all the alternatives to r_fsm, starting in p_initial
 *		- allocates new state, shared by the alternatives as their common final state
 *		- returns result of CAddParallelTransitions (converted to state pointer)
 *		  or 0 in case the final state couldn't be allocated
 */
TNFAState *CRegExp_Parser::CNodeOption::p_AddTransitions(TNFAState *p_initial,
	CRegExp_NFSM &r_fsm) const
{
	__FuncGuard("CRegExp_Parser::CNodeOption::p_AddTransitions");

	_ASSERTE(m_node_list.size() >= 2);
	// option with less than two alternatives makes no sense

	TNFAState *p_join = r_fsm.p_GetState();
	if(!p_join)
		return 0;
	// common final state

	return std::for_each(m_node_list.begin(), m_node_list.end(),
		CAddParallelTransitions(p_initial, p_join, r_fsm));
	// add parallel transitions generated by the nodes themselves and transition from each
	// node's final state to this node's final state (note in case there are no nodes,
	// no transition is added and so the states end up separated)
}

#ifdef REG_EXP_COMPILER_DUMP
/*
 *	void CRegExp_Parser::CNodeOption::Print(int n_level)
 *		- debug dump of this node at indentation n_level
 *		- prints number of alternatives, followed by the alternatives
 *		  themselves, one level deeper
 */
void CRegExp_Parser::CNodeOption::Print(int n_level)
{
	__FuncGuard("CRegExp_Parser::CNodeOption::Print");

	PrintIndent(n_level);
	printf("node-option(length=%d)\n", int(m_node_list.size()));
	// size() returns size_t; passing it to "%d" directly is a format / argument
	// mismatch (undefined where size_t is wider than int), hence the conversion

	for(size_t i = 0, n = m_node_list.size(); i < n; ++ i)
		m_node_list[i]->Print(n_level + 1);
	// unsigned index, avoids signed / unsigned comparison
}
#endif //REG_EXP_COMPILER_DUMP

/*
 *								--- ~CRegExp_Parser::CNodeOption ---
 */

/*
 *								--- CRegExp_Parser ---
 */

/*
 *	token_iterator CRegExp_Parser::p_Parse_RegularExpression(token_iterator p_begin,
 *		token_iterator p_end, CNodeString &r_reg_exp)
 *		- parses regular expression from tokens p_begin through p_end
 *		- parse tree nodes are added to r_reg_exp (which should be blank)
 *		- parsing stops at the first token which doesn't yield a node
 *		  (p_Parse_RE_Option returned 0) or at p_end
 *		- returns iterator pointing to the first token that was not consumed,
 *		  or p_end in case there was not enough memory to add a node
 *		  (m_b_parse_errors is set in such case)
 */
CRegExp_Parser::token_iterator CRegExp_Parser::p_Parse_RegularExpression(
	CRegExp_Parser::token_iterator p_begin, CRegExp_Parser::token_iterator p_end,
	CRegExp_Parser::CNodeString &r_reg_exp)
{
	__FuncGuard("CRegExp_Parser::p_Parse_RegularExpression");

	while(p_begin != p_end) {
		CNode *p_node = p_Parse_RE_Option(p_begin, p_end);
		// advances p_begin past the tokens it consumed

		if(!p_node)
			break;
		// nothing parsed; leave p_begin at the offending token for the caller

		if(!r_reg_exp.AddNode(p_node)) {
			delete p_node;
			// node wasn't taken over by r_reg_exp, it would leak otherwise

			m_b_parse_errors = true;
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
			fprintf(stderr, "error: not enough memory\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
			return p_end;
		}
	}

	return p_begin;
}

/*
 *	CRegExp_Parser::CNode *CRegExp_Parser::p_Parse_RE_Val(token_iterator &r_p_begin,
 *		token_iterator p_end)
 *		- parses single value at r_p_begin: character, any-char, character range
 *		  or sub-expression in parentheses
 *		- r_p_begin is advanced past the consumed tokens
 *		- returns new node on success
 *		- returns 0 without raising parse error in case there are no tokens left
 *		  or the token doesn't start a value (it's up to caller to decide
 *		  whether that is an error)
 *		- returns 0 and sets m_b_parse_errors on unknown token, missing ')'
 *		  or if there's not enough memory
 */
CRegExp_Parser::CNode *CRegExp_Parser::p_Parse_RE_Val(CRegExp_Parser::token_iterator &r_p_begin,
	CRegExp_Parser::token_iterator p_end)
{
	__FuncGuard("CRegExp_Parser::p_Parse_RE_Val");

	if(r_p_begin == p_end)
		return 0;
	// no tokens left, must not dereference the end iterator (this is reached
	// e.g. when the expression ends with the option operator)

	CRegExp_Lexer::TToken t_token = *r_p_begin;
	switch(t_token.n_type) {
	case CRegExp_Lexer::token_Unknown:
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
		fprintf(stderr, "error: unknown token ocurred in token stream\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
		break;
		// falls to the generic error report below the switch
	case CRegExp_Lexer::token_Char:
		{
			++ r_p_begin;
			CNodeChar *p_node;
			if(!(p_node = new(std::nothrow) CNodeChar(t_token.t_data.n_character))) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
				fprintf(stderr, "error: not enough memory\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
				m_b_parse_errors = true;
			}
			return p_node;
		}
	case CRegExp_Lexer::token_AnyChar:
		{
			++ r_p_begin;
			CNodeAnyChar *p_node;
			if(!(p_node = new(std::nothrow) CNodeAnyChar)) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
				fprintf(stderr, "error: not enough memory\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
				m_b_parse_errors = true;
			}
			return p_node;
		}
	case CRegExp_Lexer::token_LeftPar:
		{
			++ r_p_begin;
			CNodeString *p_node;
			if(!(p_node = new(std::nothrow) CNodeString)) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
				fprintf(stderr, "error: not enough memory\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
				m_b_parse_errors = true;
				return 0;
			}
			// node for the sub-expression

			r_p_begin = p_Parse_RegularExpression(r_p_begin, p_end, *p_node);
			// parse contents of the parentheses, should stop at the right parenthesis

			if(r_p_begin >= p_end || (*r_p_begin).n_type != CRegExp_Lexer::token_RightPar) {
				delete p_node;
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
				if(r_p_begin >= p_end)
					fprintf(stderr, "error: unexpected end of token stream while looking for \')\'\n");
				else {
#ifdef REG_EXP_COMPILER_DUMP
					fprintf(stderr, "error: missing \')\' : before \'%s\'\n",
						CRegExp_Lexer::CPrintToken::p_s_TokenName((*r_p_begin).n_type));
#else //REG_EXP_COMPILER_DUMP
					fprintf(stderr, "error: missing \')\'\n");
#endif //REG_EXP_COMPILER_DUMP
				}
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
				m_b_parse_errors = true;
				return 0;
			}
			// sub-expression must be closed by the right parenthesis

			++ r_p_begin;
			// skip the right parenthesis

			return p_node;
		}
	case CRegExp_Lexer::token_CharRange:
		{
			++ r_p_begin;
			CNodeRangeChar *p_node;
			if(!(p_node = new(std::nothrow) CNodeRangeChar(t_token.t_data.t_range.b_inverse,
			   t_token.t_data.t_range.n_range_num, t_token.t_data.t_range.p_range))) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
				fprintf(stderr, "error: not enough memory\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
				m_b_parse_errors = true;
			}
			return p_node;
		}
	default: // !!
		return 0;
		// token doesn't start a value; not consumed, no error raised here
	}

	m_b_parse_errors = true;
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
	fprintf(stderr, "error: error parsing regular expression\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
	return 0;
}

/*
 *	CRegExp_Parser::CNode *CRegExp_Parser::p_Parse_RE_Quant(token_iterator &r_p_begin,
 *		token_iterator p_end)
 *		- parses value (p_Parse_RE_Val) followed by any number of quantifiers
 *		- each quantifier token wraps the node parsed so far in a new CNodeQuant
 *		- r_p_begin is advanced past the consumed tokens
 *		- returns the resulting node (which is whatever p_Parse_RE_Val returned,
 *		  possibly 0, in case no quantifier follows)
 *		- returns 0 and sets m_b_parse_errors in case quantifier has nothing
 *		  to apply to or if there's not enough memory
 */
CRegExp_Parser::CNode *CRegExp_Parser::p_Parse_RE_Quant(CRegExp_Parser::token_iterator &r_p_begin,
	CRegExp_Parser::token_iterator p_end)
{
	__FuncGuard("CRegExp_Parser::p_Parse_RE_Quant");

	CNode *p_node = p_Parse_RE_Val(r_p_begin, p_end);

	while(r_p_begin != p_end) {
		CRegExp_Lexer::TToken t_token = *r_p_begin;
		switch(t_token.n_type) {
		case CRegExp_Lexer::token_Quant:
			{
				if(!p_node) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
					fprintf(stderr, "error: missing characters before quantifier\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
					m_b_parse_errors = true;
					return 0;
				}
				// quantifier needs something to apply to

				++ r_p_begin;
				CNodeQuant *p_quant_node;
				if(!(p_quant_node = new(std::nothrow) CNodeQuant(p_node, t_token.t_data.t_quant.n_min,
				   t_token.t_data.t_quant.n_max))) {
					delete p_node;
					// quantifier node wasn't created so nobody owns p_node, it would leak otherwise

#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
					fprintf(stderr, "error: not enough memory\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
					m_b_parse_errors = true;
					return 0;
				}
				p_node = p_quant_node;
				// quantifier node becomes the current node (so the next quantifier wraps it)
			}
			break;
		default:
			return p_node;
			// not a quantifier, leave the token to the caller
		}
	}

	return p_node;
}

/*
 *	CRegExp_Parser::CNode *CRegExp_Parser::p_Parse_RE_Option(token_iterator &r_p_begin,
 *		token_iterator p_end)
 *		- parses quantified value (p_Parse_RE_Quant), optionally followed by option
 *		  operators and further quantified values, which are collected
 *		  in a new CNodeOption
 *		- r_p_begin is advanced past the consumed tokens
 *		- returns the resulting node (which is whatever p_Parse_RE_Quant returned,
 *		  possibly 0, in case no option operator follows)
 *		- returns 0 and sets m_b_parse_errors in case option operator has nothing
 *		  on its left side or if there's not enough memory
 *		- in case option operator has nothing on its right side, m_b_parse_errors
 *		  is set, but the (incomplete) option node is still returned
 */
CRegExp_Parser::CNode *CRegExp_Parser::p_Parse_RE_Option(CRegExp_Parser::token_iterator &r_p_begin,
	CRegExp_Parser::token_iterator p_end)
{
	__FuncGuard("CRegExp_Parser::p_Parse_RE_Option");

	CNode *p_node = p_Parse_RE_Quant(r_p_begin, p_end);

	while(r_p_begin != p_end) {
		CRegExp_Lexer::TToken t_token = *r_p_begin;
		switch(t_token.n_type) {
		case CRegExp_Lexer::token_Option:
			{
				if(!p_node) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
					fprintf(stderr, "error: missing characters before option operator\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
					m_b_parse_errors = true;
					return 0;
				}
				// option operator needs left side

				CNodeOption *p_option_node;
				if(!(p_option_node = new(std::nothrow) CNodeOption)) {
					delete p_node;
					// left node has no owner yet, it would leak otherwise

#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
					fprintf(stderr, "error: not enough memory\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
					m_b_parse_errors = true;
					return 0;
				}
				// create option node

				if(!p_option_node->AddNode(p_node)) {
					delete p_node;
					delete p_option_node;
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
					fprintf(stderr, "error: not enough memory\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
					m_b_parse_errors = true;
					return 0;
				}
				// add left node

				do {
					++ r_p_begin;
					// skip the option operator

					CNode *p_right_node = p_Parse_RE_Quant(r_p_begin, p_end);
					if(!p_right_node)
						break;
					if(!p_option_node->AddNode(p_right_node)) {
						delete p_right_node;
						delete p_option_node;
						// option node is not returned to anyone, it would leak otherwise

#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
						fprintf(stderr, "error: not enough memory\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
						m_b_parse_errors = true;
						return 0;
					}
				} while(r_p_begin != p_end && (*r_p_begin).n_type == CRegExp_Lexer::token_Option);
				// add right nodes

				if(p_option_node->n_Size() < 2) {
#ifdef REG_EXP_COMPILER_STDERR_OUTPUT
					fprintf(stderr, "error: missing characters after option operator\n");
#endif //REG_EXP_COMPILER_STDERR_OUTPUT
					m_b_parse_errors = true;
				}
				// check whether there were right nodes

				p_node = p_option_node;
				// option node becomes the current node
			}
			break;
		default:
			return p_node;
			// not an option operator, leave the token to the caller
		}
	}

	return p_node;
}

/*
 *								--- ~CRegExp_Parser ---
 */
