###############################################################################
#  msfm_parser.py: Module for PATTERN MATCH - parser of MSFM file format 
#                  produced by "C" parser.
#  Copyright (C) 2010 Brno University of Technology, ANT @ FIT
#  Author(s): Vlastimil Kosar <ikosar@fit.vutbr.cz>
###############################################################################
#
#  LICENSE TERMS
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
#  1. Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in
#     the documentation and/or other materials provided with the
#     distribution.
#  3. All advertising materials mentioning features or use of this software
#     or firmware must display the following acknowledgement:
#
#       This product includes software developed by the University of
#       Technology, Faculty of Information Technology, Brno and its
#       contributors.
#
#  4. Neither the name of the Company nor the names of its contributors
#     may be used to endorse or promote products derived from this
#     software without specific prior written permission.
#
#  This software or firmware is provided ``as is'', and any express or implied
#  warranties, including, but not limited to, the implied warranties of
#  merchantability and fitness for a particular purpose are disclaimed.
#  In no event shall the company or contributors be liable for any
#  direct, indirect, incidental, special, exemplary, or consequential
#  damages (including, but not limited to, procurement of substitute
#  goods or services; loss of use, data, or profits; or business
#  interruption) however caused and on any theory of liability, whether
#  in contract, strict liability, or tort (including negligence or
#  otherwise) arising in any way out of the use of this software, even
#  if advised of the possibility of such damage.
#
#  $Id$

import commands
import os
import re
import nfa_data
from nfa_parser import nfa_parser
import copy
import sym_char
import sym_char_class
from b_state import b_State
import random
import sys

class msfm_parser(nfa_parser):
    """Class for parsing RE using the external C regexp parser.

    The regular expression is written to a temporary file, handed to the
    ``pcre_gen`` program (run inside the ``parser`` directory) and the
    automaton file it produces (``Automat0.msfm``) is converted into an
    nfa_data object.

     FORMAT of Automata file (MSFM)
      - Number of the states in the automaton
      - Number of the transitions in the automaton
      - Each transition is represented by one line in the file. The line
        is in format Source_State|Symbol|Target_State|Epsilon
      - End of the transition table is represented by a line of #
      - Number of the end states
      - Line with identifiers of the end states. Every end state is
        followed by , (comma)
      - End of the end state section is represented by a line of #
      - Number of the symbols in the symbol table
      - Every symbol is stored on its own line and it is represented
        as Symbol_Number:Character1|Character2|
      - End of the file
    """
    def __init__(self):
        """ Class constructor. Inits the seed of random generator. """
        nfa_parser.__init__(self)           # Calling the parent function
        random.seed()

    def get_nfa(self):
        """Parse the current line and return the parsed nfa.

        Returns:
        Created automaton in nfa_data format. Returns None if failure happens
        (no regular expression is set, the current line contains no '/'
        delimiter, or the external parser command reports an error).
        """
        # Check if some reg. exp. are set.
        if self._position < 0:
            return None

        line = self._msfm_anchor_workaround(self._text[self._position])
        if line is None:
            print("msfm_parser: regular expression does not contain the '/' delimiter")
            return None

        # Invoke C regexp parser. If error, stop.
        status, output = self._msfm_run_c_parser(line)
        if status != 0:
            print(output)
            return None

        # Read the automaton file as binary values; try/finally guarantees
        # the handle is released even if the read fails.
        f = open("Automat0.msfm", "rb")
        try:
            blob = f.read()
        finally:
            f.close()

        return self._msfm_build_nfa(self._msfm_split_lines(blob))

    def _msfm_anchor_workaround(self, line):
        """Rewrite the regular expression so it needs no '^' support.

        The msfm parser doesn't support ^, so a leading '^' (directly after
        the first '/') is dropped, and ".*" is inserted after the first '/'
        when there is no such '^'.

        Returns:
        The rewritten line, or None if the line contains no '/'.
        """
        slash = line.find('/')
        if slash < 0:
            # Without the delimiter there is nothing to anchor on.
            return None
        head = line[:slash + 1]
        body = line[slash + 1:]
        # startswith() is also safe when nothing follows the '/'.
        if body.startswith('^'):
            return head + body[1:]
        return head + ".*" + body

    def _msfm_run_c_parser(self, line):
        """Run the C regexp parser on the given line.

        The line is stored in a randomly named temporary file inside the
        ``parser`` directory, ``pcre_gen`` is run on it and the produced
        ``Automat0.msfm`` is copied to the current directory.

        Returns:
        Tuple (status, output) as returned by commands.getstatusoutput().
        """
        # Random value keeps the temporary file name unique.
        value = random.randint(0, sys.maxint)
        name = "text_123457957256884558_" + str(value) + ".txt"
        path = "./parser/" + name

        # The steps are chained with && so that a failure of any of them
        # (notably pcre_gen itself) is reflected in the returned status;
        # with ';' the status would be the one of the last command only.
        # NOTE(review): this assumes pcre_gen exits with 0 on success.
        cmd = "cd parser && ./pcre_gen " + name + \
              " && cp Automat0.msfm ../Automat0.msfm"
        try:
            # Save regular expression to file.
            f = open(path, "wb")
            try:
                f.write(line)
            finally:
                f.close()
            return commands.getstatusoutput(cmd)
        finally:
            # Best-effort cleanup of the temporary file, done here so it
            # also happens when writing or running the command fails.
            try:
                os.remove(path)
            except OSError:
                pass

    def _msfm_split_lines(self, blob):
        """Split the raw content of an MSFM file into logical lines.

        A newline directly followed by '|' is kept inside the current line
        (it is a newline character listed in the symbol table) and a run of
        three '|' is reduced to two, so that a later split on '|' yields a
        single empty item for the '|' character. Content after the last
        newline is not returned.

        Returns:
        List of the logical lines without their terminating newline.
        """
        lines = list()
        current = list()
        size = len(blob)
        i = 0
        while i < size:
            char = blob[i]
            if char != '\n':
                current.append(char)
                # The slice equals '||' only if two more '|' really follow.
                if char == '|' and blob[i + 1:i + 3] == '||':
                    current.append('|')
                    # Skip the second and the third '|' of the run.
                    i = i + 2
            elif (i + 1 < size) and (blob[i + 1] == '|'):
                current.append(char)
            else:
                lines.append("".join(current))
                current = list()
            i = i + 1
        return lines

    def _msfm_build_nfa(self, fsm_lines):
        """Create the automaton from the logical lines of an MSFM file.

        Returns:
        Created automaton in nfa_data format.
        """
        # fsm_lines[1] is the number of transitions in the automaton and
        # 2 is the index of the first transition. The transition table is
        # a list of lists and represents the whole transition table.
        transition_count = int(fsm_lines[1])
        transition_table = [x.split("|")
                            for x in fsm_lines[2:transition_count + 2]]

        # List of the end states is stored after all transitions and after
        # 4 other lines (number of states, number of transitions, the line
        # of #### and number of end states). End states are separated
        # by , (comma).
        end_states = fsm_lines[transition_count + 4].split(",")

        # Alphabet symbols start on the index transition_count + 7
        # (4 as before + line of end states, line of # and number
        # of symbols).
        symbols = fsm_lines[transition_count + 7:]

        nfa = nfa_data.nfa_data()

        # Creates end states objects.
        for state in end_states:
            if state != "":
                final_state = b_State(int(state), self._position)
                nfa.states[final_state.get_id()] = final_state
                nfa.final.add(final_state.get_id())

        # Set start state to 0.
        nfa.start = 0

        # For every symbol in alphabet
        for act_sym in symbols:
            # Separate symbol number and symbol data (done by first :).
            # The symbol number is parsed as a hexadecimal value.
            colon = act_sym.find(":")
            sym_id = int(act_sym[:colon], 16)

            # The last character of the line (trailing separator) is
            # dropped before splitting. An empty item stands for the '|'
            # character itself.
            chars = set()
            for item in set(act_sym[colon + 1:-1].split("|")):
                if item != '':
                    chars.add(item)
                else:
                    chars.add('|')

            if len(chars) == 1:
                # Create char if number of symbols is 1.
                char = chars.pop()
                symbol = sym_char.b_Sym_char(char, char, sym_id)
            else:
                # Create char class otherwise.
                text = "[" + "".join(chars) + "]"
                symbol = sym_char_class.b_Sym_char_class(text, chars, sym_id)
            nfa.alphabet[symbol.get_id()] = symbol

        # TODO: use special class for Epsilon?
        # Epsilon is represented now as sym_char object with char ""
        # and index -1
        epsilon = sym_char.b_Sym_char("Epsilon", "", -1)
        nfa.alphabet[epsilon.get_id()] = epsilon

        # Add non final states and transitions to automaton.
        for transition in transition_table:
            source = int(transition[0])
            target = int(transition[2])

            # if not in states, add start state of transition.
            if source not in nfa.states:
                nfa.states[source] = b_State(source, -1)

            # if not in states, add end state of transition.
            if target not in nfa.states:
                nfa.states[target] = b_State(target, -1)

            # Handle epsilon transitions (flagged by '1' in the last field).
            # NOTE(review): the symbol field is parsed as decimal here while
            # the symbol table ids are parsed as hexadecimal - confirm this
            # matches the output of pcre_gen.
            if transition[3] == '1':
                alpha_num = -1
            else:
                alpha_num = int(transition[1])

            # Add transition to automaton.
            nfa.transitions.add((source, alpha_num, target))

        return nfa

###############################################################################
# End of File msfm_parser.py                                                  #
###############################################################################