############################################################################
#  hybrid_fa.py: Module for PATTERN MATCH algorithm Hybrid Finite Automaton
#  Copyright (C) 2010 Brno University of Technology, ANT @ FIT
#  Author(s): Jaroslav Suchodol
############################################################################
#
#  LICENSE TERMS
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
#  1. Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in
#     the documentation and/or other materials provided with the
#     distribution.
#  3. All advertising materials mentioning features or use of this software
#     or firmware must display the following acknowledgement:
#
#       This product includes software developed by the University of
#       Technology, Faculty of Information Technology, Brno and its
#       contributors.
#
#  4. Neither the name of the Company nor the names of its contributors
#     may be used to endorse or promote products derived from this
#     software without specific prior written permission.
#
#  This software or firmware is provided ``as is'', and any express or
#  implied warranties, including, but not limited to, the implied warranties
#  of merchantability and fitness for a particular purpose are disclaimed.
#  In no event shall the company or contributors be liable for any
#  direct, indirect, incidental, special, exemplary, or consequential
#  damages (including, but not limited to, procurement of substitute
#  goods or services; loss of use, data, or profits; or business
#  interruption) however caused and on any theory of liability, whether
#  in contract, strict liability, or tort (including negligence or
#  otherwise) arising in any way out of the use of this software, even
#  if advised of the possibility of such damage.
#
#  $Id$

"""Module for pattern match: algorithm for Hybrid Finite Automat."""

# Based on "A hybrid finite automaton for practical deep packet inspection"
# ISBN: 978-1-59593-770-4
# URL: http://portal.acm.org/citation.cfm?id=1364656

from netbench.pattern_match import sym_char
from netbench.pattern_match import msfm_parser
from netbench.pattern_match.b_dfa import b_dfa
from netbench.pattern_match.b_nfa import b_nfa
import os, re, tempfile

class HYBRID_FA(b_dfa):
  """Class for Hybrid Finite Automat."""

  def __init__(self, FileName):
    """Initialise the inherited b_dfa part and build the hybrid automaton.

    FileName is passed unchanged to make_hyb_fa(), which reads the
    regular expressions from that file.
    """

    # Set up the base class first, then construct the automaton.
    b_dfa.__init__(self)
    self.make_hyb_fa(FileName)

  def make_hyb_fa(self, FileName):
    """Build the hybrid automaton from the regular expressions in FileName.

    Each line of FileName is treated as one RE; lines are assumed to be
    written as /RE/ (the first and last character are stripped as
    delimiters).  A line is cut at its first "state blow up" construct
    (.*  [^..]*  or a counting constraint such as .{15}, a{15}, [abc]{15}):
    the text before the cut goes to the DFA part, the rest becomes a
    separate NFA part.  Lines without such a construct go entirely to the
    DFA part.

    Attributes set on self:
      D        -- list of DFA-side prefixes (RE text) of the split lines
      N        -- list of b_nfa objects, N[i] continues prefix D[i]
      c_c      -- list of 0/1, c_c[i] is 1 when N[i] starts with a {..}
                  counting constraint
      dfa      -- determinised b_dfa built from all DFA-side RE
      sort     -- dict: DFA state -> list of (symbol, target) tuples
      tran_aut -- list, tran_aut[i] is the DFA state where N[i] is joined

    Temporary files created here are always removed, and the current
    working directory is always restored, even when parsing fails.
    """

    # Pattern locating the first state blow up construct on a line.
    # Compiled once; it does not depend on the line being processed.
    blow_up = re.compile(r"""
      (?<=[^/])[.](?=[*])   # for like .*
      (?![^[]*?\])          # because of .* not in [.*]
      |
      \[\^
      .*?                   # for like [^abc]*
      (?<!\\)\](?=[*])
      (?![^[]*?\])
      |
      [.](?=[{])            # for like .{15}
      (?![^[]*?\])
      |
      .(?=[{])              # for like a{15}
      (?![^[]*?\])
      |
      \[\^?                 # for like [abc]{15} or [^abc]{15}
      .*?
      (?<!\\)\](?=[{])
      (?![^[]*?\])
      """, re.X)

    self.D = []     # DFA part of joining NFA
    self.N = []     # NFA part of joining DFA
    self.c_c = []   # list indicating counting constraints NFA
    whole = set()   # RE handled completely by the DFA (no blow up RE)
    nfa_paths = []  # names of the temporary files holding the NFA parts
    dfa_file = tempfile.NamedTemporaryFile(delete=False)
    try:
      # Look into file "FileName" and divide it to DFA file and NFA files.
      try:
        with open(FileName, "r") as fr:
          for line in fr:
            # remove '\n' from end of line
            line = line.rsplit('\n', 1)[0]
            found = blow_up.search(line)
            if found is None:
              head = line
            else:
              head = line[:found.start()]
            # Remove .* from begin of pattern.  Only a .* directly after
            # the opening '/' is dropped; a ".*" elsewhere in the prefix
            # (e.g. inside a character class like [.*]) must stay intact.
            if head.startswith("/.*"):
              head = '/' + head[3:]
            if found is None:
              # line NOT contain .* or .{m,n} like pattern
              dfa_file.write(head + '\n')
              whole.add(head[1:-1])
              continue
            # line contain .* or .{m,n} like pattern
            dfa_file.write(head + "/\n")
            self.D.append(head[1:])
            nfa_file = tempfile.NamedTemporaryFile(delete=False)
            nfa_paths.append(nfa_file.name)
            try:
              # the NFA part starts exactly at the blow up construct
              nfa_file.write('/' + line[found.start():] + '\n')
            finally:
              nfa_file.close()
            # The lookahead in the pattern guarantees that the matched
            # construct is followed by '*' or '{'; '{' means counting
            # constraint.
            self.c_c.append(1 if line.startswith('{', found.end()) else 0)
      finally:
        dfa_file.close()

      # Make DFA part of hybrid automat.
      parser = msfm_parser.msfm_parser()
      parser.load_file(dfa_file.name)
      self.dfa = b_dfa()
      self._create_from_project_root(self.dfa, parser)
      self.dfa.determinise(False)

      # Make NFA parts of hybrid automat (the parser object is reused).
      for path in nfa_paths:
        parser.load_file(path)
        nfa = b_nfa()
        self._create_from_project_root(nfa, parser)
        nfa.remove_epsilons()
        nfa._automaton.Flags["Hybrid FA - one NFA part"] = True
        self.N.append(nfa)
    finally:
      # Best-effort removal of the temporary files; a failure here must
      # not hide an exception raised while building the automata.
      for path in [dfa_file.name] + nfa_paths:
        try:
          os.unlink(path)
        except OSError:
          pass

    # Discover states where are joining DFA->NFA.
    a = self.dfa._automaton   # shortcut
    # Group transitions by source state: state -> [(symbol, target), ...].
    # NOTE(review): assumes the DFA states are numbered 0..len(states)-1.
    self.sort = dict((s, []) for s in range(0, len(a.states)))
    for t in a.transitions:
      self.sort[t[0]].append(t[1:])
    # Walk every DFA-side prefix character by character from the start
    # state; the state reached is where the matching NFA part is joined.
    # NOTE(review): the prefix is walked as literal text, so escapes or
    # character classes inside a prefix are followed per raw character.
    self.tran_aut = []        # transitions between DFA and NFA
    for prefix in self.D:
      cur_state = a.start
      for c in prefix:
        for t in self.sort[cur_state]:
          symbol = a.alphabet[t[0]]
          if isinstance(symbol, sym_char.b_Sym_char):
            # single character
            accepts = symbol.char == c
          else:
            # character range
            accepts = c in symbol.charClass
          if accepts:
            cur_state = t[-1]
            # skip other transitions
            break
      self.tran_aut.append(cur_state)

    # Remove joining DFA->NFA state from final states if
    # do not exist any RE which is same as DFA part of NFA
    # example two RE: ab, ab.*cd
    for prefix, join_state in zip(self.D, self.tran_aut):
      if prefix not in whole and join_state in a.final:
        a.final.remove(join_state)

    # Add automat flag.
    a.Flags["Hybrid FA - DFA part"] = True

  def _create_from_project_root(self, automaton, parser):
    """Call automaton.create_by_parser(parser) with cwd set to "../../".

    The previous working directory is restored afterwards, also when
    create_by_parser raises.
    NOTE(review): presumably create_by_parser depends on paths relative
    to the project root (two levels above algorithms/hybrid_fa) -- confirm.
    """

    origin = os.getcwd()
    os.chdir("../../")
    try:
      automaton.create_by_parser(parser)
    finally:
      os.chdir(origin)

  def show(self, FileName):
    """Print states, alphabet, start, transitions, final, Flags of DFA
    part and NFA parts. And save graphviz dot file, representing
    graphical structure of nfa_data."""

  # Display DFA part.
    print '*' * 80
    print "*** HYBRID FA - DFA part ***"
    print "STATES:", self.dfa._automaton.states, '\n'
    print "ALPHABET:", self.dfa._automaton.alphabet, '\n'
    print "START STATE:", self.dfa._automaton.start, '\n'
    print "TRANSITIONS:", self.dfa._automaton.transitions, '\n'
    print "FINAL STATES:", self.dfa._automaton.final, '\n'
    print "FLAGS OF AUTOMAT:", self.dfa._automaton.Flags
    print '*' * 80, '\n'
    self.dfa._automaton.Show(FileName)
  # Display NFA parts.
    for i in range(0, len(self.N)):
      print '*' * 80
      print "*** HYBRID FA - NFA part which joining to",
      print "\"{0}\" in DFA part ***".format(self.D[i])
      print "STATES:", self.N[i]._automaton.states, '\n'
      print "ALPHABET:", self.N[i]._automaton.alphabet, '\n'
      print "START STATE:", self.N[i]._automaton.start, '\n'
      print "TRANSITIONS:", self.N[i]._automaton.transitions, '\n'
      print "FINAL STATES:", self.N[i]._automaton.final, '\n'
      print "FLAGS OF AUTOMAT:", self.N[i]._automaton.Flags
      print '*' * 80, '\n'
      self.N[i]._automaton.Show(FileName + "_NFA_" + str(i) + ".dot")

  def SaveToFile(self, FileName):
    """Write the hybrid automaton to FileName as '|'-separated text.

    According to the original description this file is the input of the
    matching algorithm written in C.  Reads self.dfa, self.sort,
    self.tran_aut, self.c_c and self.N, i.e. make_hyb_fa() must have run.

    Layout, DFA part:
      number of states
      number of alphabet symbols
      per symbol "first->last|" index range into the character list
      "<count>-><ord>|<ord>|..." list of all characters of all symbols
      start state
      per state the number of outgoing transitions, "n|n|..."
      all transitions grouped by state, "symbol->target|..."
      number of final states
      final states, "s|s|..."
    Layout, NFA parts: a "count NFA: n" line, then for every NFA
      "<join state>->nfa->c_c: <0|1>"
      number of states
      sorted real state numbers, "s|s|..."
      start state
      per state (sorted) "t_c: <n>" followed, for every transition, by
        "c_c: <number of chars>, d: <target>" and the chars "<ord>|..."
      number of final states
      final states as positions in the sorted state list, "i|i|..."

    The file is closed on return, also when writing fails.
    """

    a = self.dfa._automaton   # shortcut
    with open(FileName, 'w') as fw:
      # ---- First write DFA part. ----
      # write the number of states
      fw.write(str(len(a.states)) + '\n')

      # write ALPHABET
      codes = []    # ordinal (as text) of every character, symbol by symbol
      ranges = []   # "first->last" position range in codes for each symbol
      symbol_count = len(a.alphabet.keys())
      for index in range(0, symbol_count):
        symbol = a.alphabet[index]
        if isinstance(symbol, sym_char.b_Sym_char):
          # NOTE(review): the character is taken from str(symbol) here
          # while the NFA section below reads symbol.char -- confirm that
          # both give the same single character.
          members = [str(symbol)]
        else:
          members = list(symbol.charClass)
        first = len(codes)
        codes.extend(str(ord(char)) for char in members)
        ranges.append(str(first) + "->" + str(len(codes) - 1))
      fw.write(str(symbol_count) + '\n' +
               "".join(r + '|' for r in ranges) + '\n')
      fw.write(str(len(codes)) + "->" +
               "".join(code + '|' for code in codes) + '\n')

      # write START STATE
      fw.write(str(a.start) + '\n')

      # write TRANSITIONS
      counts = []   # number of transitions per state
      edges = []    # transitions as "symbol->target"
      for s in range(0, len(a.states)):
        outgoing = self.sort[s]
        counts.append(str(len(outgoing)))
        for t in outgoing:
          edges.append(str(t[0]) + '->' + str(t[1]))
      fw.write("".join(c + '|' for c in counts) + '\n' +
               "".join(e + '|' for e in edges) + '\n')

      # write FINAL STATES
      fw.write(str(len(a.final)) + '\n' +
               "".join(str(x) + '|' for x in a.final) + '\n')

      # ---- Second write NFA parts. ----
      # write count of NFAs
      fw.write("count NFA: " + str(len(self.tran_aut)) + '\n')
      for i, nfa in enumerate(self.N):
        aut = nfa._automaton
        # write transition state between dfa and nfa
        fw.write(str(self.tran_aut[i]) + "->nfa->c_c: " +
                 str(self.c_c[i]) + '\n')
        # write the number of states
        fw.write(str(len(aut.states)) + '\n')
        # write real number of state
        ordered = sorted(aut.states.keys())
        fw.write("".join(str(r) + '|' for r in ordered) + '\n')
        # write START STATE
        fw.write(str(aut.start) + '\n')

        # write TRANSITIONS, grouped by source state
        by_source = dict((s, []) for s in ordered)
        for t in aut.transitions:
          by_source[t[0]].append(t[1:])
        for s in ordered:
          fw.write("t_c: " + str(len(by_source[s])) + '\n')
          for t in by_source[s]:
            symbol = aut.alphabet[t[0]]
            if isinstance(symbol, sym_char.b_Sym_char):
              fw.write("c_c: 1, d: " + str(t[1]) + '\n')
              fw.write(str(ord(symbol.char)) + "|\n")
            else:
              fw.write("c_c: " + str(len(symbol.charClass)) +
                       ", d: " + str(t[1]) + '\n')
              fw.write("".join(str(ord(c)) + '|'
                               for c in symbol.charClass) + '\n')

        # write FINAL STATES as positions in the sorted state list; the
        # position map is built once instead of sorting per final state
        position = dict((state, idx) for idx, state in enumerate(ordered))
        fw.write(str(len(aut.final)) + '\n' +
                 "".join(str(position[x]) + '|' for x in aut.final) + '\n')

  def report_memory(self):
    """Print number DFA/NFA states and transitions."""

    n_states = 0
    n_transitions = 0

    for i in range(0, len(self.N)):
      n_states += len(self.N[i]._automaton.states)
      n_transitions += len(self.N[i]._automaton.transitions)
    print "\nReport memory:"
    print "Number DFA states:", len(self.dfa._automaton.states), "---", \
        "Number DFA transitions:", len(self.dfa._automaton.transitions)
    print "Number NFA states:", n_states, "---", \
        "Number NFA transitions:", n_transitions, "\n"

