1#
2#   Plex - Transition Maps
3#
4#   This version represents state sets direcly as dicts
5#   for speed.
6#
7
8from sys import maxint as maxint
9
10class TransitionMap(object):
11  """
12  A TransitionMap maps an input event to a set of states.
13  An input event is one of: a range of character codes,
14  the empty string (representing an epsilon move), or one
15  of the special symbols BOL, EOL, EOF.
16
17  For characters, this implementation compactly represents
18  the map by means of a list:
19
20    [code_0, states_0, code_1, states_1, code_2, states_2,
21      ..., code_n-1, states_n-1, code_n]
22
23  where |code_i| is a character code, and |states_i| is a
24  set of states corresponding to characters with codes |c|
25  in the range |code_i| <= |c| <= |code_i+1|.
26
27  The following invariants hold:
28    n >= 1
29    code_0 == -maxint
30    code_n == maxint
31    code_i < code_i+1 for i in 0..n-1
32    states_0 == states_n-1
33
34  Mappings for the special events '', BOL, EOL, EOF are
35  kept separately in a dictionary.
36  """
37
38  map = None     # The list of codes and states
39  special = None # Mapping for special events
40
41  def __init__(self, map = None, special = None):
42    if not map:
43      map = [-maxint, {}, maxint]
44    if not special:
45      special = {}
46    self.map = map
47    self.special = special
48    #self.check() ###
49
50  def add(self, event, new_state,
51    TupleType = tuple):
52    """
53    Add transition to |new_state| on |event|.
54    """
55    if type(event) is TupleType:
56      code0, code1 = event
57      i = self.split(code0)
58      j = self.split(code1)
59      map = self.map
60      while i < j:
61        map[i + 1][new_state] = 1
62        i = i + 2
63    else:
64      self.get_special(event)[new_state] = 1
65
66  def add_set(self, event, new_set,
67    TupleType = tuple):
68    """
69    Add transitions to the states in |new_set| on |event|.
70    """
71    if type(event) is TupleType:
72      code0, code1 = event
73      i = self.split(code0)
74      j = self.split(code1)
75      map = self.map
76      while i < j:
77        map[i + 1].update(new_set)
78        i = i + 2
79    else:
80      self.get_special(event).update(new_set)
81
82  def get_epsilon(self,
83    none = None):
84    """
85    Return the mapping for epsilon, or None.
86    """
87    return self.special.get('', none)
88
89  def iteritems(self,
90    len = len):
91    """
92    Return the mapping as an iterable of ((code1, code2), state_set) and
93    (special_event, state_set) pairs.
94    """
95    result = []
96    map = self.map
97    else_set = map[1]
98    i = 0
99    n = len(map) - 1
100    code0 = map[0]
101    while i < n:
102      set = map[i + 1]
103      code1 = map[i + 2]
104      if set or else_set:
105        result.append(((code0, code1), set))
106      code0 = code1
107      i = i + 2
108    for event, set in self.special.iteritems():
109      if set:
110        result.append((event, set))
111    return iter(result)
112  items = iteritems
113
114  # ------------------- Private methods --------------------
115
116  def split(self, code,
117    len = len, maxint = maxint):
118    """
119    Search the list for the position of the split point for |code|,
120    inserting a new split point if necessary. Returns index |i| such
121    that |code| == |map[i]|.
122    """
123    # We use a funky variation on binary search.
124    map = self.map
125    hi = len(map) - 1
126    # Special case: code == map[-1]
127    if code == maxint:
128      return hi
129    # General case
130    lo = 0
131    # loop invariant: map[lo] <= code < map[hi] and hi - lo >= 2
132    while hi - lo >= 4:
133      # Find midpoint truncated to even index
134      mid = ((lo + hi) // 2) & ~1
135      if code < map[mid]:
136        hi = mid
137      else:
138        lo = mid
139    # map[lo] <= code < map[hi] and hi - lo == 2
140    if map[lo] == code:
141      return lo
142    else:
143      map[hi:hi] = [code, map[hi - 1].copy()]
144      #self.check() ###
145      return hi
146
147  def get_special(self, event):
148    """
149    Get state set for special event, adding a new entry if necessary.
150    """
151    special = self.special
152    set = special.get(event, None)
153    if not set:
154      set = {}
155      special[event] = set
156    return set
157
158  # --------------------- Conversion methods -----------------------
159
160  def __str__(self):
161    map_strs = []
162    map = self.map
163    n = len(map)
164    i = 0
165    while i < n:
166      code = map[i]
167      if code == -maxint:
168        code_str = "-inf"
169      elif code == maxint:
170        code_str = "inf"
171      else:
172        code_str = str(code)
173      map_strs.append(code_str)
174      i = i + 1
175      if i < n:
176        map_strs.append(state_set_str(map[i]))
177      i = i + 1
178    special_strs = {}
179    for event, set in self.special.iteritems():
180      special_strs[event] = state_set_str(set)
181    return "[%s]+%s" % (
182      ','.join(map_strs),
183      special_strs
184    )
185
186  # --------------------- Debugging methods -----------------------
187
188  def check(self):
189    """Check data structure integrity."""
190    if not self.map[-3] < self.map[-1]:
191      print(self)
192      assert 0
193
194  def dump(self, file):
195    map = self.map
196    i = 0
197    n = len(map) - 1
198    while i < n:
199      self.dump_range(map[i], map[i + 2], map[i + 1], file)
200      i = i + 2
201    for event, set in self.special.iteritems():
202      if set:
203        if not event:
204          event = 'empty'
205        self.dump_trans(event, set, file)
206
207  def dump_range(self, code0, code1, set, file):
208    if set:
209      if code0 == -maxint:
210        if code1 == maxint:
211          k = "any"
212        else:
213          k = "< %s" % self.dump_char(code1)
214      elif code1 == maxint:
215        k = "> %s" % self.dump_char(code0 - 1)
216      elif code0 == code1 - 1:
217        k = self.dump_char(code0)
218      else:
219        k = "%s..%s" % (self.dump_char(code0),
220          self.dump_char(code1 - 1))
221      self.dump_trans(k, set, file)
222
223  def dump_char(self, code):
224    if 0 <= code <= 255:
225      return repr(chr(code))
226    else:
227      return "chr(%d)" % code
228
229  def dump_trans(self, key, set, file):
230    file.write("      %s --> %s\n" % (key, self.dump_set(set)))
231
232  def dump_set(self, set):
233    return state_set_str(set)
234
235#
236#   State set manipulation functions
237#
238
239#def merge_state_sets(set1, set2):
240#        for state in set2.keys():
241#            set1[state] = 1
242
243def state_set_str(set):
244  return "[%s]" % ','.join(["S%d" % state.number for state in set])
245
246
247
248