1# 2# Plex - Transition Maps 3# 4# This version represents state sets direcly as dicts 5# for speed. 6# 7 8from sys import maxint as maxint 9 10class TransitionMap(object): 11 """ 12 A TransitionMap maps an input event to a set of states. 13 An input event is one of: a range of character codes, 14 the empty string (representing an epsilon move), or one 15 of the special symbols BOL, EOL, EOF. 16 17 For characters, this implementation compactly represents 18 the map by means of a list: 19 20 [code_0, states_0, code_1, states_1, code_2, states_2, 21 ..., code_n-1, states_n-1, code_n] 22 23 where |code_i| is a character code, and |states_i| is a 24 set of states corresponding to characters with codes |c| 25 in the range |code_i| <= |c| <= |code_i+1|. 26 27 The following invariants hold: 28 n >= 1 29 code_0 == -maxint 30 code_n == maxint 31 code_i < code_i+1 for i in 0..n-1 32 states_0 == states_n-1 33 34 Mappings for the special events '', BOL, EOL, EOF are 35 kept separately in a dictionary. 36 """ 37 38 map = None # The list of codes and states 39 special = None # Mapping for special events 40 41 def __init__(self, map = None, special = None): 42 if not map: 43 map = [-maxint, {}, maxint] 44 if not special: 45 special = {} 46 self.map = map 47 self.special = special 48 #self.check() ### 49 50 def add(self, event, new_state, 51 TupleType = tuple): 52 """ 53 Add transition to |new_state| on |event|. 54 """ 55 if type(event) is TupleType: 56 code0, code1 = event 57 i = self.split(code0) 58 j = self.split(code1) 59 map = self.map 60 while i < j: 61 map[i + 1][new_state] = 1 62 i = i + 2 63 else: 64 self.get_special(event)[new_state] = 1 65 66 def add_set(self, event, new_set, 67 TupleType = tuple): 68 """ 69 Add transitions to the states in |new_set| on |event|. 70 """ 71 if type(event) is TupleType: 72 code0, code1 = event 73 i = self.split(code0) 74 j = self.split(code1) 75 map = self.map 76 while i < j: 77 map[i + 1].update(new_set) 78 i = i + 2 79 else: 80 self.get_special(event).update(new_set) 81 82 def get_epsilon(self, 83 none = None): 84 """ 85 Return the mapping for epsilon, or None. 86 """ 87 return self.special.get('', none) 88 89 def iteritems(self, 90 len = len): 91 """ 92 Return the mapping as an iterable of ((code1, code2), state_set) and 93 (special_event, state_set) pairs. 94 """ 95 result = [] 96 map = self.map 97 else_set = map[1] 98 i = 0 99 n = len(map) - 1 100 code0 = map[0] 101 while i < n: 102 set = map[i + 1] 103 code1 = map[i + 2] 104 if set or else_set: 105 result.append(((code0, code1), set)) 106 code0 = code1 107 i = i + 2 108 for event, set in self.special.iteritems(): 109 if set: 110 result.append((event, set)) 111 return iter(result) 112 items = iteritems 113 114 # ------------------- Private methods -------------------- 115 116 def split(self, code, 117 len = len, maxint = maxint): 118 """ 119 Search the list for the position of the split point for |code|, 120 inserting a new split point if necessary. Returns index |i| such 121 that |code| == |map[i]|. 122 """ 123 # We use a funky variation on binary search. 124 map = self.map 125 hi = len(map) - 1 126 # Special case: code == map[-1] 127 if code == maxint: 128 return hi 129 # General case 130 lo = 0 131 # loop invariant: map[lo] <= code < map[hi] and hi - lo >= 2 132 while hi - lo >= 4: 133 # Find midpoint truncated to even index 134 mid = ((lo + hi) // 2) & ~1 135 if code < map[mid]: 136 hi = mid 137 else: 138 lo = mid 139 # map[lo] <= code < map[hi] and hi - lo == 2 140 if map[lo] == code: 141 return lo 142 else: 143 map[hi:hi] = [code, map[hi - 1].copy()] 144 #self.check() ### 145 return hi 146 147 def get_special(self, event): 148 """ 149 Get state set for special event, adding a new entry if necessary. 150 """ 151 special = self.special 152 set = special.get(event, None) 153 if not set: 154 set = {} 155 special[event] = set 156 return set 157 158 # --------------------- Conversion methods ----------------------- 159 160 def __str__(self): 161 map_strs = [] 162 map = self.map 163 n = len(map) 164 i = 0 165 while i < n: 166 code = map[i] 167 if code == -maxint: 168 code_str = "-inf" 169 elif code == maxint: 170 code_str = "inf" 171 else: 172 code_str = str(code) 173 map_strs.append(code_str) 174 i = i + 1 175 if i < n: 176 map_strs.append(state_set_str(map[i])) 177 i = i + 1 178 special_strs = {} 179 for event, set in self.special.iteritems(): 180 special_strs[event] = state_set_str(set) 181 return "[%s]+%s" % ( 182 ','.join(map_strs), 183 special_strs 184 ) 185 186 # --------------------- Debugging methods ----------------------- 187 188 def check(self): 189 """Check data structure integrity.""" 190 if not self.map[-3] < self.map[-1]: 191 print(self) 192 assert 0 193 194 def dump(self, file): 195 map = self.map 196 i = 0 197 n = len(map) - 1 198 while i < n: 199 self.dump_range(map[i], map[i + 2], map[i + 1], file) 200 i = i + 2 201 for event, set in self.special.iteritems(): 202 if set: 203 if not event: 204 event = 'empty' 205 self.dump_trans(event, set, file) 206 207 def dump_range(self, code0, code1, set, file): 208 if set: 209 if code0 == -maxint: 210 if code1 == maxint: 211 k = "any" 212 else: 213 k = "< %s" % self.dump_char(code1) 214 elif code1 == maxint: 215 k = "> %s" % self.dump_char(code0 - 1) 216 elif code0 == code1 - 1: 217 k = self.dump_char(code0) 218 else: 219 k = "%s..%s" % (self.dump_char(code0), 220 self.dump_char(code1 - 1)) 221 self.dump_trans(k, set, file) 222 223 def dump_char(self, code): 224 if 0 <= code <= 255: 225 return repr(chr(code)) 226 else: 227 return "chr(%d)" % code 228 229 def dump_trans(self, key, set, file): 230 file.write(" %s --> %s\n" % (key, self.dump_set(set))) 231 232 def dump_set(self, set): 233 return state_set_str(set) 234 235# 236# State set manipulation functions 237# 238 239#def merge_state_sets(set1, set2): 240# for state in set2.keys(): 241# set1[state] = 1 242 243def state_set_str(set): 244 return "[%s]" % ','.join(["S%d" % state.number for state in set]) 245 246 247 248