"""ANTLR3 runtime package"""

# begin[licence]
#
# [The "BSD licence"]
# Copyright (c) 2005-2008 Terence Parr
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# end[licence]

from antlr3.constants import EOF
from antlr3.exceptions import NoViableAltException, BacktrackingFailed

# ``xrange`` only exists on Python 2.  Fall back to Python 3's ``range``
# (which is lazy as well) so that this module works on both without
# building a throw-away list on every prediction.
try:
    _lazy_range = xrange
except NameError:
    _lazy_range = range

# Upper bound on the number of loop iterations a single predict() call may
# perform before it gives up with a RuntimeError.
_MAX_PREDICT_STEPS = 50000


class DFA(object):
    """@brief A DFA implemented as a set of transition tables.

    Any state that has a semantic predicate edge is special; those states
    are generated with if-then-else structures in a specialStateTransition()
    which is generated by cyclicDFA template.

    All tables are indexed by the current state number ``s``:

      - ``special[s] >= 0``: ``s`` is a special state; the value is handed
        to specialStateTransition() to compute the next state.
      - ``accept[s] >= 1``: ``s`` is an accept state and the value is the
        predicted alternative.
      - ``min[s]`` / ``max[s]``: inclusive range of input symbols covered
        by ``transition[s]``.
      - ``transition[s][c - min[s]]``: next state for symbol ``c``; a
        negative entry means there is no such edge.
      - ``eot[s] >= 0``: target state of the EOT ("else") edge of ``s``.
      - ``eof[s] >= 0``: state whose ``accept`` entry is returned when the
        lookahead symbol is EOF.

    """

    def __init__(
        self,
        recognizer, decisionNumber,
        eot, eof, min, max, accept, special, transition
        ):
        """Store the owning recognizer, decision number and the tables."""
        ## Which recognizer encloses this DFA?  Needed to check backtracking
        self.recognizer = recognizer

        self.decisionNumber = decisionNumber
        self.eot = eot
        self.eof = eof
        self.min = min
        self.max = max
        self.accept = accept
        self.special = special
        self.transition = transition


    def predict(self, input):
        """
        From the input stream, predict what alternative will succeed
        using this DFA (representing the covering regular approximation
        to the underlying CFL).  Return an alternative number 1..n.  Throw
        an exception upon error.

        The stream position is marked on entry and always rewound to that
        mark before returning or raising, so prediction never consumes
        input from the caller's point of view.

        Failures are reported through noViableAlt(), which raises.  The
        ``return 0`` statements following those calls are only reached if
        an overriding noViableAlt() returns instead of raising.

        Raises RuntimeError("DFA bang!") if no decision has been reached
        after _MAX_PREDICT_STEPS iterations.
        """
        mark = input.mark()
        s = 0  # we always start at s0
        try:
            for _ in _lazy_range(_MAX_PREDICT_STEPS):
                specialState = self.special[s]
                if specialState >= 0:
                    # Predicated state: the generated subclass decides.
                    s = self.specialStateTransition(specialState, input)
                    if s == -1:
                        # Note: s is -1 at this point, so the reported
                        # state number is -1.
                        self.noViableAlt(s, input)
                        return 0
                    input.consume()
                    continue

                if self.accept[s] >= 1:
                    return self.accept[s]

                # look for a normal char transition
                c = input.LA(1)

                if self.min[s] <= c <= self.max[s]:
                    # move to next state
                    snext = self.transition[s][c - self.min[s]]

                    if snext < 0:
                        # was in range but not a normal transition
                        # must check EOT, which is like the else clause.
                        # eot[s]>=0 indicates that an EOT edge goes to
                        # another state.
                        if self.eot[s] >= 0:
                            s = self.eot[s]
                            input.consume()
                            # TODO: I had this as return accept[eot[s]]
                            # which assumed here that the EOT edge always
                            # went to an accept...faster to do this, but
                            # what about predicated edges coming from EOT
                            # target?
                            continue

                        self.noViableAlt(s, input)
                        return 0

                    s = snext
                    input.consume()
                    continue

                # Symbol is outside [min[s], max[s]]: try the EOT edge.
                if self.eot[s] >= 0:
                    s = self.eot[s]
                    input.consume()
                    continue

                # EOF Transition to accept state?
                if c == EOF and self.eof[s] >= 0:
                    return self.accept[self.eof[s]]

                # not in range and not EOF/EOT, must be invalid symbol
                self.noViableAlt(s, input)
                return 0

            else:
                # The loop ran out of iterations without a decision.
                raise RuntimeError("DFA bang!")

        finally:
            input.rewind(mark)


    def noViableAlt(self, s, input):
        """Signal that no alternative is viable in state ``s``.

        Raises BacktrackingFailed while the recognizer is backtracking
        (``recognizer._state.backtracking > 0``).  Otherwise builds a
        NoViableAltException, passes it to the error() hook and raises it.
        """
        if self.recognizer._state.backtracking > 0:
            raise BacktrackingFailed

        nvae = NoViableAltException(
            self.getDescription(),
            self.decisionNumber,
            s,
            input
            )

        self.error(nvae)
        raise nvae


    def error(self, nvae):
        """A hook for debugging interface"""
        pass


    def specialStateTransition(self, s, input):
        """Return the next state for special state ``s``.

        This default implementation always returns -1, which predict()
        treats as "no viable alternative".
        """
        return -1


    def getDescription(self):
        """Return the description used in NoViableAltException ("n/a" here)."""
        return "n/a"


    @classmethod
    def unpack(cls, string):
        """@brief Unpack the runlength encoded table data.

        Terence implemented packed table initializers, because Java has a
        size restriction on .class files and the lookup tables can grow
        pretty large. The generated JavaLexer.java of the Java.g example
        would be about 15MB with uncompressed array initializers.

        Python does not have any size restrictions, but the compilation of
        such large source files seems to be pretty memory hungry. The memory
        consumption of the python process grew to >1.5GB when importing a
        15MB lexer, eating all my swap space and I was too impatient to see
        if it could finish at all. With packed initializers that are unpacked
        at import time of the lexer module, everything works like a charm.

        ``string`` is read as consecutive (count, value) character pairs;
        each pair contributes ``count`` copies of ``ord(value)`` to the
        result, with the value 0xFFFF standing for -1.  A trailing
        unpaired character is ignored.  Returns a list of ints.

        """

        ret = []
        # Floor division keeps the pair count an int under true division.
        for i in _lazy_range(len(string) // 2):
            n, v = ord(string[2 * i]), ord(string[2 * i + 1])

            # 0xFFFF is the packed representation of -1.
            if v == 0xFFFF:
                v = -1

            ret.extend([v] * n)

        return ret