1324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver"""ANTLR3 runtime package"""
2324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
3324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# begin[licence]
4324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#
5324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# [The "BSD licence"]
6324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# Copyright (c) 2005-2008 Terence Parr
7324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# All rights reserved.
8324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#
9324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# Redistribution and use in source and binary forms, with or without
10324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# modification, are permitted provided that the following conditions
11324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# are met:
12324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# 1. Redistributions of source code must retain the above copyright
13324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#    notice, this list of conditions and the following disclaimer.
14324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# 2. Redistributions in binary form must reproduce the above copyright
15324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#    notice, this list of conditions and the following disclaimer in the
16324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#    documentation and/or other materials provided with the distribution.
17324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# 3. The name of the author may not be used to endorse or promote products
18324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#    derived from this software without specific prior written permission.
19324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#
20324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#
31324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# end[licence]
32324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
33324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport sys
34324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport inspect
35324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
36324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverfrom antlr3 import compatible_api_versions
37324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverfrom antlr3.constants import DEFAULT_CHANNEL, HIDDEN_CHANNEL, EOF, \
38324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     EOR_TOKEN_TYPE, INVALID_TOKEN_TYPE
39324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverfrom antlr3.exceptions import RecognitionException, MismatchedTokenException, \
40324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     MismatchedRangeException, MismatchedTreeNodeException, \
41324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     NoViableAltException, EarlyExitException, MismatchedSetException, \
42324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     MismatchedNotSetException, FailedPredicateException, \
43324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     BacktrackingFailed, UnwantedTokenException, MissingTokenException
44324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverfrom antlr3.tokens import CommonToken, SKIP_TOKEN
45324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverfrom antlr3.compat import set, frozenset, reversed
46324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
47324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
class RecognizerSharedState(object):
    """
    Mutable bookkeeping for a recognition run.

    Everything an abstract recognizer needs in order to match input and
    recover from errors is kept on this object instead of on the recognizer
    itself.  Because it is a separate object, one instance can be handed to
    several grammars at once, e.g. when one grammar imports another.

    All fields are ordinary public attributes.
    """

    def __init__(self):
        # ------------------------------------------------------------------
        # Fields used by every kind of recognizer
        # ------------------------------------------------------------------

        # Stack (grows upwards) of the token-type sets that may follow each
        # pending rule invocation.
        self.following = []

        # True from the moment an error is seen until a token is matched
        # successfully again; keeps one error from being reported twice.
        self.errorRecovery = False

        # Input index at which the most recent error occurred.  Failsafe
        # against endless loops in which an error is detected, recovery
        # consumes nothing, the same error is detected again, and so on:
        # it guarantees at least one token/tree node is consumed for two
        # errors.
        self.lastErrorIndex = -1

        # Current backtracking depth.  0 means no backtracking is going on
        # and it is safe to execute actions; >0 is the nesting level.
        self.backtracking = 0

        # Memoization tables, one per rule: ruleMemo[ruleIndex] maps a rule
        # start index to the stop token index of that rule, or to
        # MEMO_RULE_FAILED.  Only used if rule memoization is on (which it
        # is by default).
        self.ruleMemo = None

        # Number of syntax errors the recognizer has encountered.
        self.syntaxErrors = 0

        # ------------------------------------------------------------------
        # Lexer-only fields (kept in the same state object to avoid casting
        # constantly in generated code and in the Lexer object)
        # ------------------------------------------------------------------

        # The token being built.  Every lexer rule exists to create a token
        # object; it is stored here because several rules may collaborate
        # on a single token.  nextToken returns this object after matching
        # lexer rule(s).  A subclass that emits several tokens should set
        # this to the last token matched (or to anything that is not None)
        # so the automatic emit mechanism does not emit another token.
        self.token = None

        # Character index in the stream at which the current token starts;
        # needed, for example, to fetch the token's text.  Set at the start
        # of nextToken.
        self.tokenStartCharIndex = -1

        # Line of the token's first character, and that character's
        # position within the line.
        self.tokenStartLine = self.tokenStartCharPositionInLine = None

        # Channel number and token type of the current token.
        self.channel = self.type = None

        # Optional override for the current token's text, replacing what is
        # in the input char buffer.  Use setText() or assign it directly.
        self.text = None
124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverclass BaseRecognizer(object):
127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    """
128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @brief Common recognizer functionality.
129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    A generic recognizer that can handle recognizers generated from
131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    lexer, parser, and tree grammars.  This is all the parsing
132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    support code essentially; most of it is error recovery stuff and
133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    backtracking.
134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    """
135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    MEMO_RULE_FAILED = -2
137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    MEMO_RULE_UNKNOWN = -1
138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    # copies from Token object for convenience in actions
140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    DEFAULT_TOKEN_CHANNEL = DEFAULT_CHANNEL
141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    # for convenience in actions
143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    HIDDEN = HIDDEN_CHANNEL
144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    # overridden by generated subclasses
146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    tokenNames = None
147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    # The api_version attribute has been introduced in 3.3. If it is not
149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    # overwritten in the generated recognizer, we assume a default of v0.
150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    api_version = 0
151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
def __init__(self, state=None):
    """
    Initialise the recognizer and check runtime compatibility.

    state -- shared state object to use.  Passing an existing one lets
             several recognizers (a grammar and the grammars it imports)
             share error variables and other state; when omitted, a new
             RecognizerSharedState is created.

    Raises RuntimeError if this recognizer's api_version is not among
    the runtime's compatible_api_versions.
    """
    # The input stream; it has to be assigned by a subclass.
    self.input = None

    # Lexer, parser and tree parser state lives in a separate object so
    # it can be shared -- a kind of explicit multiple inheritance via
    # delegation of methods and shared state.
    self._state = RecognizerSharedState() if state is None else state

    if self.api_version in compatible_api_versions:
        return

    template = (
        "ANTLR version mismatch: "
        "The recognizer has been generated with API V%s, "
        "but this runtime does not support this."
    )
    raise RuntimeError(template % self.api_version)
171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    # this one only exists to shut up pylint :(
def setInput(self, input):
    """Store *input* as this recognizer's input stream (``self.input``)."""
    self.input = input
175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
def reset(self):
    """
    Put the shared recognizer state back into its initial condition.

    Only the bookkeeping in ``self._state`` is touched; subclasses must
    rewind the input stream themselves.  Does nothing when there is no
    shared state object.
    """
    shared = self._state
    if shared is not None:
        # everything related to error recovery
        shared.following = []
        shared.errorRecovery = False
        shared.lastErrorIndex = -1
        shared.syntaxErrors = 0

        # everything related to backtracking and memoization; a memo
        # table is only re-created if one was in use
        shared.backtracking = 0
        if shared.ruleMemo is not None:
            shared.ruleMemo = {}
195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
def match(self, input, ttype, follow):
    """
    Match current input symbol against ttype.  Attempt
    single token insertion or deletion error recovery.  If
    that fails, throw MismatchedTokenException.

    To turn off single token insertion or deletion error
    recovery, override recoverFromMismatchedToken() and have it
    throw an exception. See TreeParser.recoverFromMismatchedToken().
    This way any error in a rule will cause an exception and
    immediate exit from rule.  Rule would recover by resynchronizing
    to the set of symbols that can follow rule ref.

    input  -- the stream to match against; it must provide LA() and
              consume().
    ttype  -- the token type expected at the current position.
    follow -- follow set handed on to recoverFromMismatchedToken().

    Returns the symbol obtained from getCurrentInputSymbol() when the
    lookahead matches, otherwise whatever recoverFromMismatchedToken()
    returns.

    Raises BacktrackingFailed if the lookahead does not match while
    backtracking is in progress.
    """

    # Grab the symbol before consuming it.
    matchedSymbol = self.getCurrentInputSymbol(input)

    # Test and advance the very stream the symbol was taken from and
    # that recovery will operate on, not self.input: the two are
    # normally the same object, but mixing them is wrong if they differ.
    if input.LA(1) == ttype:
        input.consume()
        # a successful match ends error recovery mode
        self._state.errorRecovery = False
        return matchedSymbol

    if self._state.backtracking > 0:
        # FIXME: need to return matchedSymbol here as well. damn!!
        raise BacktrackingFailed

    return self.recoverFromMismatchedToken(input, ttype, follow)
223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
def matchAny(self, input):
    """
    Match the wildcard: accept whatever symbol is current in *input*.

    Clears the error recovery flag and consumes one symbol from the
    stream that was passed in (which must provide consume()).
    """

    self._state.errorRecovery = False
    # Consume from the stream the caller handed over rather than from
    # self.input, so the parameter is honoured.
    input.consume()
230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
def mismatchIsUnwantedToken(self, input, ttype):
    """
    Tell whether the symbol after the current one has type *ttype*.

    Returns the result of comparing the second lookahead symbol of
    *input* (``input.LA(2)``) with *ttype*.
    """
    secondLookahead = input.LA(2)
    return secondLookahead == ttype
234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
def mismatchIsMissingToken(self, input, follow):
    """
    Tell whether a mismatch can be treated as a missing token.

    input  -- the stream being parsed; only ``input.LA(1)`` is read.
    follow -- set of token types that may follow the element being
              matched, or None if nothing is known about it.

    Returns True if the current token is consistent with what could
    come after the element (so recovery is free to "insert" the
    missing token), or if the follow set still contains EOR_TOKEN_TYPE
    after adjustment; False otherwise.
    """
    if follow is None:
        # Without follow information all we can do is consume a single
        # token and hope for the best.
        return False

    # Work out what can follow this grammar element reference.
    if EOR_TOKEN_TYPE in follow:
        ruleFollow = self.computeContextSensitiveRuleFOLLOW()
        follow = follow | ruleFollow

        if self._state.following:
            # EOR only stays in the set for the start symbol
            follow = follow - set([EOR_TOKEN_TYPE])

    return input.LA(1) in follow or EOR_TOKEN_TYPE in follow
258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
def reportError(self, e):
    """Report a recognition problem.

    Reporting switches the recognizer into error recovery mode; while
    in that mode further errors are silently dropped.  Recovery mode
    ends when a token is matched successfully again (after a resync),
    so the sequence is:

    1. error occurs
    2. enter recovery mode, report error
    3. consume until token found in resynch set
    4. try to resume parsing
    5. next match() will reset errorRecovery mode

    Overriding implementations should keep syntaxErrors up to date if
    they care about that counter.

    e -- the exception describing the problem; it is handed to
         displayRecognitionError() together with self.tokenNames.
    """

    state = self._state

    # Only the first error since the last successful match is counted
    # and displayed; anything after that is considered spurious.
    if not state.errorRecovery:
        state.syntaxErrors += 1
        state.errorRecovery = True
        self.displayRecognitionError(self.tokenNames, e)
288324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
289324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
def displayRecognitionError(self, tokenNames, e):
    """
    Build the text for a recognition error and emit it.

    The header from getErrorHeader(e) and the message from
    getErrorMessage(e, tokenNames) are joined by a single space and
    passed to emitErrorMessage().
    """
    header = self.getErrorHeader(e)
    message = self.getErrorMessage(e, tokenNames)
    text = header + " " + message
    self.emitErrorMessage(text)
294324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
295324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
296324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getErrorMessage(self, e, tokenNames):
297324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
298324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        What error message should be generated for the various
299324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        exception types?
300324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
301324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Not very object-oriented code, but I like having all error message
302324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        generation within one method rather than spread among all of the
303324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        exception classes. This also makes it much easier for the exception
304324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        handling because the exception classes do not have to have pointers back
305324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        to this object to access utility routines and so on. Also, changing
306324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        the message for an exception type would be difficult because you
307324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        would have to subclassing exception, but then somehow get ANTLR
308324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        to make those kinds of exception objects instead of the default.
309324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        This looks weird, but trust me--it makes the most sense in terms
310324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        of flexibility.
311324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
312324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        For grammar debugging, you will want to override this to add
313324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        more information such as the stack frame with
314324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        getRuleInvocationStack(e, this.getClass().getName()) and,
315324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        for no viable alts, the decision description and state etc...
316324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
317324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Override this to change the message generated for one or more
318324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        exception types.
319324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
320324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
321324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if isinstance(e, UnwantedTokenException):
322324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            tokenName = "<unknown>"
323324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if e.expecting == EOF:
324324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                tokenName = "EOF"
325324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
326324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            else:
327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                tokenName = self.tokenNames[e.expecting]
328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = "extraneous input %s expecting %s" % (
330324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                self.getTokenErrorDisplay(e.getUnexpectedToken()),
331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                tokenName
332324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                )
333324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
334324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        elif isinstance(e, MissingTokenException):
335324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            tokenName = "<unknown>"
336324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if e.expecting == EOF:
337324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                tokenName = "EOF"
338324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
339324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            else:
340324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                tokenName = self.tokenNames[e.expecting]
341324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
342324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = "missing %s at %s" % (
343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                tokenName, self.getTokenErrorDisplay(e.token)
344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                )
345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        elif isinstance(e, MismatchedTokenException):
347324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            tokenName = "<unknown>"
348324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if e.expecting == EOF:
349324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                tokenName = "EOF"
350324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            else:
351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                tokenName = self.tokenNames[e.expecting]
352324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
353324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = "mismatched input " \
354324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + self.getTokenErrorDisplay(e.token) \
355324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + " expecting " \
356324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + tokenName
357324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
358324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        elif isinstance(e, MismatchedTreeNodeException):
359324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            tokenName = "<unknown>"
360324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if e.expecting == EOF:
361324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                tokenName = "EOF"
362324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            else:
363324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                tokenName = self.tokenNames[e.expecting]
364324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
365324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = "mismatched tree node: %s expecting %s" \
366324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  % (e.node, tokenName)
367324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
368324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        elif isinstance(e, NoViableAltException):
369324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = "no viable alternative at input " \
370324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + self.getTokenErrorDisplay(e.token)
371324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
372324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        elif isinstance(e, EarlyExitException):
373324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = "required (...)+ loop did not match anything at input " \
374324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + self.getTokenErrorDisplay(e.token)
375324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
376324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        elif isinstance(e, MismatchedSetException):
377324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = "mismatched input " \
378324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + self.getTokenErrorDisplay(e.token) \
379324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + " expecting set " \
380324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + repr(e.expecting)
381324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
382324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        elif isinstance(e, MismatchedNotSetException):
383324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = "mismatched input " \
384324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + self.getTokenErrorDisplay(e.token) \
385324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + " expecting set " \
386324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + repr(e.expecting)
387324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
388324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        elif isinstance(e, FailedPredicateException):
389324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = "rule " \
390324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + e.ruleName \
391324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + " failed predicate: {" \
392324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + e.predicateText \
393324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + "}?"
394324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
395324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        else:
396324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = str(e)
397324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
398324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return msg
399324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
400324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
401324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getNumberOfSyntaxErrors(self):
402324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
403324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Get number of recognition errors (lexer, parser, tree parser).  Each
404324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        recognizer tracks its own number.  So parser and lexer each have
405324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        separate count.  Does not count the spurious errors found between
406324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        an error and next valid token match
407324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
408324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        See also reportError()
409324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	"""
410324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return self._state.syntaxErrors
411324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
412324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
413324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getErrorHeader(self, e):
414324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
415324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        What is the error header, normally line/character position information?
416324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
417324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
418324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        source_name = self.getSourceName()
419324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if source_name is not None:
420324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return "%s line %d:%d" % (source_name, e.line, e.charPositionInLine)
421324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return "line %d:%d" % (e.line, e.charPositionInLine)
422324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
423324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
424324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getTokenErrorDisplay(self, t):
425324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
426324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        How should a token be displayed in an error message? The default
427324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        is to display just the text, but during development you might
428324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        want to have a lot of information spit out.  Override in that case
429324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        to use t.toString() (which, for CommonToken, dumps everything about
430324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        the token). This is better than forcing you to override a method in
431324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        your token objects because you don't have to go modify your lexer
432324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        so that it creates a new Java type.
433324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
434324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
435324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        s = t.text
436324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if s is None:
437324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if t.type == EOF:
438324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                s = "<EOF>"
439324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            else:
440324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                s = "<"+t.type+">"
441324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
442324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return repr(s)
443324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
444324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
445324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def emitErrorMessage(self, msg):
446324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """Override this method to change where error messages go"""
447324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        sys.stderr.write(msg + '\n')
448324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
449324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
450324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def recover(self, input, re):
451324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
452324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Recover from an error found on the input stream.  This is
453324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        for NoViableAlt and mismatched symbol exceptions.  If you enable
454324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        single token insertion and deletion, this will usually not
455324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        handle mismatched symbol exceptions but there could be a mismatched
456324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        token that the match() routine could not recover from.
457324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
458324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
459324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        # PROBLEM? what if input stream is not the same as last time
460324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        # perhaps make lastErrorIndex a member of input
461324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if self._state.lastErrorIndex == input.index():
462324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            # uh oh, another error at same token index; must be a case
463324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            # where LT(1) is in the recovery token set so nothing is
464324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            # consumed; consume a single token so at least to prevent
465324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            # an infinite loop; this is a failsafe.
466324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            input.consume()
467324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
468324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self._state.lastErrorIndex = input.index()
469324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        followSet = self.computeErrorRecoverySet()
470324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
471324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self.beginResync()
472324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self.consumeUntil(input, followSet)
473324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self.endResync()
474324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
475324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
476324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def beginResync(self):
477324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
478324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        A hook to listen in on the token consumption during error recovery.
479324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        The DebugParser subclasses this to fire events to the listenter.
480324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
481324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
482324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        pass
483324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
484324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
485324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def endResync(self):
486324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
487324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        A hook to listen in on the token consumption during error recovery.
488324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        The DebugParser subclasses this to fire events to the listenter.
489324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
490324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
491324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        pass
492324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
493324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
494324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def computeErrorRecoverySet(self):
495324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
496324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Compute the error recovery set for the current rule.  During
497324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        rule invocation, the parser pushes the set of tokens that can
498324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        follow that rule reference on the stack; this amounts to
499324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        computing FIRST of what follows the rule reference in the
500324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        enclosing rule. This local follow set only includes tokens
501324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        from within the rule; i.e., the FIRST computation done by
502324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ANTLR stops at the end of a rule.
503324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
504324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        EXAMPLE
505324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
506324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        When you find a "no viable alt exception", the input is not
507324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        consistent with any of the alternatives for rule r.  The best
508324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        thing to do is to consume tokens until you see something that
509324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        can legally follow a call to r *or* any rule that called r.
510324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        You don't want the exact set of viable next tokens because the
511324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        input might just be missing a token--you might consume the
512324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        rest of the input looking for one of the missing tokens.
513324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
514324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Consider grammar:
515324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
516324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        a : '[' b ']'
517324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          | '(' b ')'
518324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          ;
519324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        b : c '^' INT ;
520324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        c : ID
521324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          | INT
522324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          ;
523324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
524324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        At each rule invocation, the set of tokens that could follow
525324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        that rule is pushed on a stack.  Here are the various "local"
526324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        follow sets:
527324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
528324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        FOLLOW(b1_in_a) = FIRST(']') = ']'
529324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        FOLLOW(b2_in_a) = FIRST(')') = ')'
530324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        FOLLOW(c_in_b) = FIRST('^') = '^'
531324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
532324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Upon erroneous input "[]", the call chain is
533324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
534324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        a -> b -> c
535324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
536324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        and, hence, the follow context stack is:
537324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
538324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        depth  local follow set     after call to rule
539324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          0         \<EOF>                    a (from main())
540324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          1          ']'                     b
541324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          3          '^'                     c
542324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
543324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Notice that ')' is not included, because b would have to have
544324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        been called from a different context in rule a for ')' to be
545324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        included.
546324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
547324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        For error recovery, we cannot consider FOLLOW(c)
548324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        (context-sensitive or otherwise).  We need the combined set of
549324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        all context-sensitive FOLLOW sets--the set of all tokens that
550324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        could follow any reference in the call chain.  We need to
551324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        resync to one of those tokens.  Note that FOLLOW(c)='^' and if
552324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        we resync'd to that token, we'd consume until EOF.  We need to
553324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
554324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        In this case, for input "[]", LA(1) is in this set so we would
555324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        not consume anything and after printing an error rule c would
556324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return normally.  It would not find the required '^' though.
557324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        At this point, it gets a mismatched token error and throws an
558324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        exception (since LA(1) is not in the viable following token
559324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        set).  The rule exception handler tries to recover, but finds
560324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        the same recovery set and doesn't consume anything.  Rule b
561324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        exits normally returning to rule a.  Now it finds the ']' (and
562324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        with the successful match exits errorRecovery mode).
563324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
564324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        So, you cna see that the parser walks up call chain looking
565324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        for the token that was a member of the recovery set.
566324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
567324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Errors are not generated in errorRecovery mode.
568324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
569324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ANTLR's error recovery mechanism is based upon original ideas:
570324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
571324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        "Algorithms + Data Structures = Programs" by Niklaus Wirth
572324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
573324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        and
574324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
575324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        "A note on error recovery in recursive descent parsers":
576324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        http://portal.acm.org/citation.cfm?id=947902.947905
577324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
578324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Later, Josef Grosch had some good ideas:
579324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
580324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        "Efficient and Comfortable Error Recovery in Recursive Descent
581324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Parsers":
582324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
583324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
584324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Like Grosch I implemented local FOLLOW sets that are combined
585324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        at run-time upon error to avoid overhead during parsing.
586324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
587324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
588324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return self.combineFollows(False)
589324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
590324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
591324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def computeContextSensitiveRuleFOLLOW(self):
592324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
593324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Compute the context-sensitive FOLLOW set for current rule.
594324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        This is set of token types that can follow a specific rule
595324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        reference given a specific call chain.  You get the set of
596324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        viable tokens that can possibly come next (lookahead depth 1)
597324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        given the current call chain.  Contrast this with the
598324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        definition of plain FOLLOW for rule r:
599324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
600324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver         FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
601324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
602324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        where x in T* and alpha, beta in V*; T is set of terminals and
603324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        V is the set of terminals and nonterminals.  In other words,
604324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        FOLLOW(r) is the set of all tokens that can possibly follow
605324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        references to r in *any* sentential form (context).  At
606324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        runtime, however, we know precisely which context applies as
607324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        we have the call chain.  We may compute the exact (rather
608324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        than covering superset) set of following tokens.
609324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
610324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        For example, consider grammar:
611324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
612324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
613324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver             | "return" expr '.'
614324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver             ;
615324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
616324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
617324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver             | '(' expr ')'
618324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver             ;
619324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
620324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        The FOLLOW sets are all inclusive whereas context-sensitive
621324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        FOLLOW sets are precisely what could follow a rule reference.
622324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        For input input "i=(3);", here is the derivation:
623324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
624324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        stat => ID '=' expr ';'
625324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver             => ID '=' atom ('+' atom)* ';'
626324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver             => ID '=' '(' expr ')' ('+' atom)* ';'
627324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver             => ID '=' '(' atom ')' ('+' atom)* ';'
628324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver             => ID '=' '(' INT ')' ('+' atom)* ';'
629324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver             => ID '=' '(' INT ')' ';'
630324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
631324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        At the "3" token, you'd have a call chain of
632324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
633324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          stat -> expr -> atom -> expr -> atom
634324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
635324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        What can follow that specific nested ref to atom?  Exactly ')'
636324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        as you can see by looking at the derivation of this specific
637324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
638324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
639324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        You want the exact viable token set when recovering from a
640324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        token mismatch.  Upon token mismatch, if LA(1) is member of
641324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        the viable next token set, then you know there is most likely
642324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        a missing token in the input stream.  "Insert" one by just not
643324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        throwing an exception.
644324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
645324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
646324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return self.combineFollows(True)
647324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
648324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
649324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def combineFollows(self, exact):
650324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        followSet = set()
651324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        for idx, localFollowSet in reversed(list(enumerate(self._state.following))):
652324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            followSet |= localFollowSet
653324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if exact:
654324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                # can we see end of rule?
655324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if EOR_TOKEN_TYPE in localFollowSet:
656324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    # Only leave EOR in set if at top (start rule); this lets
657324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    # us know if have to include follow(start rule); i.e., EOF
658324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    if idx > 0:
659324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        followSet.remove(EOR_TOKEN_TYPE)
660324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
661324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                else:
662324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    # can't see end of rule, quit
663324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    break
664324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
665324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return followSet
666324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
667324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
668324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def recoverFromMismatchedToken(self, input, ttype, follow):
669324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """Attempt to recover from a single missing or extra token.
670324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
671324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        EXTRA TOKEN
672324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
673324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        LA(1) is not what we are looking for.  If LA(2) has the right token,
674324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        however, then assume LA(1) is some extra spurious token.  Delete it
675324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        and LA(2) as if we were doing a normal match(), which advances the
676324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        input.
677324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
678324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        MISSING TOKEN
679324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
680324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        If current token is consistent with what could come after
681324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ttype then it is ok to 'insert' the missing token, else throw
682324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        exception For example, Input 'i=(3;' is clearly missing the
683324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ')'.  When the parser returns from the nested call to expr, it
684324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        will have call chain:
685324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
686324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          stat -> expr -> atom
687324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
688324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        and it will be trying to match the ')' at this point in the
689324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        derivation:
690324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
691324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver             => ID '=' '(' INT ')' ('+' atom)* ';'
692324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                                ^
693324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        match() will see that ';' doesn't match ')' and report a
694324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        mismatched token error.  To recover, it sees that LA(1)==';'
695324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        is in the set of tokens that can follow the ')' token
696324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        reference in rule atom.  It can assume that you forgot the ')'.
697324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
698324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
699324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        e = None
700324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
701324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        # if next token is what we are looking for then "delete" this token
702324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if self.mismatchIsUnwantedToken(input, ttype):
703324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            e = UnwantedTokenException(ttype, input)
704324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
705324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self.beginResync()
706324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            input.consume() # simply delete extra token
707324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self.endResync()
708324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
709324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            # report after consuming so AW sees the token in the exception
710324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self.reportError(e)
711324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
712324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            # we want to return the token we're actually matching
713324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            matchedSymbol = self.getCurrentInputSymbol(input)
714324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
715324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            # move past ttype token as if all were ok
716324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            input.consume()
717324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return matchedSymbol
718324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
719324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        # can't recover with single token deletion, try insertion
720324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if self.mismatchIsMissingToken(input, follow):
721324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            inserted = self.getMissingSymbol(input, e, ttype, follow)
722324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            e = MissingTokenException(ttype, input, inserted)
723324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
724324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            # report after inserting so AW sees the token in the exception
725324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self.reportError(e)
726324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return inserted
727324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
728324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        # even that didn't work; must throw the exception
729324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        e = MismatchedTokenException(ttype, input)
730324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        raise e
731324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
732324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
733324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def recoverFromMismatchedSet(self, input, e, follow):
734324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """Not currently used"""
735324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
736324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if self.mismatchIsMissingToken(input, follow):
737324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self.reportError(e)
738324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            # we don't know how to conjure up a token for sets yet
739324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return self.getMissingSymbol(input, e, INVALID_TOKEN_TYPE, follow)
740324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
741324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        # TODO do single token deletion like above for Token mismatch
742324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        raise e
743324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
744324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
745324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getCurrentInputSymbol(self, input):
746324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
747324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Match needs to return the current input symbol, which gets put
748324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        into the label for the associated token ref; e.g., x=ID.  Token
749324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        and tree parsers need to return different objects. Rather than test
750324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        for input stream type or change the IntStream interface, I use
751324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        a simple method to ask the recognizer to tell me what the current
752324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        input symbol is.
753324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
754324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        This is ignored for lexers.
755324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
756324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
757324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return None
758324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
759324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
760324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getMissingSymbol(self, input, e, expectedTokenType, follow):
761324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """Conjure up a missing token during error recovery.
762324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
763324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        The recognizer attempts to recover from single missing
764324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        symbols. But, actions might refer to that missing symbol.
765324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        For example, x=ID {f($x);}. The action clearly assumes
766324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        that there has been an identifier matched previously and that
767324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        $x points at that token. If that token is missing, but
768324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        the next token in the stream is what we want we assume that
769324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        this token is missing and we keep going. Because we
770324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        have to return some token to replace the missing token,
771324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        we have to conjure one up. This method gives the user control
772324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        over the tokens returned for missing tokens. Mostly,
773324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        you will want to create something special for identifier
774324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        tokens. For literals such as '{' and ',', the default
775324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        action in the parser or tree parser works. It simply creates
776324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        a CommonToken of the appropriate type. The text will be the token.
777324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        If you change what tokens must be created by the lexer,
778324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        override this method to create the appropriate tokens.
779324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
780324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
781324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return None
782324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
783324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
784324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver##     def recoverFromMissingElement(self, input, e, follow):
785324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver##         """
786324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver##         This code is factored out from mismatched token and mismatched set
787324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver##         recovery.  It handles "single token insertion" error recovery for
788324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver##         both.  No tokens are consumed to recover from insertions.  Return
789324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver##         true if recovery was possible else return false.
790324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver##         """
791324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
792324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver##         if self.mismatchIsMissingToken(input, follow):
793324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver##             self.reportError(e)
794324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver##             return True
795324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
796324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver##         # nothing to do; throw exception
797324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver##         return False
798324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
799324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
800324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def consumeUntil(self, input, tokenTypes):
801324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
802324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Consume tokens until one matches the given token or token set
803324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
804324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        tokenTypes can be a single token type or a set of token types
805324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
806324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
807324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
808324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if not isinstance(tokenTypes, (set, frozenset)):
809324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            tokenTypes = frozenset([tokenTypes])
810324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
811324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ttype = input.LA(1)
812324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        while ttype != EOF and ttype not in tokenTypes:
813324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            input.consume()
814324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            ttype = input.LA(1)
815324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
816324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
817324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getRuleInvocationStack(self):
818324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
819324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Return List<String> of the rules in your parser instance
820324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        leading up to a call to this method.  You could override if
821324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        you want more details such as the file/line info of where
822324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        in the parser java code a rule is invoked.
823324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
824324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        This is very useful for error messages and for context-sensitive
825324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        error recovery.
826324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
827324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        You must be careful, if you subclass a generated recognizers.
828324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        The default implementation will only search the module of self
829324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        for rules, but the subclass will not contain any rules.
830324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        You probably want to override this method to look like
831324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
832324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        def getRuleInvocationStack(self):
833324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return self._getRuleInvocationStack(<class>.__module__)
834324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
835324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        where <class> is the class of the generated recognizer, e.g.
836324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        the superclass of self.
837324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
838324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
839324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return self._getRuleInvocationStack(self.__module__)
840324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
841324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
842324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def _getRuleInvocationStack(cls, module):
843324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
844324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        A more general version of getRuleInvocationStack where you can
845324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        pass in, for example, a RecognitionException to get it's rule
846324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        stack trace.  This routine is shared with all recognizers, hence,
847324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        static.
848324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
849324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        TODO: move to a utility class or something; weird having lexer call
850324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        this
851324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
852324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
853324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        # mmmhhh,... perhaps look at the first argument
854324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        # (f_locals[co_varnames[0]]?) and test if it's a (sub)class of
855324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        # requested recognizer...
856324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
857324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        rules = []
858324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        for frame in reversed(inspect.stack()):
859324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            code = frame[0].f_code
860324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            codeMod = inspect.getmodule(code)
861324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if codeMod is None:
862324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                continue
863324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
864324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            # skip frames not in requested module
865324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if codeMod.__name__ != module:
866324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                continue
867324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
868324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            # skip some unwanted names
869324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if code.co_name in ('nextToken', '<module>'):
870324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                continue
871324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
872324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            rules.append(code.co_name)
873324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
874324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return rules
875324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
876324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    _getRuleInvocationStack = classmethod(_getRuleInvocationStack)
877324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
878324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
879324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getBacktrackingLevel(self):
880324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return self._state.backtracking
881324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
882324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def setBacktrackingLevel(self, n):
883324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self._state.backtracking = n
884324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
885324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
886324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getGrammarFileName(self):
887324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """For debugging and other purposes, might want the grammar name.
888324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
889324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Have ANTLR generate an implementation for this method.
890324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
891324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
892324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return self.grammarFileName
893324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
894324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
895324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getSourceName(self):
896324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        raise NotImplementedError
897324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
898324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
899324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def toStrings(self, tokens):
900324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """A convenience method for use most often with template rewrites.
901324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
902324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Convert a List<Token> to List<String>
903324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
904324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
905324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if tokens is None:
906324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return None
907324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
908324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return [token.text for token in tokens]
909324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
910324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
911324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getRuleMemoization(self, ruleIndex, ruleStartIndex):
912324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
913324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Given a rule number and a start token index number, return
914324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
915324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        start index.  If this rule has parsed input starting from the
916324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        start index before, then return where the rule stopped parsing.
917324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        It returns the index of the last token matched by the rule.
918324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
919324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
920324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if ruleIndex not in self._state.ruleMemo:
921324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self._state.ruleMemo[ruleIndex] = {}
922324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
923324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return self._state.ruleMemo[ruleIndex].get(
924324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            ruleStartIndex, self.MEMO_RULE_UNKNOWN
925324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            )
926324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
927324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
928324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def alreadyParsedRule(self, input, ruleIndex):
929324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
930324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Has this rule already parsed input at the current index in the
931324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
932324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        If we attempted but failed to parse properly before, return
933324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        MEMO_RULE_FAILED.
934324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
935324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        This method has a side-effect: if we have seen this input for
936324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        this rule and successfully parsed before, then seek ahead to
937324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        1 past the stop token matched for this rule last time.
938324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
939324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
940324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        stopIndex = self.getRuleMemoization(ruleIndex, input.index())
941324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if stopIndex == self.MEMO_RULE_UNKNOWN:
942324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return False
943324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
944324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if stopIndex == self.MEMO_RULE_FAILED:
945324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            raise BacktrackingFailed
946324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
947324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        else:
948324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            input.seek(stopIndex + 1)
949324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
950324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return True
951324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
952324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
953324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def memoize(self, input, ruleIndex, ruleStartIndex, success):
954324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
955324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Record whether or not this rule parsed the input at this position
956324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        successfully.
957324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
958324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
959324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if success:
960324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            stopTokenIndex = input.index() - 1
961324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        else:
962324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            stopTokenIndex = self.MEMO_RULE_FAILED
963324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
964324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if ruleIndex in self._state.ruleMemo:
965324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self._state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex
966324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
967324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
968324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def traceIn(self, ruleName, ruleIndex, inputSymbol):
969324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        sys.stdout.write("enter %s %s" % (ruleName, inputSymbol))
970324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
971324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if self._state.backtracking > 0:
972324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            sys.stdout.write(" backtracking=%s" % self._state.backtracking)
973324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
974324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        sys.stdout.write('\n')
975324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
976324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
977324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def traceOut(self, ruleName, ruleIndex, inputSymbol):
978324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        sys.stdout.write("exit %s %s" % (ruleName, inputSymbol))
979324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
980324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if self._state.backtracking > 0:
981324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            sys.stdout.write(" backtracking=%s" % self._state.backtracking)
982324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
983324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        # mmmm... we use BacktrackingFailed exceptions now. So how could we
984324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        # get that information here?
985324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        #if self._state.failed:
986324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        #    sys.stdout.write(" failed")
987324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        #else:
988324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        #    sys.stdout.write(" succeeded")
989324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
990324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        sys.stdout.write('\n')
991324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
992324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
class TokenSource(object):
    """
    @brief Abstract baseclass for token producers.

    A source of tokens must provide a sequence of tokens via nextToken()
    and also must reveal its source of characters; CommonToken's text is
    computed from a CharStream; it only stores indices into the char stream.

    Errors from the lexer are never passed to the parser.  Either you want
    to keep going or you do not upon token recognition error.  If you do not
    want to continue lexing then you do not want to continue parsing.  Just
    throw an exception not under RecognitionException and it will naturally
    toss you all the way out of the recognizers.  If you want to continue
    lexing then you should not throw an exception to the parser--it has already
    requested a token.  Keep lexing until you get a valid one.  Just report
    errors and keep going, looking for a valid token.
    """

    def nextToken(self):
        """Return a Token object from your input stream (usually a CharStream).

        Do not fail/return upon lexing error; keep chewing on the characters
        until you get a good one; errors are not passed through to the parser.

        Must be overridden; this base version raises NotImplementedError.
        """

        raise NotImplementedError


    def __iter__(self):
        """The TokenSource is an iterator.

        The iteration will not include the final EOF token, see also the note
        for the next() method.

        """

        return self


    def next(self):
        """Return next token or raise StopIteration.

        Note that this will raise StopIteration when hitting the EOF token,
        so EOF will not be part of the iteration.  A nextToken() result of
        None ends the iteration as well.

        """

        token = self.nextToken()
        if token is None or token.type == EOF:
            raise StopIteration
        return token


    def __next__(self):
        """Iterator protocol entry point under its Python 3 name.

        Delegates to next() so that subclasses overriding next() keep
        working with for-loops and the next() builtin.
        """

        return self.next()
1044324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1045324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1046324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverclass Lexer(BaseRecognizer, TokenSource):
1047324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    """
1048324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @brief Baseclass for generated lexer classes.
1049324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1050324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    A lexer is recognizer that draws input symbols from a character stream.
1051324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    lexer grammars result in a subclass of this object. A Lexer object
1052324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    uses simplified match() and error recovery mechanisms in the interest
1053324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    of speed.
1054324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    """
1055324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1056324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def __init__(self, input, state=None):
1057324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        BaseRecognizer.__init__(self, state)
1058324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        TokenSource.__init__(self)
1059324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1060324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        # Where is the lexer drawing characters from?
1061324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self.input = input
1062324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1063324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1064324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def reset(self):
1065324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        BaseRecognizer.reset(self) # reset all recognizer state variables
1066324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1067324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if self.input is not None:
1068324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            # rewind the input
1069324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self.input.seek(0)
1070324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1071324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if self._state is None:
1072324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            # no shared state work to do
1073324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return
1074324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1075324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        # wack Lexer state variables
1076324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self._state.token = None
1077324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self._state.type = INVALID_TOKEN_TYPE
1078324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self._state.channel = DEFAULT_CHANNEL
1079324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self._state.tokenStartCharIndex = -1
1080324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self._state.tokenStartLine = -1
1081324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self._state.tokenStartCharPositionInLine = -1
1082324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self._state.text = None
1083324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1084324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1085324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def makeEOFToken(self):
1086324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        eof = CommonToken(
1087324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            type=EOF, channel=DEFAULT_CHANNEL,
1088324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            input=self.input,
1089324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            start=self.input.index(), stop=self.input.index())
1090324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        eof.line = self.input.line
1091324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        eof.charPositionInLine = self.input.charPositionInLine
1092324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return eof
1093324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1094324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def nextToken(self):
1095324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
1096324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Return a token from this source; i.e., match a token on the char
1097324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        stream.
1098324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
1099324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        while 1:
1101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self._state.token = None
1102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self._state.channel = DEFAULT_CHANNEL
1103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self._state.tokenStartCharIndex = self.input.index()
1104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self._state.tokenStartCharPositionInLine = self.input.charPositionInLine
1105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self._state.tokenStartLine = self.input.line
1106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self._state.text = None
1107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if self.input.LA(1) == EOF:
1108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                return self.makeEOFToken()
1109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            try:
1111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                self.mTokens()
1112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if self._state.token is None:
1114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    self.emit()
1115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                elif self._state.token == SKIP_TOKEN:
1117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    continue
1118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                return self._state.token
1120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            except NoViableAltException, re:
1122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                self.reportError(re)
1123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                self.recover(re) # throw out current char and try again
1124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            except RecognitionException, re:
1126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                self.reportError(re)
1127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                # match() routine has already called recover()
1128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def skip(self):
1131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
1132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Instruct the lexer to skip creating a token for current lexer rule
1133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        and look for another token.  nextToken() knows to keep looking when
1134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
1135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if token==null at end of any token rule, it creates one for you
1136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        and emits it.
1137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
1138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self._state.token = SKIP_TOKEN
1140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def mTokens(self):
1143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """This is the lexer entry point that sets instance var 'token'"""
1144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        # abstract method
1146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        raise NotImplementedError
1147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def setCharStream(self, input):
1150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """Set the char stream and reset the lexer"""
1151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self.input = None
1152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self.reset()
1153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self.input = input
1154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getSourceName(self):
1157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return self.input.getSourceName()
1158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def emit(self, token=None):
1161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
1162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        The standard method called to automatically emit a token at the
1163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        outermost lexical rule.  The token object should point into the
1164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        char buffer start..stop.  If there is a text override in 'text',
1165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        use that to set the token's text.  Override this method to emit
1166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        custom Token objects.
1167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        If you are building trees, then you should also override
1169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Parser or TreeParser.getMissingSymbol().
1170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
1171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if token is None:
1173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            token = CommonToken(
1174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                input=self.input,
1175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                type=self._state.type,
1176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                channel=self._state.channel,
1177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                start=self._state.tokenStartCharIndex,
1178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                stop=self.getCharIndex()-1
1179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                )
1180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            token.line = self._state.tokenStartLine
1181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            token.text = self._state.text
1182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            token.charPositionInLine = self._state.tokenStartCharPositionInLine
1183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self._state.token = token
1185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return token
1187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def match(self, s):
1190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if isinstance(s, basestring):
1191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            for c in s:
1192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if self.input.LA(1) != ord(c):
1193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    if self._state.backtracking > 0:
1194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        raise BacktrackingFailed
1195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    mte = MismatchedTokenException(c, self.input)
1197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    self.recover(mte)
1198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    raise mte
1199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                self.input.consume()
1201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        else:
1203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if self.input.LA(1) != s:
1204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if self._state.backtracking > 0:
1205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    raise BacktrackingFailed
1206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                mte = MismatchedTokenException(unichr(s), self.input)
1208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                self.recover(mte) # don't really recover; just consume in lexer
1209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                raise mte
1210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self.input.consume()
1212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def matchAny(self):
1215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self.input.consume()
1216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def matchRange(self, a, b):
1219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if self.input.LA(1) < a or self.input.LA(1) > b:
1220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if self._state.backtracking > 0:
1221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                raise BacktrackingFailed
1222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            mre = MismatchedRangeException(unichr(a), unichr(b), self.input)
1224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self.recover(mre)
1225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            raise mre
1226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self.input.consume()
1228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getLine(self):
1231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return self.input.line
1232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getCharPositionInLine(self):
1235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return self.input.charPositionInLine
1236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getCharIndex(self):
1239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """What is the index of the current character of lookahead?"""
1240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return self.input.index()
1242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getText(self):
1245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
1246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Return the text matched so far for the current token or any
1247324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        text override.
1248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
1249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if self._state.text is not None:
1250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return self._state.text
1251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1252324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return self.input.substring(
1253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self._state.tokenStartCharIndex,
1254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            self.getCharIndex()-1
1255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            )
1256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def setText(self, text):
1259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
1260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Set the complete text of this token; it wipes any previous
1261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        changes to the text.
1262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
1263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self._state.text = text
1264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    text = property(getText, setText)
1267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def reportError(self, e):
1270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ## TODO: not thought about recovery in lexer yet.
1271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ## # if we've already reported an error and have not matched a token
1273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ## # yet successfully, don't report any errors.
1274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ## if self.errorRecovery:
1275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ##     #System.err.print("[SPURIOUS] ");
1276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ##     return;
1277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ##
1278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ## self.errorRecovery = True
1279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self.displayRecognitionError(self.tokenNames, e)
1281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1282324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1283324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getErrorMessage(self, e, tokenNames):
1284324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        msg = None
1285324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1286324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if isinstance(e, MismatchedTokenException):
1287324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = "mismatched character " \
1288324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + self.getCharErrorDisplay(e.c) \
1289324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + " expecting " \
1290324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + self.getCharErrorDisplay(e.expecting)
1291324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1292324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        elif isinstance(e, NoViableAltException):
1293324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = "no viable alternative at character " \
1294324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + self.getCharErrorDisplay(e.c)
1295324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1296324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        elif isinstance(e, EarlyExitException):
1297324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = "required (...)+ loop did not match anything at character " \
1298324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + self.getCharErrorDisplay(e.c)
1299324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1300324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        elif isinstance(e, MismatchedNotSetException):
1301324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = "mismatched character " \
1302324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + self.getCharErrorDisplay(e.c) \
1303324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + " expecting set " \
1304324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + repr(e.expecting)
1305324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1306324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        elif isinstance(e, MismatchedSetException):
1307324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = "mismatched character " \
1308324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + self.getCharErrorDisplay(e.c) \
1309324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + " expecting set " \
1310324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + repr(e.expecting)
1311324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1312324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        elif isinstance(e, MismatchedRangeException):
1313324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = "mismatched character " \
1314324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + self.getCharErrorDisplay(e.c) \
1315324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + " expecting set " \
1316324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + self.getCharErrorDisplay(e.a) \
1317324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + ".." \
1318324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                  + self.getCharErrorDisplay(e.b)
1319324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1320324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        else:
1321324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            msg = BaseRecognizer.getErrorMessage(self, e, tokenNames)
1322324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1323324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return msg
1324324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1325324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1326324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def getCharErrorDisplay(self, c):
1327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if c == EOF:
1328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            c = '<EOF>'
1329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return repr(c)
1330324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1332324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def recover(self, re):
1333324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
1334324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Lexers can normally match any char in it's vocabulary after matching
1335324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        a token, so do the easy thing and just kill a character and hope
1336324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        it all works out.  You can instead use the rule invocation stack
1337324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        to do sophisticated error recovery if you are in a fragment rule.
1338324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        """
1339324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1340324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        self.input.consume()
1341324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1342324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def traceIn(self, ruleName, ruleIndex):
1344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        inputSymbol = "%s line=%d:%s" % (self.input.LT(1),
1345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                                         self.getLine(),
1346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                                         self.getCharPositionInLine()
1347324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                                         )
1348324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1349324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        BaseRecognizer.traceIn(self, ruleName, ruleIndex, inputSymbol)
1350324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1352324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    def traceOut(self, ruleName, ruleIndex):
1353324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        inputSymbol = "%s line=%d:%s" % (self.input.LT(1),
1354324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                                         self.getLine(),
1355324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                                         self.getCharPositionInLine()
1356324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                                         )
1357324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1358324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        BaseRecognizer.traceOut(self, ruleName, ruleIndex, inputSymbol)
1359324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1360324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1361324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
class Parser(BaseRecognizer):
    """
    @brief Baseclass for generated parser classes.

    The token stream being parsed is kept in `self.input`.
    """

    def __init__(self, lexer, state=None):
        """
        Create a parser reading from `lexer`; `state` is passed on to
        BaseRecognizer.__init__.
        """
        BaseRecognizer.__init__(self, state)

        # Stored as the parser's input; the methods below call seek(),
        # LT() and getSourceName() on it.
        self.input = lexer


    def reset(self):
        """
        Reset the recognizer state and, when an input is attached,
        rewind it to position 0.
        """
        BaseRecognizer.reset(self) # reset all recognizer state variables

        stream = self.input
        if stream is not None:
            stream.seek(0) # rewind the input


    def getCurrentInputSymbol(self, input):
        """Return the first lookahead symbol, LT(1), of `input`."""
        return input.LT(1)


    def getMissingSymbol(self, input, e, expectedTokenType, follow):
        """
        Create a placeholder token of type `expectedTokenType` whose text
        is "<missing NAME>".

        The line and column are copied from the current lookahead token,
        or from the previous token, LT(-1), when the lookahead is EOF.
        `e` and `follow` are not used by this implementation.
        """
        if expectedTokenType == EOF:
            tokenText = "<missing EOF>"
        else:
            tokenText = "<missing " + self.tokenNames[expectedTokenType] + ">"

        missing = CommonToken(type=expectedTokenType, text=tokenText)

        # Take the position from a real token where one is available.
        reference = input.LT(1)
        if reference.type == EOF:
            reference = input.LT(-1)

        if reference is not None:
            missing.line = reference.line
            missing.charPositionInLine = reference.charPositionInLine

        missing.channel = DEFAULT_CHANNEL
        return missing


    def setTokenStream(self, input):
        """
        Set the token stream and reset the parser.

        self.input is cleared before reset() is called, so the reset does
        not rewind the previously attached stream.
        """

        self.input = None
        self.reset()
        self.input = input


    def getTokenStream(self):
        """Return the input this parser currently reads from."""
        return self.input


    def getSourceName(self):
        """Return whatever the input reports as its source name."""
        stream = self.input
        return stream.getSourceName()


    def traceIn(self, ruleName, ruleIndex):
        """Trace rule entry, reporting LT(1) as the current input symbol."""
        lookahead = self.input.LT(1)
        BaseRecognizer.traceIn(self, ruleName, ruleIndex, lookahead)


    def traceOut(self, ruleName, ruleIndex):
        """Trace rule exit, reporting LT(1) as the current input symbol."""
        lookahead = self.input.LT(1)
        BaseRecognizer.traceOut(self, ruleName, ruleIndex, lookahead)
1422324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1423324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
class RuleReturnScope(object):
    """
    Rules can return start/stop info as well as possible trees and templates.

    Every accessor of this base class returns None.
    """

    def getStart(self):
        """Start token or tree of the rule; None in this base class."""
        return None


    def getStop(self):
        """Stop token or tree of the rule; None in this base class."""
        return None


    def getTree(self):
        """Potentially has a value if output=AST; None in this base class."""
        return None


    def getTemplate(self):
        """Potentially has a value if output=template; None in this base class."""
        return None
1447324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1448324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
class ParserRuleReturnScope(RuleReturnScope):
    """
    Aggregate return value for a parser rule.

    A rule that returns more than a single value hands back one object
    holding all of them.  In addition to the properties listed in
    RuleLabelScope.predefinedRulePropertiesScope, user-defined return
    values may be present.  Only the properties that always exist are
    declared here, along with accessors for those that may be available
    depending on the output option (template, tree).

    The text of a rule is not stored on this object; it is computed from
    start and stop through the input stream's toString() method.  A
    constructor taking the input stream could be added so the stream is
    kept here, but it is unclear that this is desirable: asking for the
    .text property appears to be undefined anyway when a rule matches
    tokens coming from multiple input streams.

    Fields of plain value aggregates such as this one are meant to be
    read directly rather than through getters.  The accessor methods
    below exist only to satisfy the superclass interface.
    """

    def __init__(self):
        # Every field starts out as None until a value is assigned.
        self.start = self.stop = None
        # Only used when output=AST.
        self.tree = None


    def getStart(self):
        """Return the current value of the start attribute."""
        return self.start


    def getStop(self):
        """Return the current value of the stop attribute."""
        return self.stop


    def getTree(self):
        """Return the current value of the tree attribute."""
        return self.tree
1486