"""ANTLR3 runtime package"""

# begin[licence]
#
# [The "BSD licence"]
# Copyright (c) 2005-2008 Terence Parr
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# end[licence]

import sys
import inspect

from antlr3 import compatible_api_versions
from antlr3.constants import DEFAULT_CHANNEL, HIDDEN_CHANNEL, EOF, \
     EOR_TOKEN_TYPE, INVALID_TOKEN_TYPE
from antlr3.exceptions import RecognitionException, MismatchedTokenException, \
     MismatchedRangeException, MismatchedTreeNodeException, \
     NoViableAltException, EarlyExitException, MismatchedSetException, \
     MismatchedNotSetException, FailedPredicateException, \
     BacktrackingFailed, UnwantedTokenException, MissingTokenException
from antlr3.tokens import CommonToken, SKIP_TOKEN
from antlr3.compat import set, frozenset, reversed


class RecognizerSharedState(object):
    """
    Mutable bookkeeping a recognizer needs while matching input and
    recovering from errors.

    The fields live in their own object so that several recognizers can
    share one instance, e.g. when one grammar imports another.

    The fields themselves are publicly visible, but the reference each
    parser keeps to its state object is protected.
    """

    def __init__(self):
        # --- fields used by every kind of recognizer ---------------------

        # Stack (growing upwards) of the token-type sets that may follow
        # each rule invocation currently in progress.
        self.following = []

        # True from the moment an error is seen until a token has been
        # matched successfully again; keeps a single error from producing
        # more than one message.
        self.errorRecovery = False

        # Input index at which the most recent error occurred. Failsafe
        # against endless loops in which recovery consumes nothing and the
        # same error is hit again, ad nauseam: it guarantees that at least
        # one token/tree node is consumed for two errors.
        self.lastErrorIndex = -1

        # Backtracking depth. 0 means no backtracking is going on and it is
        # safe to execute actions; >0 is the current nesting level.
        self.backtracking = 0

        # Per-rule memoization tables: ruleMemo[ruleIndex] maps a rule
        # start index to the stop token index of that rule invocation, or
        # to MEMO_RULE_FAILED. Only used when rule memoization is on
        # (which it is by default).
        self.ruleMemo = None

        # Number of syntax errors the recognizer has encountered.
        self.syntaxErrors = 0

        # --- lexer-only fields --------------------------------------------
        # (kept in the same state object to avoid constant casting in
        # generated code and in the Lexer object)

        # The token the lexer rules are cooperating to build; nextToken
        # returns it after the lexer rule(s) matched. A subclass that emits
        # several tokens should set this to the last token matched (or any
        # non-null value) so the automatic emit mechanism does not emit
        # another token.
        self.token = None

        # Character index in the stream at which the current token started
        # (needed, for example, to get the text of the current token). Set
        # at the start of nextToken.
        self.tokenStartCharIndex = -1

        # Line on which the first character of the token resides.
        self.tokenStartLine = None

        # Position within that line of the token's first character.
        self.tokenStartCharPositionInLine = None

        # Channel number for the current token.
        self.channel = None

        # Token type for the current token.
        self.type = None

        # Text override for the current token; when set (via setText() or
        # directly) it is used instead of what is in the input char buffer.
        self.text = None


class BaseRecognizer(object):
    """
    @brief Common recognizer functionality.

    Generic base for the recognizers generated from lexer, parser and tree
    grammars. Essentially all of the parsing support code lives here; most
    of it deals with error recovery and backtracking.
    """

    MEMO_RULE_FAILED = -2
    MEMO_RULE_UNKNOWN = -1

    # copies from Token object for convenience in actions
    DEFAULT_TOKEN_CHANNEL = DEFAULT_CHANNEL

    # for convenience in actions
    HIDDEN = HIDDEN_CHANNEL

    # overridden by generated subclasses
    tokenNames = None

    # The api_version attribute has been introduced in 3.3. If it is not
    # overwritten in the generated recognizer, we assume a default of v0.
150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver api_version = 0 151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def __init__(self, state=None): 153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # Input stream of the recognizer. Must be initialized by a subclass. 154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input = None 155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ## State of a lexer, parser, or tree parser are collected into a state 157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # object so the state can be shared. This sharing is needed to 158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # have one grammar import others and share same error variables 159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # and other state variables. It's a kind of explicit multiple 160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # inheritance via delegation of methods and shared state. 
161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if state is None: 162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state = RecognizerSharedState() 163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state = state 164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self.api_version not in compatible_api_versions: 166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise RuntimeError( 167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ("ANTLR version mismatch: " 168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver "The recognizer has been generated with API V%s, " 169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver "but this runtime does not support this.") 170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver % self.api_version) 171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # this one only exists to shut up pylint :( 173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def setInput(self, input): 174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input = input 175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def reset(self): 178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver reset the parser's state; subclasses must rewinds the input stream 180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # wack everything related to error recovery 183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self._state is None: 184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # no shared state work to do 185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return 186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.following = [] 188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.errorRecovery = False 189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.lastErrorIndex = -1 190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.syntaxErrors = 0 191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # wack everything related to backtracking and memoization 192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.backtracking = 0 193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self._state.ruleMemo is not None: 194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.ruleMemo = {} 195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def match(self, input, ttype, follow): 198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Match current input symbol against ttype. Attempt 200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver single token insertion or deletion error recovery. If 201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver that fails, throw MismatchedTokenException. 202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver To turn off single token insertion or deletion error 204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver recovery, override recoverFromMismatchedToken() and have it 205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver throw an exception. See TreeParser.recoverFromMismatchedToken(). 206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver This way any error in a rule will cause an exception and 207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver immediate exit from rule. Rule would recover by resynchronizing 208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver to the set of symbols that can follow rule ref. 
209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver matchedSymbol = self.getCurrentInputSymbol(input) 212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self.input.LA(1) == ttype: 213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input.consume() 214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.errorRecovery = False 215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return matchedSymbol 216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self._state.backtracking > 0: 218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # FIXME: need to return matchedSymbol here as well. damn!! 219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise BacktrackingFailed 220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver matchedSymbol = self.recoverFromMismatchedToken(input, ttype, follow) 222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return matchedSymbol 223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def matchAny(self, input): 226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """Match the wildcard: in a symbol""" 227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.errorRecovery = False 229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input.consume() 230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def mismatchIsUnwantedToken(self, input, ttype): 233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return input.LA(2) == ttype 234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def mismatchIsMissingToken(self, input, follow): 237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if follow is None: 238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # we have no information about the follow; we can only consume 239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # a single token and hope for the best 240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return False 241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # compute what can follow this grammar element reference 243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if EOR_TOKEN_TYPE in follow: 244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver viableTokensFollowingThisRule = self.computeContextSensitiveRuleFOLLOW() 245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver follow = follow | viableTokensFollowingThisRule 246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 247324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if len(self._state.following) > 0: 248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # remove EOR if we're not the start symbol 249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver follow = follow - set([EOR_TOKEN_TYPE]) 250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # if current token is consistent with what could come after set 252324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # then we know we're missing a token; error recovery is free to 253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # "insert" the missing token 254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if input.LA(1) in follow or EOR_TOKEN_TYPE in follow: 255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return True 256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return False 
258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def reportError(self, e): 261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """Report a recognition problem. 262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver This method sets errorRecovery to indicate the parser is recovering 264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver not parsing. Once in recovery mode, no errors are generated. 265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver To get out of recovery mode, the parser must successfully match 266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver a token (after a resync). So it will go: 267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1. error occurs 269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 2. enter recovery mode, report error 270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 3. consume until token found in resynch set 271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 4. try to resume parsing 272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 5. next match() will reset errorRecovery mode 273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver If you override, make sure to update syntaxErrors if you care about 275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver that. 276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # if we've already reported an error and have not matched a token 280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # yet successfully, don't report any errors. 
281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self._state.errorRecovery: 282324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return 283324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 284324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.syntaxErrors += 1 # don't count spurious 285324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.errorRecovery = True 286324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 287324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.displayRecognitionError(self.tokenNames, e) 288324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 289324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 290324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def displayRecognitionError(self, tokenNames, e): 291324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver hdr = self.getErrorHeader(e) 292324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = self.getErrorMessage(e, tokenNames) 293324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.emitErrorMessage(hdr+" "+msg) 294324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 295324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 296324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getErrorMessage(self, e, tokenNames): 297324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 298324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver What error message should be generated for the various 299324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver exception types? 300324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 301324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Not very object-oriented code, but I like having all error message 302324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver generation within one method rather than spread among all of the 303324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver exception classes. 
This also makes it much easier for the exception 304324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver handling because the exception classes do not have to have pointers back 305324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver to this object to access utility routines and so on. Also, changing 306324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver the message for an exception type would be difficult because you 307324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver would have to subclassing exception, but then somehow get ANTLR 308324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver to make those kinds of exception objects instead of the default. 309324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver This looks weird, but trust me--it makes the most sense in terms 310324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver of flexibility. 311324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 312324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver For grammar debugging, you will want to override this to add 313324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver more information such as the stack frame with 314324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver getRuleInvocationStack(e, this.getClass().getName()) and, 315324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for no viable alts, the decision description and state etc... 316324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 317324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Override this to change the message generated for one or more 318324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver exception types. 
319324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 320324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 321324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if isinstance(e, UnwantedTokenException): 322324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = "<unknown>" 323324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if e.expecting == EOF: 324324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = "EOF" 325324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 326324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else: 327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = self.tokenNames[e.expecting] 328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "extraneous input %s expecting %s" % ( 330324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.getTokenErrorDisplay(e.getUnexpectedToken()), 331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName 332324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ) 333324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 334324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver elif isinstance(e, MissingTokenException): 335324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = "<unknown>" 336324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if e.expecting == EOF: 337324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = "EOF" 338324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 339324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else: 340324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = self.tokenNames[e.expecting] 341324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 342324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "missing %s at %s" % ( 343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName, self.getTokenErrorDisplay(e.token) 344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ) 345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver elif isinstance(e, MismatchedTokenException): 347324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = "<unknown>" 348324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if e.expecting == EOF: 349324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = "EOF" 350324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else: 351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = self.tokenNames[e.expecting] 352324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 353324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "mismatched input " \ 354324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + self.getTokenErrorDisplay(e.token) \ 355324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + " expecting " \ 356324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + tokenName 357324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 358324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver elif isinstance(e, MismatchedTreeNodeException): 359324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = "<unknown>" 360324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if e.expecting == EOF: 361324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = "EOF" 362324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else: 363324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = self.tokenNames[e.expecting] 364324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 365324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "mismatched tree node: %s expecting %s" \ 366324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver % (e.node, tokenName) 367324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 368324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver elif isinstance(e, NoViableAltException): 369324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "no viable alternative at input " \ 370324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + self.getTokenErrorDisplay(e.token) 
371324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 372324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver elif isinstance(e, EarlyExitException): 373324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "required (...)+ loop did not match anything at input " \ 374324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + self.getTokenErrorDisplay(e.token) 375324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 376324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver elif isinstance(e, MismatchedSetException): 377324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "mismatched input " \ 378324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + self.getTokenErrorDisplay(e.token) \ 379324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + " expecting set " \ 380324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + repr(e.expecting) 381324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 382324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver elif isinstance(e, MismatchedNotSetException): 383324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "mismatched input " \ 384324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + self.getTokenErrorDisplay(e.token) \ 385324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + " expecting set " \ 386324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + repr(e.expecting) 387324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 388324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver elif isinstance(e, FailedPredicateException): 389324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "rule " \ 390324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + e.ruleName \ 391324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + " failed predicate: {" \ 392324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + e.predicateText \ 393324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + "}?" 
# NOTE(review): the physical line this block starts on also carries the tail
# of getErrorMessage() (its fallback ``else: msg = str(e)`` branch and the
# final ``return msg``).  That method begins before this chunk, so it is not
# reproduced here and must be restored together with its head.


def getNumberOfSyntaxErrors(self):
    """
    Get number of recognition errors (lexer, parser, tree parser).  Each
    recognizer tracks its own number.  So parser and lexer each have
    separate count.  Does not count the spurious errors found between
    an error and next valid token match.

    See also reportError()
    """

    return self._state.syntaxErrors


def getErrorHeader(self, e):
    """
    What is the error header, normally line/character position information?

    Returns "<source> line <line>:<column>" when getSourceName() yields a
    name, otherwise just "line <line>:<column>".  The position is taken
    from e.line and e.charPositionInLine.
    """

    source_name = self.getSourceName()
    if source_name is not None:
        return "%s line %d:%d" % (source_name, e.line, e.charPositionInLine)
    return "line %d:%d" % (e.line, e.charPositionInLine)


def getTokenErrorDisplay(self, t):
    """
    How should a token be displayed in an error message? The default
    is to display just the text, but during development you might
    want to have a lot of information spit out.  Override in that case
    to use t.toString() (which, for CommonToken, dumps everything about
    the token). This is better than forcing you to override a method in
    your token objects because you don't have to go modify your lexer
    so that it creates a new token class.

    Returns repr() of the token text.  When the token has no text, the
    placeholder "<EOF>" is used for the EOF type and "<type>" otherwise.
    """

    s = t.text
    if s is None:
        if t.type == EOF:
            s = "<EOF>"
        else:
            # Token types are presumably integers (they index tokenNames
            # elsewhere in this file), so convert explicitly: "<" + int
            # raises TypeError.
            s = "<" + str(t.type) + ">"

    return repr(s)


def emitErrorMessage(self, msg):
    """Override this method to change where error messages go"""
    sys.stderr.write(msg + '\n')


def recover(self, input, re):
    """
    Recover from an error found on the input stream.  This is
    for NoViableAlt and mismatched symbol exceptions.  If you enable
    single token insertion and deletion, this will usually not
    handle mismatched symbol exceptions but there could be a mismatched
    token that the match() routine could not recover from.

    The exception argument (re) is accepted but not inspected here.
    """

    # PROBLEM? what if input stream is not the same as last time
    # perhaps make lastErrorIndex a member of input
    if self._state.lastErrorIndex == input.index():
        # uh oh, another error at same token index; must be a case
        # where LT(1) is in the recovery token set so nothing is
        # consumed; consume a single token so at least to prevent
        # an infinite loop; this is a failsafe.
        input.consume()

    self._state.lastErrorIndex = input.index()
    followSet = self.computeErrorRecoverySet()

    self.beginResync()
    self.consumeUntil(input, followSet)
    self.endResync()


def beginResync(self):
    """
    A hook to listen in on the token consumption during error recovery.
    The DebugParser subclasses this to fire events to the listener.
    """

    pass


def endResync(self):
    """
    A hook to listen in on the token consumption during error recovery.
    The DebugParser subclasses this to fire events to the listener.
    """

    pass


def computeErrorRecoverySet(self):
    """
    Compute the error recovery set for the current rule.  During
    rule invocation, the parser pushes the set of tokens that can
    follow that rule reference on the stack; this amounts to
    computing FIRST of what follows the rule reference in the
    enclosing rule. This local follow set only includes tokens
    from within the rule; i.e., the FIRST computation done by
    ANTLR stops at the end of a rule.

    EXAMPLE

    When you find a "no viable alt exception", the input is not
    consistent with any of the alternatives for rule r.  The best
    thing to do is to consume tokens until you see something that
    can legally follow a call to r *or* any rule that called r.
    You don't want the exact set of viable next tokens because the
    input might just be missing a token--you might consume the
    rest of the input looking for one of the missing tokens.

    Consider grammar:

    a : '[' b ']'
      | '(' b ')'
      ;
    b : c '^' INT ;
    c : ID
      | INT
      ;

    At each rule invocation, the set of tokens that could follow
    that rule is pushed on a stack.  Here are the various "local"
    follow sets:

    FOLLOW(b1_in_a) = FIRST(']') = ']'
    FOLLOW(b2_in_a) = FIRST(')') = ')'
    FOLLOW(c_in_b) = FIRST('^') = '^'

    Upon erroneous input "[]", the call chain is

    a -> b -> c

    and, hence, the follow context stack is:

    depth  local follow set     after call to rule
      0         <EOF>                    a (from main())
      1          ']'                     b
      2          '^'                     c

    Notice that ')' is not included, because b would have to have
    been called from a different context in rule a for ')' to be
    included.

    For error recovery, we cannot consider FOLLOW(c)
    (context-sensitive or otherwise).  We need the combined set of
    all context-sensitive FOLLOW sets--the set of all tokens that
    could follow any reference in the call chain.  We need to
    resync to one of those tokens.  Note that FOLLOW(c)='^' and if
    we resync'd to that token, we'd consume until EOF.  We need to
    sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
    In this case, for input "[]", LA(1) is in this set so we would
    not consume anything and after printing an error rule c would
    return normally.  It would not find the required '^' though.
    At this point, it gets a mismatched token error and throws an
    exception (since LA(1) is not in the viable following token
    set).  The rule exception handler tries to recover, but finds
    the same recovery set and doesn't consume anything.  Rule b
    exits normally returning to rule a.  Now it finds the ']' (and
    with the successful match exits errorRecovery mode).

    So, you can see that the parser walks up call chain looking
    for the token that was a member of the recovery set.

    Errors are not generated in errorRecovery mode.

    ANTLR's error recovery mechanism is based upon original ideas:

    "Algorithms + Data Structures = Programs" by Niklaus Wirth

    and

    "A note on error recovery in recursive descent parsers":
    http://portal.acm.org/citation.cfm?id=947902.947905

    Later, Josef Grosch had some good ideas:

    "Efficient and Comfortable Error Recovery in Recursive Descent
    Parsers":
    ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip

    Like Grosch I implemented local FOLLOW sets that are combined
    at run-time upon error to avoid overhead during parsing.
    """

    return self.combineFollows(False)


def computeContextSensitiveRuleFOLLOW(self):
    """
    Compute the context-sensitive FOLLOW set for current rule.
    This is set of token types that can follow a specific rule
    reference given a specific call chain.  You get the set of
    viable tokens that can possibly come next (lookahead depth 1)
    given the current call chain.  Contrast this with the
    definition of plain FOLLOW for rule r:

     FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}

    where x in T* and alpha, beta in V*; T is set of terminals and
    V is the set of terminals and nonterminals.  In other words,
    FOLLOW(r) is the set of all tokens that can possibly follow
    references to r in *any* sentential form (context).  At
    runtime, however, we know precisely which context applies as
    we have the call chain.  We may compute the exact (rather
    than covering superset) set of following tokens.

    For example, consider grammar:

    stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
         | "return" expr '.'
         ;
    expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
    atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
         | '(' expr ')'
         ;

    The FOLLOW sets are all inclusive whereas context-sensitive
    FOLLOW sets are precisely what could follow a rule reference.
    For input "i=(3);", here is the derivation:

    stat => ID '=' expr ';'
         => ID '=' atom ('+' atom)* ';'
         => ID '=' '(' expr ')' ('+' atom)* ';'
         => ID '=' '(' atom ')' ('+' atom)* ';'
         => ID '=' '(' INT ')' ('+' atom)* ';'
         => ID '=' '(' INT ')' ';'

    At the "3" token, you'd have a call chain of

      stat -> expr -> atom -> expr -> atom

    What can follow that specific nested ref to atom?  Exactly ')'
    as you can see by looking at the derivation of this specific
    input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.

    You want the exact viable token set when recovering from a
    token mismatch.  Upon token mismatch, if LA(1) is member of
    the viable next token set, then you know there is most likely
    a missing token in the input stream.  "Insert" one by just not
    throwing an exception.
    """

    return self.combineFollows(True)


def combineFollows(self, exact):
    """
    Union the local follow sets stored in self._state.following.

    The stack is walked from its last (innermost) entry down to index 0.
    With exact false, every local set is merged.  With exact true, the
    walk stops after the first local set that does not contain
    EOR_TOKEN_TYPE, and EOR_TOKEN_TYPE is stripped from the result unless
    it was contributed by the entry at index 0.

    Returns a new set; the local follow sets are not modified.
    """

    followSet = set()
    following = self._state.following
    for idx in reversed(range(len(following))):
        localFollowSet = following[idx]
        followSet |= localFollowSet
        if exact:
            # can we see end of rule?
            if EOR_TOKEN_TYPE in localFollowSet:
                # Only leave EOR in set if at top (start rule); this lets
                # us know if have to include follow(start rule); i.e., EOF
                if idx > 0:
                    followSet.remove(EOR_TOKEN_TYPE)

            else:
                # can't see end of rule, quit
                break

    return followSet


def recoverFromMismatchedToken(self, input, ttype, follow):
    """Attempt to recover from a single missing or extra token.

    EXTRA TOKEN

    LA(1) is not what we are looking for.  If LA(2) has the right token,
    however, then assume LA(1) is some extra spurious token.  Delete it
    and LA(2) as if we were doing a normal match(), which advances the
    input.

    MISSING TOKEN

    If current token is consistent with what could come after
    ttype then it is ok to 'insert' the missing token, else throw
    exception.  For example, Input 'i=(3;' is clearly missing the
    ')'.  When the parser returns from the nested call to expr, it
    will have call chain:

      stat -> expr -> atom

    and it will be trying to match the ')' at this point in the
    derivation:

         => ID '=' '(' INT ')' ('+' atom)* ';'
                            ^
    match() will see that ';' doesn't match ')' and report a
    mismatched token error.  To recover, it sees that LA(1)==';'
    is in the set of tokens that can follow the ')' token
    reference in rule atom.  It can assume that you forgot the ')'.

    Returns the symbol that was matched after deleting the extra token,
    or the symbol conjured up by getMissingSymbol() for an insertion.
    Raises MismatchedTokenException when neither strategy applies.
    """

    e = None

    # if next token is what we are looking for then "delete" this token
    if self.mismatchIsUnwantedToken(input, ttype):
        e = UnwantedTokenException(ttype, input)

        self.beginResync()
        input.consume()  # simply delete extra token
        self.endResync()

        # report after consuming so AW sees the token in the exception
        self.reportError(e)

        # we want to return the token we're actually matching
        matchedSymbol = self.getCurrentInputSymbol(input)

        # move past ttype token as if all were ok
        input.consume()
        return matchedSymbol

    # can't recover with single token deletion, try insertion
    if self.mismatchIsMissingToken(input, follow):
        # e is still None here; it is passed through unchanged
        inserted = self.getMissingSymbol(input, e, ttype, follow)
        e = MissingTokenException(ttype, input, inserted)

        # report after inserting so AW sees the token in the exception
        self.reportError(e)
        return inserted

    # even that didn't work; must throw the exception
    e = MismatchedTokenException(ttype, input)
    raise e


def recoverFromMismatchedSet(self, input, e, follow):
    """Not currently used

    Reports e and returns a conjured symbol of INVALID_TOKEN_TYPE when
    the mismatch looks like a missing token; otherwise re-raises e.
    """

    if self.mismatchIsMissingToken(input, follow):
        self.reportError(e)
        # we don't know how to conjure up a token for sets yet
        return self.getMissingSymbol(input, e, INVALID_TOKEN_TYPE, follow)

    # TODO do single token deletion like above for Token mismatch
    raise e


def getCurrentInputSymbol(self, input):
    """
    Match needs to return the current input symbol, which gets put
    into the label for the associated token ref; e.g., x=ID.  Token
    and tree parsers need to return different objects. Rather than test
    for input stream type or change the IntStream interface, I use
    a simple method to ask the recognizer to tell me what the current
    input symbol is.

    This is ignored for lexers.  This default implementation returns
    None.
    """

    return None


def getMissingSymbol(self, input, e, expectedTokenType, follow):
    """Conjure up a missing token during error recovery.

    The recognizer attempts to recover from single missing
    symbols. But, actions might refer to that missing symbol.
    For example, x=ID {f($x);}. The action clearly assumes
    that there has been an identifier matched previously and that
    $x points at that token. If that token is missing, but
    the next token in the stream is what we want we assume that
    this token is missing and we keep going. Because we
    have to return some token to replace the missing token,
    we have to conjure one up. This method gives the user control
    over the tokens returned for missing tokens. Mostly,
    you will want to create something special for identifier
    tokens. For literals such as '{' and ',', the default
    action in the parser or tree parser works. It simply creates
    a CommonToken of the appropriate type. The text will be the token.
    If you change what tokens must be created by the lexer,
    override this method to create the appropriate tokens.

    This default implementation returns None.
    """

    return None


##     def recoverFromMissingElement(self, input, e, follow):
##         """
##         This code is factored out from mismatched token and mismatched set
##         recovery.  It handles "single token insertion" error recovery for
##         both.  No tokens are consumed to recover from insertions.  Return
##         true if recovery was possible else return false.
##         """

##         if self.mismatchIsMissingToken(input, follow):
##             self.reportError(e)
##             return True

##         # nothing to do; throw exception
##         return False


def consumeUntil(self, input, tokenTypes):
    """
    Consume tokens until one matches the given token or token set

    tokenTypes can be a single token type or a set of token types

    Consumption also stops when input.LA(1) reports EOF.
    """

    if not isinstance(tokenTypes, (set, frozenset)):
        tokenTypes = frozenset([tokenTypes])

    ttype = input.LA(1)
    while ttype != EOF and ttype not in tokenTypes:
        input.consume()
        ttype = input.LA(1)


# NOTE(review): getRuleInvocationStack() starts on the last physical line of
# this block but continues past the end of this chunk, so it is not
# reproduced here and must be restored together with its remainder.
830324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver You probably want to override this method to look like 831324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 832324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getRuleInvocationStack(self): 833324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self._getRuleInvocationStack(<class>.__module__) 834324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 835324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver where <class> is the class of the generated recognizer, e.g. 836324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver the superclass of self. 837324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 838324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 839324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self._getRuleInvocationStack(self.__module__) 840324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 841324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 842324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def _getRuleInvocationStack(cls, module): 843324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 844324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver A more general version of getRuleInvocationStack where you can 845324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pass in, for example, a RecognitionException to get it's rule 846324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver stack trace. This routine is shared with all recognizers, hence, 847324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver static. 848324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 849324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver TODO: move to a utility class or something; weird having lexer call 850324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver this 851324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 852324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 853324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # mmmhhh,... 
perhaps look at the first argument 854324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # (f_locals[co_varnames[0]]?) and test if it's a (sub)class of 855324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # requested recognizer... 856324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 857324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver rules = [] 858324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for frame in reversed(inspect.stack()): 859324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver code = frame[0].f_code 860324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver codeMod = inspect.getmodule(code) 861324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if codeMod is None: 862324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver continue 863324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 864324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # skip frames not in requested module 865324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if codeMod.__name__ != module: 866324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver continue 867324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 868324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # skip some unwanted names 869324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if code.co_name in ('nextToken', '<module>'): 870324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver continue 871324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 872324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver rules.append(code.co_name) 873324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 874324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return rules 875324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 876324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver _getRuleInvocationStack = classmethod(_getRuleInvocationStack) 877324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 878324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 879324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getBacktrackingLevel(self): 
880324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self._state.backtracking 881324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 882324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def setBacktrackingLevel(self, n): 883324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.backtracking = n 884324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 885324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 886324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getGrammarFileName(self): 887324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """For debugging and other purposes, might want the grammar name. 888324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 889324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Have ANTLR generate an implementation for this method. 890324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 891324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 892324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self.grammarFileName 893324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 894324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 895324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getSourceName(self): 896324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise NotImplementedError 897324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 898324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 899324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def toStrings(self, tokens): 900324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """A convenience method for use most often with template rewrites. 
901324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 902324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Convert a List<Token> to List<String> 903324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 904324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 905324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if tokens is None: 906324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return None 907324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 908324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return [token.text for token in tokens] 909324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 910324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 911324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getRuleMemoization(self, ruleIndex, ruleStartIndex): 912324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 913324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Given a rule number and a start token index number, return 914324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver MEMO_RULE_UNKNOWN if the rule has not parsed input starting from 915324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver start index. If this rule has parsed input starting from the 916324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver start index before, then return where the rule stopped parsing. 917324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver It returns the index of the last token matched by the rule. 
918324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 919324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 920324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ruleIndex not in self._state.ruleMemo: 921324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.ruleMemo[ruleIndex] = {} 922324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 923324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self._state.ruleMemo[ruleIndex].get( 924324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ruleStartIndex, self.MEMO_RULE_UNKNOWN 925324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ) 926324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 927324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 928324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def alreadyParsedRule(self, input, ruleIndex): 929324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 930324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Has this rule already parsed input at the current index in the 931324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input stream? Return the stop token index or MEMO_RULE_UNKNOWN. 932324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver If we attempted but failed to parse properly before, return 933324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver MEMO_RULE_FAILED. 934324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 935324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver This method has a side-effect: if we have seen this input for 936324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver this rule and successfully parsed before, then seek ahead to 937324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1 past the stop token matched for this rule last time. 
938324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 939324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 940324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver stopIndex = self.getRuleMemoization(ruleIndex, input.index()) 941324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if stopIndex == self.MEMO_RULE_UNKNOWN: 942324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return False 943324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 944324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if stopIndex == self.MEMO_RULE_FAILED: 945324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise BacktrackingFailed 946324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 947324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else: 948324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input.seek(stopIndex + 1) 949324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 950324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return True 951324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 952324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 953324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def memoize(self, input, ruleIndex, ruleStartIndex, success): 954324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 955324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Record whether or not this rule parsed the input at this position 956324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver successfully. 
957324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 958324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 959324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if success: 960324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver stopTokenIndex = input.index() - 1 961324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else: 962324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver stopTokenIndex = self.MEMO_RULE_FAILED 963324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 964324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ruleIndex in self._state.ruleMemo: 965324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex 966324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 967324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 968324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def traceIn(self, ruleName, ruleIndex, inputSymbol): 969324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver sys.stdout.write("enter %s %s" % (ruleName, inputSymbol)) 970324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 971324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self._state.backtracking > 0: 972324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver sys.stdout.write(" backtracking=%s" % self._state.backtracking) 973324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 974324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver sys.stdout.write('\n') 975324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 976324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 977324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def traceOut(self, ruleName, ruleIndex, inputSymbol): 978324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver sys.stdout.write("exit %s %s" % (ruleName, inputSymbol)) 979324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 980324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self._state.backtracking > 0: 981324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver sys.stdout.write(" backtracking=%s" % self._state.backtracking) 
982324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 983324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # mmmm... we use BacktrackingFailed exceptions now. So how could we 984324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # get that information here? 985324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver #if self._state.failed: 986324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # sys.stdout.write(" failed") 987324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver #else: 988324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # sys.stdout.write(" succeeded") 989324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 990324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver sys.stdout.write('\n') 991324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 992324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 993324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverclass TokenSource(object): 994324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 995324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @brief Abstract baseclass for token producers. 996324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 997324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver A source of tokens must provide a sequence of tokens via nextToken() 998324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver and also must reveal it's source of characters; CommonToken's text is 999324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver computed from a CharStream; it only store indices into the char stream. 1000324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1001324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Errors from the lexer are never passed to the parser. Either you want 1002324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver to keep going or you do not upon token recognition error. If you do not 1003324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver want to continue lexing then you do not want to continue parsing. 
Just 1004324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver throw an exception not under RecognitionException and Java will naturally 1005324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver toss you all the way out of the recognizers. If you want to continue 1006324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexing then you should not throw an exception to the parser--it has already 1007324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver requested a token. Keep lexing until you get a valid one. Just report 1008324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver errors and keep going, looking for a valid token. 1009324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1010324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1011324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def nextToken(self): 1012324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """Return a Token object from your input stream (usually a CharStream). 1013324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1014324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Do not fail/return upon lexing error; keep chewing on the characters 1015324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver until you get a good one; errors are not passed through to the parser. 1016324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1017324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1018324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise NotImplementedError 1019324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1020324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1021324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def __iter__(self): 1022324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """The TokenSource is an interator. 1023324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1024324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver The iteration will not include the final EOF token, see also the note 1025324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for the next() method. 
1026324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1027324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1028324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1029324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self 1030324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1031324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1032324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def next(self): 1033324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """Return next token or raise StopIteration. 1034324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1035324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Note that this will raise StopIteration when hitting the EOF token, 1036324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver so EOF will not be part of the iteration. 1037324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1038324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1039324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1040324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token = self.nextToken() 1041324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if token is None or token.type == EOF: 1042324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise StopIteration 1043324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return token 1044324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1045324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1046324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverclass Lexer(BaseRecognizer, TokenSource): 1047324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1048324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @brief Baseclass for generated lexer classes. 1049324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1050324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver A lexer is recognizer that draws input symbols from a character stream. 1051324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer grammars result in a subclass of this object. 
A Lexer object 1052324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver uses simplified match() and error recovery mechanisms in the interest 1053324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver of speed. 1054324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1055324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1056324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def __init__(self, input, state=None): 1057324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver BaseRecognizer.__init__(self, state) 1058324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver TokenSource.__init__(self) 1059324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1060324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # Where is the lexer drawing characters from? 1061324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input = input 1062324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1063324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1064324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def reset(self): 1065324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver BaseRecognizer.reset(self) # reset all recognizer state variables 1066324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1067324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self.input is not None: 1068324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # rewind the input 1069324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input.seek(0) 1070324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1071324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self._state is None: 1072324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # no shared state work to do 1073324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return 1074324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1075324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # wack Lexer state variables 1076324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.token = None 1077324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.type = INVALID_TOKEN_TYPE 
1078324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.channel = DEFAULT_CHANNEL 1079324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.tokenStartCharIndex = -1 1080324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.tokenStartLine = -1 1081324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.tokenStartCharPositionInLine = -1 1082324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.text = None 1083324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1084324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1085324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def makeEOFToken(self): 1086324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver eof = CommonToken( 1087324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver type=EOF, channel=DEFAULT_CHANNEL, 1088324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input=self.input, 1089324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver start=self.input.index(), stop=self.input.index()) 1090324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver eof.line = self.input.line 1091324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver eof.charPositionInLine = self.input.charPositionInLine 1092324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return eof 1093324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1094324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def nextToken(self): 1095324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1096324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Return a token from this source; i.e., match a token on the char 1097324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver stream. 
1098324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1099324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while 1: 1101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.token = None 1102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.channel = DEFAULT_CHANNEL 1103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.tokenStartCharIndex = self.input.index() 1104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.tokenStartCharPositionInLine = self.input.charPositionInLine 1105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.tokenStartLine = self.input.line 1106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.text = None 1107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self.input.LA(1) == EOF: 1108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self.makeEOFToken() 1109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver try: 1111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.mTokens() 1112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self._state.token is None: 1114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.emit() 1115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver elif self._state.token == SKIP_TOKEN: 1117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver continue 1118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self._state.token 1120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver except NoViableAltException, re: 1122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.reportError(re) 1123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.recover(re) # throw out current char and try again 
1124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver except RecognitionException, re: 1126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.reportError(re) 1127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # match() routine has already called recover() 1128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def skip(self): 1131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Instruct the lexer to skip creating a token for current lexer rule 1133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver and look for another token. nextToken() knows to keep looking when 1134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver a lexer rule finishes with token set to SKIP_TOKEN. Recall that 1135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if token==null at end of any token rule, it creates one for you 1136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver and emits it. 
1137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.token = SKIP_TOKEN 1140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def mTokens(self): 1143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """This is the lexer entry point that sets instance var 'token'""" 1144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # abstract method 1146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise NotImplementedError 1147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def setCharStream(self, input): 1150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """Set the char stream and reset the lexer""" 1151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input = None 1152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.reset() 1153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input = input 1154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getSourceName(self): 1157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self.input.getSourceName() 1158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def emit(self, token=None): 1161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver The standard method called to automatically emit a token at the 1163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver outermost lexical rule. 
The token object should point into the 1164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver char buffer start..stop. If there is a text override in 'text', 1165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver use that to set the token's text. Override this method to emit 1166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver custom Token objects. 1167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver If you are building trees, then you should also override 1169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Parser or TreeParser.getMissingSymbol(). 1170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if token is None: 1173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token = CommonToken( 1174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input=self.input, 1175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver type=self._state.type, 1176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver channel=self._state.channel, 1177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver start=self._state.tokenStartCharIndex, 1178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver stop=self.getCharIndex()-1 1179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ) 1180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token.line = self._state.tokenStartLine 1181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token.text = self._state.text 1182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token.charPositionInLine = self._state.tokenStartCharPositionInLine 1183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.token = token 1185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return token 1187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
1188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def match(self, s): 1190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if isinstance(s, basestring): 1191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for c in s: 1192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self.input.LA(1) != ord(c): 1193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self._state.backtracking > 0: 1194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise BacktrackingFailed 1195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver mte = MismatchedTokenException(c, self.input) 1197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.recover(mte) 1198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise mte 1199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input.consume() 1201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else: 1203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self.input.LA(1) != s: 1204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self._state.backtracking > 0: 1205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise BacktrackingFailed 1206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver mte = MismatchedTokenException(unichr(s), self.input) 1208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.recover(mte) # don't really recover; just consume in lexer 1209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise mte 1210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input.consume() 1212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def matchAny(self): 
1215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input.consume() 1216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def matchRange(self, a, b): 1219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self.input.LA(1) < a or self.input.LA(1) > b: 1220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self._state.backtracking > 0: 1221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise BacktrackingFailed 1222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver mre = MismatchedRangeException(unichr(a), unichr(b), self.input) 1224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.recover(mre) 1225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise mre 1226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input.consume() 1228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getLine(self): 1231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self.input.line 1232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getCharPositionInLine(self): 1235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self.input.charPositionInLine 1236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getCharIndex(self): 1239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """What is the index of the current character of lookahead?""" 1240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self.input.index() 
1242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getText(self): 1245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Return the text matched so far for the current token or any 1247324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver text override. 1248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self._state.text is not None: 1250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self._state.text 1251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1252324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self.input.substring( 1253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.tokenStartCharIndex, 1254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.getCharIndex()-1 1255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ) 1256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def setText(self, text): 1259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Set the complete text of this token; it wipes any previous 1261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver changes to the text. 
1262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self._state.text = text 1264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver text = property(getText, setText) 1267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def reportError(self, e): 1270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ## TODO: not thought about recovery in lexer yet. 1271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ## # if we've already reported an error and have not matched a token 1273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ## # yet successfully, don't report any errors. 1274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ## if self.errorRecovery: 1275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ## #System.err.print("[SPURIOUS] "); 1276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ## return; 1277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ## 1278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ## self.errorRecovery = True 1279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.displayRecognitionError(self.tokenNames, e) 1281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1282324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1283324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getErrorMessage(self, e, tokenNames): 1284324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = None 1285324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1286324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if isinstance(e, MismatchedTokenException): 1287324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "mismatched character " \ 
1288324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + self.getCharErrorDisplay(e.c) \ 1289324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + " expecting " \ 1290324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + self.getCharErrorDisplay(e.expecting) 1291324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1292324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver elif isinstance(e, NoViableAltException): 1293324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "no viable alternative at character " \ 1294324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + self.getCharErrorDisplay(e.c) 1295324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1296324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver elif isinstance(e, EarlyExitException): 1297324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "required (...)+ loop did not match anything at character " \ 1298324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + self.getCharErrorDisplay(e.c) 1299324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1300324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver elif isinstance(e, MismatchedNotSetException): 1301324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "mismatched character " \ 1302324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + self.getCharErrorDisplay(e.c) \ 1303324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + " expecting set " \ 1304324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + repr(e.expecting) 1305324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1306324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver elif isinstance(e, MismatchedSetException): 1307324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "mismatched character " \ 1308324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + self.getCharErrorDisplay(e.c) \ 1309324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + " expecting set " \ 1310324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + repr(e.expecting) 1311324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
1312324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver elif isinstance(e, MismatchedRangeException): 1313324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "mismatched character " \ 1314324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + self.getCharErrorDisplay(e.c) \ 1315324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + " expecting set " \ 1316324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + self.getCharErrorDisplay(e.a) \ 1317324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + ".." \ 1318324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + self.getCharErrorDisplay(e.b) 1319324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1320324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else: 1321324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = BaseRecognizer.getErrorMessage(self, e, tokenNames) 1322324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1323324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return msg 1324324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1325324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1326324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getCharErrorDisplay(self, c): 1327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if c == EOF: 1328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver c = '<EOF>' 1329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return repr(c) 1330324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1332324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def recover(self, re): 1333324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1334324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Lexers can normally match any char in it's vocabulary after matching 1335324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver a token, so do the easy thing and just kill a character and hope 1336324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver it all works out. 
You can instead use the rule invocation stack 1337324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver to do sophisticated error recovery if you are in a fragment rule. 1338324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1339324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1340324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input.consume() 1341324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1342324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def traceIn(self, ruleName, ruleIndex): 1344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver inputSymbol = "%s line=%d:%s" % (self.input.LT(1), 1345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.getLine(), 1346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.getCharPositionInLine() 1347324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ) 1348324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1349324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver BaseRecognizer.traceIn(self, ruleName, ruleIndex, inputSymbol) 1350324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1352324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def traceOut(self, ruleName, ruleIndex): 1353324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver inputSymbol = "%s line=%d:%s" % (self.input.LT(1), 1354324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.getLine(), 1355324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.getCharPositionInLine() 1356324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ) 1357324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1358324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver BaseRecognizer.traceOut(self, ruleName, ruleIndex, inputSymbol) 1359324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1360324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1361324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1362324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverclass Parser(BaseRecognizer): 
1363324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1364324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @brief Baseclass for generated parser classes. 1365324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1366324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1367324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def __init__(self, lexer, state=None): 1368324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver BaseRecognizer.__init__(self, state) 1369324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1370324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input = lexer 1371324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1372324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1373324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def reset(self): 1374324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver BaseRecognizer.reset(self) # reset all recognizer state variables 1375324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if self.input is not None: 1376324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input.seek(0) # rewind the input 1377324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1378324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1379324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getCurrentInputSymbol(self, input): 1380324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return input.LT(1) 1381324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1382324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1383324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getMissingSymbol(self, input, e, expectedTokenType, follow): 1384324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if expectedTokenType == EOF: 1385324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenText = "<missing EOF>" 1386324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else: 1387324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenText = "<missing " + self.tokenNames[expectedTokenType] + ">" 1388324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t = 
CommonToken(type=expectedTokenType, text=tokenText) 1389324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver current = input.LT(1) 1390324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if current.type == EOF: 1391324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver current = input.LT(-1) 1392324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1393324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if current is not None: 1394324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t.line = current.line 1395324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t.charPositionInLine = current.charPositionInLine 1396324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t.channel = DEFAULT_CHANNEL 1397324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return t 1398324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1399324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1400324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def setTokenStream(self, input): 1401324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """Set the token stream and reset the parser""" 1402324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1403324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input = None 1404324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.reset() 1405324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.input = input 1406324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1407324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1408324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getTokenStream(self): 1409324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self.input 1410324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1411324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1412324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getSourceName(self): 1413324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self.input.getSourceName() 1414324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1415324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
1416324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def traceIn(self, ruleName, ruleIndex): 1417324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver BaseRecognizer.traceIn(self, ruleName, ruleIndex, self.input.LT(1)) 1418324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1419324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1420324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def traceOut(self, ruleName, ruleIndex): 1421324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver BaseRecognizer.traceOut(self, ruleName, ruleIndex, self.input.LT(1)) 1422324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1423324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1424324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverclass RuleReturnScope(object): 1425324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1426324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Rules can return start/stop info as well as possible trees and templates. 1427324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1428324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1429324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getStart(self): 1430324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """Return the start token or tree.""" 1431324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return None 1432324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1433324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1434324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getStop(self): 1435324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """Return the stop token or tree.""" 1436324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return None 1437324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1438324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1439324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getTree(self): 1440324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """Has a value potentially if output=AST.""" 1441324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return None 
1442324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1443324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1444324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getTemplate(self): 1445324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """Has a value potentially if output=template.""" 1446324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return None 1447324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1448324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1449324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverclass ParserRuleReturnScope(RuleReturnScope): 1450324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1451324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Rules that return more than a single value must return an object 1452324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver containing all the values. Besides the properties defined in 1453324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver RuleLabelScope.predefinedRulePropertiesScope there may be user-defined 1454324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return values. This class simply defines the minimum properties that 1455324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver are always defined and methods to access the others that might be 1456324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver available depending on output option such as template and tree. 1457324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1458324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Note text is not an actual property of the return value, it is computed 1459324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver from start and stop using the input stream's toString() method. I 1460324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver could add a ctor to this so that we can pass in and store the input 1461324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver stream, but I'm not sure we want to do that. 
It would seem to be undefined 1462324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver to get the .text property anyway if the rule matches tokens from multiple 1463324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input streams. 1464324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1465324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver I do not use getters for fields of objects that are used simply to 1466324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver group values such as this aggregate. The getters/setters are there to 1467324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver satisfy the superclass interface. 1468324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver """ 1469324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1470324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def __init__(self): 1471324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.start = None 1472324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.stop = None 1473324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.tree = None # only used when output=AST 1474324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1475324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1476324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getStart(self): 1477324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self.start 1478324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1479324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1480324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getStop(self): 1481324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self.stop 1482324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1483324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1484324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def getTree(self): 1485324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return self.tree 1486