1324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverpackage org.antlr.runtime { 2324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 3324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** A generic recognizer that can handle recognizers generated from 4324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * lexer, parser, and tree grammars. This is all the parsing 5324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * support code essentially; most of it is error recovery stuff and 6324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * backtracking. 7324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 8324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public class BaseRecognizer { 9324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public static const MEMO_RULE_FAILED:int = -2; 10324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public static const MEMO_RULE_UNKNOWN:int = -1; 11324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public static const INITIAL_FOLLOW_STACK_SIZE:int = 100; 12324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 13324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // copies from Token object for convenience in actions 14324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public static const DEFAULT_TOKEN_CHANNEL:int = TokenConstants.DEFAULT_CHANNEL; 15324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public static const HIDDEN:int = TokenConstants.HIDDEN_CHANNEL; 16324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 17324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public static const NEXT_TOKEN_RULE_NAME:String = "nextToken"; 18324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 19324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** State of a lexer, parser, or tree parser are collected into a state 20324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * object so the state can be shared. This sharing is needed to 21324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * have one grammar import others and share same error variables 22324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * and other state variables. It's a kind of explicit multiple 23324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * inheritance via delegation of methods and shared state. 24324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 25324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 26324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public var state:RecognizerSharedState; // TODO - Place in private Namespace - cannot be private 27324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 28324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function BaseRecognizer(state:RecognizerSharedState = null) { 29324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( state == null ) { // don't ever let us have a null state 30324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state = new RecognizerSharedState(); 31324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 32324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver this.state = state; 33324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 34324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 35324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** reset the parser's state; subclasses must rewinds the input stream */ 36324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function reset():void { 37324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // wack everything related to error recovery 38324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (state == null) { 39324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return; 40324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 41324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state._fsp = -1; 42324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.errorRecovery = false; 43324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.lastErrorIndex = -1; 44324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.failed = false; 45324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.syntaxErrors = 0; 46324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // wack everything related to backtracking and memoization 47324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.backtracking = 0; 48324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for (var i:int = 0; state.ruleMemo!=null && i < state.ruleMemo.length; i++) { // wipe cache 49324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.ruleMemo[i] = null; 50324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 51324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 52324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 53324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Match current input symbol against ttype. Attempt 54324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * single token insertion or deletion error recovery. If 55324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * that fails, throw MismatchedTokenException. 56324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 57324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * To turn off single token insertion or deletion error 58324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * recovery, override mismatchRecover() and have it call 59324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * plain mismatch(), which does not recover. Then any error 60324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * in a rule will cause an exception and immediate exit from 61324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * rule. Rule would recover by resynchronizing to the set of 62324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * symbols that can follow rule ref. 63324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 64324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function matchStream(input:IntStream, ttype:int, follow:BitSet):Object { 65324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver //System.out.println("match "+((TokenStream)input).LT(1)); 66324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var matchedSymbol:Object = getCurrentInputSymbol(input); 67324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( input.LA(1)==ttype ) { 68324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input.consume(); 69324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.errorRecovery = false; 70324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.failed = false; 71324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return matchedSymbol; 72324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 73324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( state.backtracking>0 ) { 74324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.failed = true; 75324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return matchedSymbol; 76324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 77324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver matchedSymbol = recoverFromMismatchedToken(input, ttype, follow); 78324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return matchedSymbol; 79324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 80324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 81324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Match the wildcard: in a symbol */ 82324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function matchAnyStream(input:IntStream):void { 83324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.errorRecovery = false; 84324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.failed = false; 85324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input.consume(); 86324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 87324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 88324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function mismatchIsUnwantedToken(input:IntStream, ttype:int):Boolean { 89324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return input.LA(2)==ttype; 90324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 91324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 92324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function mismatchIsMissingToken(input:IntStream, follow:BitSet):Boolean { 93324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( follow==null ) { 94324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // we have no information about the follow; we can only consume 95324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // a single token and hope for the best 96324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return false; 97324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 98324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // compute what can follow this grammar element reference 99324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( follow.member(TokenConstants.EOR_TOKEN_TYPE) ) { 100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var viableTokensFollowingThisRule:BitSet = computeContextSensitiveRuleFOLLOW(); 101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver follow = follow.or(viableTokensFollowingThisRule); 102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( state._fsp>=0 ) { // remove EOR if we're not the start symbol 103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver follow.remove(TokenConstants.EOR_TOKEN_TYPE); 104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // if current token is consistent with what could come after set 107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // then we know we're missing a token; error recovery is free to 108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // "insert" the missing token 109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver //System.out.println("LT(1)="+((TokenStream)input).LT(1)); 111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR 113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // in follow set to indicate that the fall of the start symbol is 114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // in the set (EOF can follow). 115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( follow.member(input.LA(1)) || follow.member(TokenConstants.EOR_TOKEN_TYPE) ) { 116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting..."); 117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return true; 118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return false; 120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Factor out what to do upon token mismatch so tree parsers can behave 123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * differently. Override and call mismatchRecover(input, ttype, follow) 124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * to get single token insertion and deletion. Use this to turn of 125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * single token insertion and deletion. Override mismatchRecover 126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * to call this instead. 127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver protected function mismatch(input:IntStream, ttype:int, follow:BitSet):void 129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( mismatchIsUnwantedToken(input, ttype) ) { 131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver throw new UnwantedTokenException(ttype, input); 132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else if ( mismatchIsMissingToken(input, follow) ) { 134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver throw new MissingTokenException(ttype, input, null); 135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver throw new MismatchedTokenException(ttype, input); 137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Report a recognition problem. 140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This method sets errorRecovery to indicate the parser is recovering 142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * not parsing. Once in recovery mode, no errors are generated. 143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * To get out of recovery mode, the parser must successfully match 144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * a token (after a resync). So it will go: 145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 1. error occurs 147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 2. enter recovery mode, report error 148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 3. consume until token found in resynch set 149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 4. try to resume parsing 150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 5. next match() will reset errorRecovery mode 151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * If you override, make sure to update syntaxErrors if you care about that. 153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function reportError(e:RecognitionException):void { 155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // if we've already reported an error and have not matched a token 156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // yet successfully, don't report any errors. 157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( state.errorRecovery ) { 158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver //System.err.print("[SPURIOUS] "); 159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return; 160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.syntaxErrors++; // don't count spurious 162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.errorRecovery = true; 163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver displayRecognitionError(this.tokenNames, e); 165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function displayRecognitionError(tokenNames:Array, 168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver e:RecognitionException):void 169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var hdr:String = getErrorHeader(e); 171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var msg:String = getErrorMessage(e, tokenNames); 172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver emitErrorMessage(hdr+" "+msg); 173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** What error message should be generated for the various 176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * exception types? 177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Not very object-oriented code, but I like having all error message 179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * generation within one method rather than spread among all of the 180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * exception classes. This also makes it much easier for the exception 181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * handling because the exception classes do not have to have pointers back 182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * to this object to access utility routines and so on. Also, changing 183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the message for an exception type would be difficult because you 184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * would have to subclassing exception, but then somehow get ANTLR 185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * to make those kinds of exception objects instead of the default. 186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This looks weird, but trust me--it makes the most sense in terms 187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * of flexibility. 188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * For grammar debugging, you will want to override this to add 190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * more information such as the stack frame with 191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * getRuleInvocationStack(e, this.getClass().getName()) and, 192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * for no viable alts, the decision description and state etc... 193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Override this to change the message generated for one or more 195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * exception types. 196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function getErrorMessage(e:RecognitionException, tokenNames:Array):String { 198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var msg:String = e.message; 199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var tokenName:String = null; 200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( e is UnwantedTokenException ) { 201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var ute:UnwantedTokenException = UnwantedTokenException(e); 202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName="<unknown>"; 203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( ute.expecting== TokenConstants.EOF ) { 204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = "EOF"; 205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else { 207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = tokenNames[ute.expecting]; 208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "extraneous input "+getTokenErrorDisplay(ute.unexpectedToken)+ 210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver " expecting "+tokenName; 211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else if ( e is MissingTokenException ) { 213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var mite:MissingTokenException = MissingTokenException(e); 214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName="<unknown>"; 215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( mite.expecting == TokenConstants.EOF ) { 216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = "EOF"; 217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else { 219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = tokenNames[mite.expecting]; 220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "missing "+tokenName+" at "+getTokenErrorDisplay(e.token); 222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else if ( e is MismatchedTokenException ) { 224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var mte:MismatchedTokenException = MismatchedTokenException(e); 225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName="<unknown>"; 226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( mte.expecting== TokenConstants.EOF ) { 227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = "EOF"; 228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else { 230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = tokenNames[mte.expecting]; 231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "mismatched input "+getTokenErrorDisplay(e.token)+ 233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver " expecting "+tokenName; 234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else if ( e is MismatchedTreeNodeException ) { 236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var mtne:MismatchedTreeNodeException = MismatchedTreeNodeException(e); 237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName="<unknown>"; 238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( mtne.expecting==TokenConstants.EOF ) { 239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = "EOF"; 240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else { 242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tokenName = tokenNames[mtne.expecting]; 243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "mismatched tree node: "+mtne.node+ 245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver " expecting "+tokenName; 246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 247324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else if ( e is NoViableAltException ) { 248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var nvae:NoViableAltException = NoViableAltException(e); 249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>" 250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // and "(decision="+nvae.decisionNumber+") and 251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // "state "+nvae.stateNumber 252324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "no viable alternative at input "+getTokenErrorDisplay(e.token); 253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else if ( e is EarlyExitException ) { 255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var eee:EarlyExitException = EarlyExitException(e); 256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // for development, can add "(decision="+eee.decisionNumber+")" 257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "required (...)+ loop did not match anything at input "+ 258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver getTokenErrorDisplay(e.token); 259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else if ( e is MismatchedSetException ) { 261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var mse:MismatchedSetException = MismatchedSetException(e); 262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "mismatched input "+getTokenErrorDisplay(e.token)+ 263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver " expecting set "+mse.expecting; 264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else if ( e is MismatchedNotSetException ) { 266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var mnse:MismatchedNotSetException = MismatchedNotSetException(e); 267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "mismatched input "+getTokenErrorDisplay(e.token)+ 268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver " expecting set "+mnse.expecting; 269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else if ( e is FailedPredicateException ) { 271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var fpe:FailedPredicateException = FailedPredicateException(e); 272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver msg = "rule "+fpe.ruleName+" failed predicate: {"+ 273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver fpe.predicateText+"}?"; 274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return msg; 276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Get number of recognition errors (lexer, parser, tree parser). Each 279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * recognizer tracks its own number. So parser and lexer each have 280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * separate count. Does not count the spurious errors found between 281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * an error and next valid token match 282324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 283324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * See also reportError() 284324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 285324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function get numberOfSyntaxErrors():int { 286324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return state.syntaxErrors; 287324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 288324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 289324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** What is the error header, normally line/character position information? */ 290324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function getErrorHeader(e:RecognitionException):String { 291324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return "line "+e.line+":"+e.charPositionInLine; 292324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 293324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 294324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** How should a token be displayed in an error message? The default 295324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * is to display just the text, but during development you might 296324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * want to have a lot of information spit out. Override in that case 297324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * to use t.toString() (which, for CommonToken, dumps everything about 298324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the token). This is better than forcing you to override a method in 299324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * your token objects because you don't have to go modify your lexer 300324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * so that it creates a new Java type. 301324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 302324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function getTokenErrorDisplay(t:Token):String { 303324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var s:String = t.text; 304324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( s==null ) { 305324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( t.type==TokenConstants.EOF ) { 306324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver s = "<EOF>"; 307324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 308324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else { 309324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver s = "<"+t.type+">"; 310324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 311324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 312324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver s = s.replace("\n","\\\\n"); 313324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver s = s.replace("\r","\\\\r"); 314324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver s = s.replace("\t","\\\\t"); 315324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return "'"+s+"'"; 316324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 317324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 318324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Override this method to change where error messages go */ 319324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function emitErrorMessage(msg:String):void { 320324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver trace(msg); 321324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 322324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 323324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Recover from an error found on the input stream. This is 324324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * for NoViableAlt and mismatched symbol exceptions. If you enable 325324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * single token insertion and deletion, this will usually not 326324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * handle mismatched symbol exceptions but there could be a mismatched 327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * token that the match() routine could not recover from. 328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function recoverStream(input:IntStream, re:RecognitionException):void { 330324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( state.lastErrorIndex==input.index) { 331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // uh oh, another error at same token index; must be a case 332324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // where LT(1) is in the recovery token set so nothing is 333324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // consumed; consume a single token so at least to prevent 334324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // an infinite loop; this is a failsafe. 335324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input.consume(); 336324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 337324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.lastErrorIndex = input.index; 338324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var followSet:BitSet = computeErrorRecoverySet(); 339324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver beginResync(); 340324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver consumeUntil(input, followSet); 341324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver endResync(); 342324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** A hook to listen in on the token consumption during error recovery. 345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * The DebugParser subclasses this to fire events to the listenter. 346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 347324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function beginResync():void { 348324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 349324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 350324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function endResync():void { 351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 352324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 353324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Compute the error recovery set for the current rule. During 354324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * rule invocation, the parser pushes the set of tokens that can 355324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * follow that rule reference on the stack; this amounts to 356324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * computing FIRST of what follows the rule reference in the 357324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * enclosing rule. This local follow set only includes tokens 358324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * from within the rule; i.e., the FIRST computation done by 359324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * ANTLR stops at the end of a rule. 360324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 361324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * EXAMPLE 362324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 363324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * When you find a "no viable alt exception", the input is not 364324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * consistent with any of the alternatives for rule r. The best 365324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * thing to do is to consume tokens until you see something that 366324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * can legally follow a call to r *or* any rule that called r. 367324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * You don't want the exact set of viable next tokens because the 368324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * input might just be missing a token--you might consume the 369324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * rest of the input looking for one of the missing tokens. 370324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 371324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Consider grammar: 372324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 373324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * a : '[' b ']' 374324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * | '(' b ')' 375324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * ; 376324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * b : c '^' INT ; 377324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * c : ID 378324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * | INT 379324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * ; 380324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 381324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * At each rule invocation, the set of tokens that could follow 382324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * that rule is pushed on a stack. Here are the various "local" 383324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * follow sets: 384324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 385324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * FOLLOW(b1_in_a) = FIRST(']') = ']' 386324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * FOLLOW(b2_in_a) = FIRST(')') = ')' 387324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * FOLLOW(c_in_b) = FIRST('^') = '^' 388324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 389324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Upon erroneous input "[]", the call chain is 390324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 391324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * a -> b -> c 392324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 393324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * and, hence, the follow context stack is: 394324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 395324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * depth local follow set after call to rule 396324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 0 <EOF> a (from main()) 397324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 1 ']' b 398324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 3 '^' c 399324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 400324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Notice that ')' is not included, because b would have to have 401324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * been called from a different context in rule a for ')' to be 402324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * included. 403324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 404324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * For error recovery, we cannot consider FOLLOW(c) 405324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * (context-sensitive or otherwise). We need the combined set of 406324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * all context-sensitive FOLLOW sets--the set of all tokens that 407324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * could follow any reference in the call chain. We need to 408324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * resync to one of those tokens. Note that FOLLOW(c)='^' and if 409324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * we resync'd to that token, we'd consume until EOF. We need to 410324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. 411324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * In this case, for input "[]", LA(1) is in this set so we would 412324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * not consume anything and after printing an error rule c would 413324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * return normally. It would not find the required '^' though. 414324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * At this point, it gets a mismatched token error and throws an 415324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * exception (since LA(1) is not in the viable following token 416324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * set). The rule exception handler tries to recover, but finds 417324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the same recovery set and doesn't consume anything. Rule b 418324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * exits normally returning to rule a. Now it finds the ']' (and 419324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * with the successful match exits errorRecovery mode). 420324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 421324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * So, you cna see that the parser walks up call chain looking 422324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * for the token that was a member of the recovery set. 423324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 424324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Errors are not generated in errorRecovery mode. 425324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 426324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * ANTLR's error recovery mechanism is based upon original ideas: 427324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 428324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * "Algorithms + Data Structures = Programs" by Niklaus Wirth 429324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 430324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * and 431324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 432324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * "A note on error recovery in recursive descent parsers": 433324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * http://portal.acm.org/citation.cfm?id=947902.947905 434324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 435324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Later, Josef Grosch had some good ideas: 436324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 437324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * "Efficient and Comfortable Error Recovery in Recursive Descent 438324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Parsers": 439324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip 440324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 441324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Like Grosch I implemented local FOLLOW sets that are combined 442324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * at run-time upon error to avoid overhead during parsing. 443324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 444324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver protected function computeErrorRecoverySet():BitSet { 445324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return combineFollows(false); 446324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 447324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 448324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Compute the context-sensitive FOLLOW set for current rule. 449324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This is set of token types that can follow a specific rule 450324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * reference given a specific call chain. You get the set of 451324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * viable tokens that can possibly come next (lookahead depth 1) 452324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * given the current call chain. Contrast this with the 453324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * definition of plain FOLLOW for rule r: 454324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 455324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} 456324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 457324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * where x in T* and alpha, beta in V*; T is set of terminals and 458324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * V is the set of terminals and nonterminals. In other words, 459324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * FOLLOW(r) is the set of all tokens that can possibly follow 460324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * references to r in *any* sentential form (context). At 461324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * runtime, however, we know precisely which context applies as 462324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * we have the call chain. We may compute the exact (rather 463324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * than covering superset) set of following tokens. 464324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 465324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * For example, consider grammar: 466324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 467324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} 468324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * | "return" expr '.' 469324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * ; 470324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} 471324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * atom : INT // FOLLOW(atom)=={'+',')',';','.'} 472324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * | '(' expr ')' 473324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * ; 474324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 475324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * The FOLLOW sets are all inclusive whereas context-sensitive 476324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * FOLLOW sets are precisely what could follow a rule reference. 477324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * For input input "i=(3);", here is the derivation: 478324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 479324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * stat => ID '=' expr ';' 480324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * => ID '=' atom ('+' atom)* ';' 481324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * => ID '=' '(' expr ')' ('+' atom)* ';' 482324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * => ID '=' '(' atom ')' ('+' atom)* ';' 483324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * => ID '=' '(' INT ')' ('+' atom)* ';' 484324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * => ID '=' '(' INT ')' ';' 485324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 486324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * At the "3" token, you'd have a call chain of 487324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 488324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * stat -> expr -> atom -> expr -> atom 489324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 490324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * What can follow that specific nested ref to atom? Exactly ')' 491324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * as you can see by looking at the derivation of this specific 492324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. 493324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 494324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * You want the exact viable token set when recovering from a 495324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * token mismatch. Upon token mismatch, if LA(1) is member of 496324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the viable next token set, then you know there is most likely 497324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * a missing token in the input stream. "Insert" one by just not 498324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * throwing an exception. 499324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 500324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver protected function computeContextSensitiveRuleFOLLOW():BitSet { 501324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return combineFollows(true); 502324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 503324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 504324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver protected function combineFollows(exact:Boolean):BitSet { 505324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var top:int = state._fsp; 506324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var followSet:BitSet = new BitSet(); 507324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for (var i:int=top; i>=0; i--) { 508324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var localFollowSet:BitSet = state.following[i]; 509324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver followSet.orInPlace(localFollowSet); 510324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( exact ) { 511324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // can we see end of rule? 512324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( localFollowSet.member(TokenConstants.EOR_TOKEN_TYPE) ) { 513324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Only leave EOR in set if at top (start rule); this lets 514324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // us know if have to include follow(start rule); i.e., EOF 515324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( i>0 ) { 516324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver followSet.remove(TokenConstants.EOR_TOKEN_TYPE); 517324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 518324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 519324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else { // can't see end of rule, quit 520324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver break; 521324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 522324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 523324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 524324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return followSet; 525324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 526324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 527324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Attempt to recover from a single missing or extra token. 528324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 529324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * EXTRA TOKEN 530324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 531324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * LA(1) is not what we are looking for. If LA(2) has the right token, 532324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * however, then assume LA(1) is some extra spurious token. Delete it 533324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * and LA(2) as if we were doing a normal match(), which advances the 534324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * input. 535324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 536324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * MISSING TOKEN 537324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 538324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * If current token is consistent with what could come after 539324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * ttype then it is ok to "insert" the missing token, else throw 540324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * exception For example, Input "i=(3;" is clearly missing the 541324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * ')'. When the parser returns from the nested call to expr, it 542324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * will have call chain: 543324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 544324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * stat -> expr -> atom 545324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 546324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * and it will be trying to match the ')' at this point in the 547324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * derivation: 548324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 549324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * => ID '=' '(' INT ')' ('+' atom)* ';' 550324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * ^ 551324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * match() will see that ';' doesn't match ')' and report a 552324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * mismatched token error. To recover, it sees that LA(1)==';' 553324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * is in the set of tokens that can follow the ')' token 554324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * reference in rule atom. It can assume that you forgot the ')'. 555324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 556324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function recoverFromMismatchedToken(input:IntStream, 557324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ttype:int, 558324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver follow:BitSet):Object { 559324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var e:RecognitionException = null; 560324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // if next token is what we are looking for then "delete" this token 561324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( mismatchIsUnwantedToken(input, ttype) ) { 562324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver e = new UnwantedTokenException(ttype, input); 563324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* 564324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver System.err.println("recoverFromMismatchedToken deleting "+ 565324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ((TokenStream)input).LT(1)+ 566324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver " since "+((TokenStream)input).LT(2)+" is what we want"); 567324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 568324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver beginResync(); 569324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input.consume(); // simply delete extra token 570324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver endResync(); 571324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver reportError(e); // report after consuming so AW sees the token in the exception 572324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // we want to return the token we're actually matching 573324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var matchedSymbol:Object = getCurrentInputSymbol(input); 574324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input.consume(); // move past ttype token as if all were ok 575324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return matchedSymbol; 576324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 577324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // can't recover with single token deletion, try insertion 578324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( mismatchIsMissingToken(input, follow) ) { 579324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var inserted:Object = getMissingSymbol(input, e, ttype, follow); 580324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver e = new MissingTokenException(ttype, input, inserted); 581324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver reportError(e); // report after inserting so AW sees the token in the exception 582324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return inserted; 583324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 584324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // even that didn't work; must throw the exception 585324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver e = new MismatchedTokenException(ttype, input); 586324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver throw e; 587324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 588324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 589324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Not currently used */ 590324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function recoverFromMismatchedSet(input:IntStream, 591324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver e:RecognitionException, 592324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver follow:BitSet):Object 593324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 594324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( mismatchIsMissingToken(input, follow) ) { 595324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // System.out.println("missing token"); 596324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver reportError(e); 597324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // we don't know how to conjure up a token for sets yet 598324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return getMissingSymbol(input, e, TokenConstants.INVALID_TOKEN_TYPE, follow); 599324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 600324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // TODO do single token deletion like above for Token mismatch 601324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver throw e; 602324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 603324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 604324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Match needs to return the current input symbol, which gets put 605324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * into the label for the associated token ref; e.g., x=ID. Token 606324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * and tree parsers need to return different objects. Rather than test 607324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * for input stream type or change the IntStream interface, I use 608324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * a simple method to ask the recognizer to tell me what the current 609324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * input symbol is. 610324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 611324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This is ignored for lexers. 612324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 613324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver protected function getCurrentInputSymbol(input:IntStream):Object { return null; } 614324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 615324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Conjure up a missing token during error recovery. 616324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 617324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * The recognizer attempts to recover from single missing 618324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * symbols. But, actions might refer to that missing symbol. 619324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * For example, x=ID {f($x);}. The action clearly assumes 620324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * that there has been an identifier matched previously and that 621324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * $x points at that token. If that token is missing, but 622324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the next token in the stream is what we want we assume that 623324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * this token is missing and we keep going. Because we 624324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * have to return some token to replace the missing token, 625324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * we have to conjure one up. This method gives the user control 626324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * over the tokens returned for missing tokens. Mostly, 627324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * you will want to create something special for identifier 628324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * tokens. For literals such as '{' and ',', the default 629324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * action in the parser or tree parser works. It simply creates 630324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * a CommonToken of the appropriate type. The text will be the token. 631324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * If you change what tokens must be created by the lexer, 632324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * override this method to create the appropriate tokens. 633324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 634324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver protected function getMissingSymbol(input:IntStream, 635324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver e:RecognitionException, 636324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver expectedTokenType:int, 637324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver follow:BitSet):Object 638324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 639324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return null; 640324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 641324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 642324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function consumeUntilToken(input:IntStream, tokenType:int):void { 643324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver //System.out.println("consumeUntil "+tokenType); 644324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var ttype:int = input.LA(1); 645324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (ttype != TokenConstants.EOF && ttype != tokenType) { 646324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input.consume(); 647324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ttype = input.LA(1); 648324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 649324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 650324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 651324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Consume tokens until one matches the given token set */ 652324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function consumeUntil(input:IntStream, bitSet:BitSet):void { 653324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver //trace("consumeUntil("+bitSet.toStringFromTokens(tokenNames)+")"); 654324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var ttype:int = input.LA(1); 655324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (ttype != TokenConstants.EOF && !bitSet.member(ttype) ) { 656324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver //trace("consume during recover LA(1)="+tokenNames[input.LA(1)]); 657324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input.consume(); 658324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ttype = input.LA(1); 659324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 660324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 661324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 662324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Push a rule's follow set using our own hardcoded stack */ 663324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver protected function pushFollow(fset:BitSet):void { 664324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.following[++state._fsp] = fset; 665324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 666324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 667324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function get backtrackingLevel():int { 668324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return state.backtracking; 669324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 670324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 671324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function set backtrakingLevel(n:int):void { 672324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.backtracking = n; 673324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 674324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 675324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Return whether or not a backtracking attempt failed. */ 676324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function get failed():Boolean { 677324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return state.failed; 678324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 679324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 680324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Used to print out token names like ID during debugging and 681324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * error reporting. The generated parsers implement a method 682324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * that overrides this to point to their String[] tokenNames. 683324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 684324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function get tokenNames():Array { 685324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return null; 686324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 687324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 688324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** For debugging and other purposes, might want the grammar name. 689324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Have ANTLR generate an implementation for this method. 690324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 691324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function get grammarFileName():String { 692324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return null; 693324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 694324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 695324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function get sourceName():String { 696324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return null; 697324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 698324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 699324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** A convenience method for use most often with template rewrites. 700324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Convert a List<Token> to List<String> 701324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 702324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function toStrings(tokens:Array):Array { 703324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( tokens==null ) return null; 704324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var strings:Array = new Array(); 705324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for (var i:int = 0; i<tokens.length; i++) { 706324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver strings.push(tokens[i].text); 707324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 708324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return strings; 709324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 710324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 711324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Given a rule number and a start token index number, return 712324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * MEMO_RULE_UNKNOWN if the rule has not parsed input starting from 713324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * start index. If this rule has parsed input starting from the 714324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * start index before, then return where the rule stopped parsing. 715324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * It returns the index of the last token matched by the rule. 716324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 717324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * For now we use a hashtable and just the slow Object-based one. 718324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Later, we can make a special one for ints and also one that 719324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * tosses out data after we commit past input position i. 720324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 721324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function getRuleMemoization(ruleIndex:int, ruleStartIndex:int):int { 722324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( state.ruleMemo[ruleIndex]==undefined ) { 723324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.ruleMemo[ruleIndex] = new Array(); 724324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 725324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var stopIndex:* = state.ruleMemo[ruleIndex][ruleStartIndex]; 726324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( stopIndex == undefined ) { 727324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return MEMO_RULE_UNKNOWN; 728324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 729324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return stopIndex as int; 730324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 731324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 732324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Has this rule already parsed input at the current index in the 733324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * input stream? Return the stop token index or MEMO_RULE_UNKNOWN. 734324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * If we attempted but failed to parse properly before, return 735324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * MEMO_RULE_FAILED. 736324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 737324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This method has a side-effect: if we have seen this input for 738324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * this rule and successfully parsed before, then seek ahead to 739324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 1 past the stop token matched for this rule last time. 740324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 741324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function alreadyParsedRule(input:IntStream, ruleIndex:int):Boolean { 742324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var stopIndex:int = getRuleMemoization(ruleIndex, input.index); 743324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( stopIndex==MEMO_RULE_UNKNOWN ) { 744324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return false; 745324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 746324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( stopIndex==MEMO_RULE_FAILED ) { 747324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver //System.out.println("rule "+ruleIndex+" will never succeed"); 748324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.failed=true; 749324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 750324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else { 751324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+failed); 752324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input.seek(stopIndex+1); // jump to one past stop token 753324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 754324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return true; 755324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 756324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 757324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Record whether or not this rule parsed the input at this position 758324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * successfully. Use a standard java hashtable for now. 759324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 760324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function memoize(input:IntStream, 761324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ruleIndex:int, 762324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ruleStartIndex:int):void 763324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 764324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var stopTokenIndex:int = state.failed ? MEMO_RULE_FAILED : input.index - 1; 765324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( state.ruleMemo==null ) { 766324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver trace("!!!!!!!!! memo array is null for "+ grammarFileName); 767324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 768324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( ruleIndex >= state.ruleMemo.length ) { 769324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver trace("!!!!!!!!! memo size is "+state.ruleMemo.length+", but rule index is "+ruleIndex); 770324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 771324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 772324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( state.ruleMemo[ruleIndex]!=null ) { 773324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex; 774324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 775324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 776324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 777324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** return how many rule/input-index pairs there are in total. 778324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * TODO: this includes synpreds. :( 779324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 780324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function getRuleMemoizationCacheSize():int { 781324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var n:int = 0; 782324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for (var i:int = 0; state.ruleMemo!=null && i < state.ruleMemo.length; i++) { 783324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver var ruleMap:Object = state.ruleMemo[i]; 784324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( ruleMap!=null ) { 785324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver n += ruleMap.length; // how many input indexes are recorded? 786324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 787324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 788324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return n; 789324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 790324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 791324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function traceInSymbol(ruleName:String, ruleIndex:int, inputSymbol:Object):void { 792324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver trace("enter "+ruleName+" "+inputSymbol); 793324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( state.backtracking>0 ) { 794324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver trace(" backtracking="+state.backtracking); 795324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 796324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver trace(); 797324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 798324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 799324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public function traceOutSymbol(ruleName:String, 800324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ruleIndex:int, 801324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver inputSymbol:Object):void 802324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 803324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver trace("exit "+ruleName+" "+inputSymbol); 804324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( state.backtracking>0 ) { 805324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver trace(" backtracking="+state.backtracking); 806324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( state.failed ) trace(" failed"); 807324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else trace(" succeeded"); 808324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 809324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver trace(); 810324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 811324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 812324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 813324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}