BaseRecognizer.as revision 324c4644fee44b9898524c09511bd33c3f12e2df
package org.antlr.runtime {

    /** A generic recognizer that can handle recognizers generated from
     *  lexer, parser, and tree grammars.  This is all the parsing
     *  support code essentially; most of it is error recovery stuff and
     *  backtracking.
     */
    public class BaseRecognizer {

        public static const MEMO_RULE_FAILED:int = -2;
        public static const MEMO_RULE_UNKNOWN:int = -1;
        public static const INITIAL_FOLLOW_STACK_SIZE:int = 100;

        // copies from Token object for convenience in actions
        public static const DEFAULT_TOKEN_CHANNEL:int = TokenConstants.DEFAULT_CHANNEL;
        public static const HIDDEN:int = TokenConstants.HIDDEN_CHANNEL;

        public static const NEXT_TOKEN_RULE_NAME:String = "nextToken";

        /** State of a lexer, parser, or tree parser are collected into a state
         *  object so the state can be shared.  This sharing is needed to
         *  have one grammar import others and share same error variables
         *  and other state variables.  It's a kind of explicit multiple
         *  inheritance via delegation of methods and shared state.
         */
        public var state:RecognizerSharedState; // TODO - Place in private Namespace - cannot be private

        public function BaseRecognizer(state:RecognizerSharedState = null) {
            if ( state == null ) { // don't ever let us have a null state
                state = new RecognizerSharedState();
            }
            this.state = state;
        }

        /** Reset the parser's state; subclasses must rewind the input stream. */
        public function reset():void {
            // wack everything related to error recovery
            if ( state == null ) {
                return;
            }
            state._fsp = -1;
            state.errorRecovery = false;
            state.lastErrorIndex = -1;
            state.failed = false;
            state.syntaxErrors = 0;
            // wack everything related to backtracking and memoization
            state.backtracking = 0;
            for (var i:int = 0; state.ruleMemo != null && i < state.ruleMemo.length; i++) { // wipe cache
                state.ruleMemo[i] = null;
            }
        }

        /** Match current input symbol against ttype.  Attempt
         *  single token insertion or deletion error recovery.  If
         *  that fails, throw MismatchedTokenException.
         *
         *  To turn off single token insertion or deletion error
         *  recovery, override mismatchRecover() and have it call
         *  plain mismatch(), which does not recover.  Then any error
         *  in a rule will cause an exception and immediate exit from
         *  rule.  Rule would recover by resynchronizing to the set of
         *  symbols that can follow rule ref.
         */
        public function matchStream(input:IntStream, ttype:int, follow:BitSet):Object {
            var matchedSymbol:Object = getCurrentInputSymbol(input);
            if ( input.LA(1) == ttype ) {
                input.consume();
                state.errorRecovery = false;
                state.failed = false;
                return matchedSymbol;
            }
            if ( state.backtracking > 0 ) {
                // during backtracking we never report or recover; just flag failure
                state.failed = true;
                return matchedSymbol;
            }
            matchedSymbol = recoverFromMismatchedToken(input, ttype, follow);
            return matchedSymbol;
        }

        /** Match the wildcard: in a symbol */
        public function matchAnyStream(input:IntStream):void {
            state.errorRecovery = false;
            state.failed = false;
            input.consume();
        }

        public function mismatchIsUnwantedToken(input:IntStream, ttype:int):Boolean {
            return input.LA(2) == ttype;
        }

        public function mismatchIsMissingToken(input:IntStream, follow:BitSet):Boolean {
            if ( follow == null ) {
                // we have no information about the follow; we can only consume
                // a single token and hope for the best
                return false;
            }
            // compute what can follow this grammar element reference
            if ( follow.member(TokenConstants.EOR_TOKEN_TYPE) ) {
                var viableTokensFollowingThisRule:BitSet = computeContextSensitiveRuleFOLLOW();
                follow = follow.or(viableTokensFollowingThisRule);
                if ( state._fsp >= 0 ) { // remove EOR if we're not the start symbol
                    follow.remove(TokenConstants.EOR_TOKEN_TYPE);
                }
            }
            // if current token is consistent with what could come after set
            // then we know we're missing a token; error recovery is free to
            // "insert" the missing token.
            //
            // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
            // in follow set to indicate that the fall of the start symbol is
            // in the set (EOF can follow).
            if ( follow.member(input.LA(1)) || follow.member(TokenConstants.EOR_TOKEN_TYPE) ) {
                return true;
            }
            return false;
        }

        /** Factor out what to do upon token mismatch so tree parsers can behave
         *  differently.  Override and call mismatchRecover(input, ttype, follow)
         *  to get single token insertion and deletion.  Use this to turn off
         *  single token insertion and deletion.  Override mismatchRecover
         *  to call this instead.
         */
        protected function mismatch(input:IntStream, ttype:int, follow:BitSet):void {
            if ( mismatchIsUnwantedToken(input, ttype) ) {
                throw new UnwantedTokenException(ttype, input);
            }
            else if ( mismatchIsMissingToken(input, follow) ) {
                throw new MissingTokenException(ttype, input, null);
            }
            throw new MismatchedTokenException(ttype, input);
        }

        /** Report a recognition problem.
         *
         *  This method sets errorRecovery to indicate the parser is recovering
         *  not parsing.  Once in recovery mode, no errors are generated.
         *  To get out of recovery mode, the parser must successfully match
         *  a token (after a resync).  So it will go:
         *
         *  1. error occurs
         *  2. enter recovery mode, report error
         *  3. consume until token found in resynch set
         *  4. try to resume parsing
         *  5. next match() will reset errorRecovery mode
         *
         *  If you override, make sure to update syntaxErrors if you care about that.
         */
        public function reportError(e:RecognitionException):void {
            // if we've already reported an error and have not matched a token
            // yet successfully, don't report any errors.
            if ( state.errorRecovery ) {
                return;
            }
            state.syntaxErrors++; // don't count spurious
            state.errorRecovery = true;
            displayRecognitionError(this.tokenNames, e);
        }

        public function displayRecognitionError(tokenNames:Array,
                                                e:RecognitionException):void
        {
            var hdr:String = getErrorHeader(e);
            var msg:String = getErrorMessage(e, tokenNames);
            emitErrorMessage(hdr + " " + msg);
        }

        /** What error message should be generated for the various
         *  exception types?
         *
         *  Not very object-oriented code, but I like having all error message
         *  generation within one method rather than spread among all of the
         *  exception classes.  This also makes it much easier for the exception
         *  handling because the exception classes do not have to have pointers back
         *  to this object to access utility routines and so on.  Also, changing
         *  the message for an exception type would be difficult because you
         *  would have to subclass exception, but then somehow get ANTLR
         *  to make those kinds of exception objects instead of the default.
         *
         *  For grammar debugging, you will want to override this to add
         *  more information such as the stack frame and, for no viable alts,
         *  the decision description and state etc...
         *
         *  Override this to change the message generated for one or more
         *  exception types.
         */
        public function getErrorMessage(e:RecognitionException, tokenNames:Array):String {
            var msg:String = e.message;
            var tokenName:String = null;
            if ( e is UnwantedTokenException ) {
                var ute:UnwantedTokenException = UnwantedTokenException(e);
                tokenName = "<unknown>";
                if ( ute.expecting == TokenConstants.EOF ) {
                    tokenName = "EOF";
                }
                else {
                    tokenName = tokenNames[ute.expecting];
                }
                msg = "extraneous input " + getTokenErrorDisplay(ute.unexpectedToken) +
                      " expecting " + tokenName;
            }
            else if ( e is MissingTokenException ) {
                var mite:MissingTokenException = MissingTokenException(e);
                tokenName = "<unknown>";
                if ( mite.expecting == TokenConstants.EOF ) {
                    tokenName = "EOF";
                }
                else {
                    tokenName = tokenNames[mite.expecting];
                }
                msg = "missing " + tokenName + " at " + getTokenErrorDisplay(e.token);
            }
            else if ( e is MismatchedTokenException ) {
                var mte:MismatchedTokenException = MismatchedTokenException(e);
                tokenName = "<unknown>";
                if ( mte.expecting == TokenConstants.EOF ) {
                    tokenName = "EOF";
                }
                else {
                    tokenName = tokenNames[mte.expecting];
                }
                msg = "mismatched input " + getTokenErrorDisplay(e.token) +
                      " expecting " + tokenName;
            }
            else if ( e is MismatchedTreeNodeException ) {
                var mtne:MismatchedTreeNodeException = MismatchedTreeNodeException(e);
                tokenName = "<unknown>";
                if ( mtne.expecting == TokenConstants.EOF ) {
                    tokenName = "EOF";
                }
                else {
                    tokenName = tokenNames[mtne.expecting];
                }
                msg = "mismatched tree node: " + mtne.node +
                      " expecting " + tokenName;
            }
            else if ( e is NoViableAltException ) {
                var nvae:NoViableAltException = NoViableAltException(e);
                // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
                // and "(decision="+nvae.decisionNumber+") and
                // "state "+nvae.stateNumber
                msg = "no viable alternative at input " + getTokenErrorDisplay(e.token);
            }
            else if ( e is EarlyExitException ) {
                var eee:EarlyExitException = EarlyExitException(e);
                // for development, can add "(decision="+eee.decisionNumber+")"
                msg = "required (...)+ loop did not match anything at input " +
                      getTokenErrorDisplay(e.token);
            }
            else if ( e is MismatchedSetException ) {
                var mse:MismatchedSetException = MismatchedSetException(e);
                msg = "mismatched input " + getTokenErrorDisplay(e.token) +
                      " expecting set " + mse.expecting;
            }
            else if ( e is MismatchedNotSetException ) {
                var mnse:MismatchedNotSetException = MismatchedNotSetException(e);
                msg = "mismatched input " + getTokenErrorDisplay(e.token) +
                      " expecting set " + mnse.expecting;
            }
            else if ( e is FailedPredicateException ) {
                var fpe:FailedPredicateException = FailedPredicateException(e);
                msg = "rule " + fpe.ruleName + " failed predicate: {" +
                      fpe.predicateText + "}?";
            }
            return msg;
        }

        /** Get number of recognition errors (lexer, parser, tree parser).  Each
         *  recognizer tracks its own number.  So parser and lexer each have
         *  separate count.  Does not count the spurious errors found between
         *  an error and next valid token match.
         *
         *  See also reportError().
         */
        public function get numberOfSyntaxErrors():int {
            return state.syntaxErrors;
        }

        /** What is the error header, normally line/character position information? */
        public function getErrorHeader(e:RecognitionException):String {
            return "line " + e.line + ":" + e.charPositionInLine;
        }

        /** How should a token be displayed in an error message?  The default
         *  is to display just the text, but during development you might
         *  want to have a lot of information spit out.  Override in that case
         *  to use t.toString() (which, for CommonToken, dumps everything about
         *  the token).  This is better than forcing you to override a method in
         *  your token objects because you don't have to go modify your lexer
         *  so that it creates a new type.
         */
        public function getTokenErrorDisplay(t:Token):String {
            var s:String = t.text;
            if ( s == null ) {
                if ( t.type == TokenConstants.EOF ) {
                    s = "<EOF>";
                }
                else {
                    s = "<" + t.type + ">";
                }
            }
            // Use global regexes: String.replace() with a string pattern only
            // replaces the FIRST occurrence.  The replacement is the two-char
            // escape sequence (e.g. backslash + 'n'), not a doubled backslash --
            // the old "\\\\n" form was a leftover from Java's replaceAll()
            // replacement-string escaping rules.
            s = s.replace(/\n/g, "\\n");
            s = s.replace(/\r/g, "\\r");
            s = s.replace(/\t/g, "\\t");
            return "'" + s + "'";
        }

        /** Override this method to change where error messages go */
        public function emitErrorMessage(msg:String):void {
            trace(msg);
        }

        /** Recover from an error found on the input stream.  This is
         *  for NoViableAlt and mismatched symbol exceptions.  If you enable
         *  single token insertion and deletion, this will usually not
         *  handle mismatched symbol exceptions but there could be a mismatched
         *  token that the match() routine could not recover from.
         */
        public function recoverStream(input:IntStream, re:RecognitionException):void {
            if ( state.lastErrorIndex == input.index ) {
                // uh oh, another error at same token index; must be a case
                // where LT(1) is in the recovery token set so nothing is
                // consumed; consume a single token at least to prevent
                // an infinite loop; this is a failsafe.
                input.consume();
            }
            state.lastErrorIndex = input.index;
            var followSet:BitSet = computeErrorRecoverySet();
            beginResync();
            consumeUntil(input, followSet);
            endResync();
        }

        /** A hook to listen in on the token consumption during error recovery.
         *  The DebugParser subclasses this to fire events to the listener.
         */
        public function beginResync():void {
        }

        public function endResync():void {
        }

        /** Compute the error recovery set for the current rule.  During
         *  rule invocation, the parser pushes the set of tokens that can
         *  follow that rule reference on the stack; this amounts to
         *  computing FIRST of what follows the rule reference in the
         *  enclosing rule.  This local follow set only includes tokens
         *  from within the rule; i.e., the FIRST computation done by
         *  ANTLR stops at the end of a rule.
         *
         *  EXAMPLE
         *
         *  When you find a "no viable alt exception", the input is not
         *  consistent with any of the alternatives for rule r.  The best
         *  thing to do is to consume tokens until you see something that
         *  can legally follow a call to r *or* any rule that called r.
         *  You don't want the exact set of viable next tokens because the
         *  input might just be missing a token--you might consume the
         *  rest of the input looking for one of the missing tokens.
         *
         *  Consider grammar:
         *
         *  a : '[' b ']'
         *    | '(' b ')'
         *    ;
         *  b : c '^' INT ;
         *  c : ID
         *    | INT
         *    ;
         *
         *  At each rule invocation, the set of tokens that could follow
         *  that rule is pushed on a stack.  Here are the various "local"
         *  follow sets:
         *
         *  FOLLOW(b1_in_a) = FIRST(']') = ']'
         *  FOLLOW(b2_in_a) = FIRST(')') = ')'
         *  FOLLOW(c_in_b)  = FIRST('^') = '^'
         *
         *  Upon erroneous input "[]", the call chain is
         *
         *  a -> b -> c
         *
         *  and, hence, the follow context stack is:
         *
         *  depth  local follow set  after call to rule
         *    0         <EOF>         a (from main())
         *    1          ']'          b
         *    3          '^'          c
         *
         *  Notice that ')' is not included, because b would have to have
         *  been called from a different context in rule a for ')' to be
         *  included.
         *
         *  For error recovery, we cannot consider FOLLOW(c)
         *  (context-sensitive or otherwise).  We need the combined set of
         *  all context-sensitive FOLLOW sets--the set of all tokens that
         *  could follow any reference in the call chain.  We need to
         *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
         *  we resync'd to that token, we'd consume until EOF.  We need to
         *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
         *  In this case, for input "[]", LA(1) is in this set so we would
         *  not consume anything and after printing an error rule c would
         *  return normally.  It would not find the required '^' though.
         *  At this point, it gets a mismatched token error and throws an
         *  exception (since LA(1) is not in the viable following token
         *  set).  The rule exception handler tries to recover, but finds
         *  the same recovery set and doesn't consume anything.  Rule b
         *  exits normally returning to rule a.  Now it finds the ']' (and
         *  with the successful match exits errorRecovery mode).
         *
         *  So, you can see that the parser walks up the call chain looking
         *  for the token that was a member of the recovery set.
         *
         *  Errors are not generated in errorRecovery mode.
         *
         *  ANTLR's error recovery mechanism is based upon original ideas:
         *
         *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
         *
         *  and
         *
         *  "A note on error recovery in recursive descent parsers":
         *  http://portal.acm.org/citation.cfm?id=947902.947905
         *
         *  Later, Josef Grosch had some good ideas:
         *
         *  "Efficient and Comfortable Error Recovery in Recursive Descent
         *  Parsers":
         *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
         *
         *  Like Grosch I implemented local FOLLOW sets that are combined
         *  at run-time upon error to avoid overhead during parsing.
         */
        protected function computeErrorRecoverySet():BitSet {
            return combineFollows(false);
        }

        /** Compute the context-sensitive FOLLOW set for current rule.
         *  This is the set of token types that can follow a specific rule
         *  reference given a specific call chain.  You get the set of
         *  viable tokens that can possibly come next (lookahead depth 1)
         *  given the current call chain.  Contrast this with the
         *  definition of plain FOLLOW for rule r:
         *
         *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
         *
         *  where x in T* and alpha, beta in V*; T is set of terminals and
         *  V is the set of terminals and nonterminals.  In other words,
         *  FOLLOW(r) is the set of all tokens that can possibly follow
         *  references to r in *any* sentential form (context).  At
         *  runtime, however, we know precisely which context applies as
         *  we have the call chain.  We may compute the exact (rather
         *  than covering superset) set of following tokens.
         *
         *  For example, consider grammar:
         *
         *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
         *       | "return" expr '.'
         *       ;
         *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
         *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
         *       | '(' expr ')'
         *       ;
         *
         *  The FOLLOW sets are all inclusive whereas context-sensitive
         *  FOLLOW sets are precisely what could follow a rule reference.
         *  For input "i=(3);", here is the derivation:
         *
         *  stat => ID '=' expr ';'
         *       => ID '=' atom ('+' atom)* ';'
         *       => ID '=' '(' expr ')' ('+' atom)* ';'
         *       => ID '=' '(' atom ')' ('+' atom)* ';'
         *       => ID '=' '(' INT ')' ('+' atom)* ';'
         *       => ID '=' '(' INT ')' ';'
         *
         *  At the "3" token, you'd have a call chain of
         *
         *    stat -> expr -> atom -> expr -> atom
         *
         *  What can follow that specific nested ref to atom?  Exactly ')'
         *  as you can see by looking at the derivation of this specific
         *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
         *
         *  You want the exact viable token set when recovering from a
         *  token mismatch.  Upon token mismatch, if LA(1) is member of
         *  the viable next token set, then you know there is most likely
         *  a missing token in the input stream.  "Insert" one by just not
         *  throwing an exception.
         */
        protected function computeContextSensitiveRuleFOLLOW():BitSet {
            return combineFollows(true);
        }

        protected function combineFollows(exact:Boolean):BitSet {
            var top:int = state._fsp;
            var followSet:BitSet = new BitSet();
            for (var i:int = top; i >= 0; i--) {
                var localFollowSet:BitSet = state.following[i];
                followSet.orInPlace(localFollowSet);
                if ( exact ) {
                    // can we see end of rule?
                    if ( localFollowSet.member(TokenConstants.EOR_TOKEN_TYPE) ) {
                        // Only leave EOR in set if at top (start rule); this lets
                        // us know if we have to include follow(start rule); i.e., EOF
                        if ( i > 0 ) {
                            followSet.remove(TokenConstants.EOR_TOKEN_TYPE);
                        }
                    }
                    else { // can't see end of rule, quit
                        break;
                    }
                }
            }
            return followSet;
        }

        /** Attempt to recover from a single missing or extra token.
         *
         *  EXTRA TOKEN
         *
         *  LA(1) is not what we are looking for.  If LA(2) has the right token,
         *  however, then assume LA(1) is some extra spurious token.  Delete it
         *  and LA(2) as if we were doing a normal match(), which advances the
         *  input.
         *
         *  MISSING TOKEN
         *
         *  If current token is consistent with what could come after
         *  ttype then it is ok to "insert" the missing token, else throw
         *  exception.  For example, Input "i=(3;" is clearly missing the
         *  ')'.  When the parser returns from the nested call to expr, it
         *  will have call chain:
         *
         *    stat -> expr -> atom
         *
         *  and it will be trying to match the ')' at this point in the
         *  derivation:
         *
         *       => ID '=' '(' INT ')' ('+' atom)* ';'
         *                          ^
         *  match() will see that ';' doesn't match ')' and report a
         *  mismatched token error.  To recover, it sees that LA(1)==';'
         *  is in the set of tokens that can follow the ')' token
         *  reference in rule atom.  It can assume that you forgot the ')'.
         */
        public function recoverFromMismatchedToken(input:IntStream,
                                                   ttype:int,
                                                   follow:BitSet):Object {
            var e:RecognitionException = null;
            // if next token is what we are looking for then "delete" this token
            if ( mismatchIsUnwantedToken(input, ttype) ) {
                e = new UnwantedTokenException(ttype, input);
                beginResync();
                input.consume(); // simply delete extra token
                endResync();
                reportError(e);  // report after consuming so AW sees the token in the exception
                // we want to return the token we're actually matching
                var matchedSymbol:Object = getCurrentInputSymbol(input);
                input.consume(); // move past ttype token as if all were ok
                return matchedSymbol;
            }
            // can't recover with single token deletion, try insertion
            if ( mismatchIsMissingToken(input, follow) ) {
                var inserted:Object = getMissingSymbol(input, e, ttype, follow);
                e = new MissingTokenException(ttype, input, inserted);
                reportError(e);  // report after inserting so AW sees the token in the exception
                return inserted;
            }
            // even that didn't work; must throw the exception
            e = new MismatchedTokenException(ttype, input);
            throw e;
        }

        /** Not currently used */
        public function recoverFromMismatchedSet(input:IntStream,
                                                 e:RecognitionException,
                                                 follow:BitSet):Object
        {
            if ( mismatchIsMissingToken(input, follow) ) {
                reportError(e);
                // we don't know how to conjure up a token for sets yet
                return getMissingSymbol(input, e, TokenConstants.INVALID_TOKEN_TYPE, follow);
            }
            // TODO do single token deletion like above for Token mismatch
            throw e;
        }

        /** Match needs to return the current input symbol, which gets put
         *  into the label for the associated token ref; e.g., x=ID.  Token
         *  and tree parsers need to return different objects.  Rather than test
         *  for input stream type or change the IntStream interface, I use
         *  a simple method to ask the recognizer to tell me what the current
         *  input symbol is.
         *
         *  This is ignored for lexers.
         */
        protected function getCurrentInputSymbol(input:IntStream):Object { return null; }

        /** Conjure up a missing token during error recovery.
         *
         *  The recognizer attempts to recover from single missing
         *  symbols.  But, actions might refer to that missing symbol.
         *  For example, x=ID {f($x);}.  The action clearly assumes
         *  that there has been an identifier matched previously and that
         *  $x points at that token.  If that token is missing, but
         *  the next token in the stream is what we want we assume that
         *  this token is missing and we keep going.  Because we
         *  have to return some token to replace the missing token,
         *  we have to conjure one up.  This method gives the user control
         *  over the tokens returned for missing tokens.  Mostly,
         *  you will want to create something special for identifier
         *  tokens.  For literals such as '{' and ',', the default
         *  action in the parser or tree parser works.  It simply creates
         *  a CommonToken of the appropriate type.  The text will be the token.
         *  If you change what tokens must be created by the lexer,
         *  override this method to create the appropriate tokens.
         */
        protected function getMissingSymbol(input:IntStream,
                                            e:RecognitionException,
                                            expectedTokenType:int,
                                            follow:BitSet):Object
        {
            return null;
        }

        /** Consume tokens until one matches the given token type (or EOF is hit). */
        public function consumeUntilToken(input:IntStream, tokenType:int):void {
            var ttype:int = input.LA(1);
            while ( ttype != TokenConstants.EOF && ttype != tokenType ) {
                input.consume();
                ttype = input.LA(1);
            }
        }

        /** Consume tokens until one matches the given token set */
        public function consumeUntil(input:IntStream, bitSet:BitSet):void {
            var ttype:int = input.LA(1);
            while ( ttype != TokenConstants.EOF && !bitSet.member(ttype) ) {
                input.consume();
                ttype = input.LA(1);
            }
        }

        /** Push a rule's follow set using our own hardcoded stack */
        protected function pushFollow(fset:BitSet):void {
            state.following[++state._fsp] = fset;
        }

        public function get backtrackingLevel():int {
            return state.backtracking;
        }

        /** Correctly-spelled setter paired with the backtrackingLevel getter. */
        public function set backtrackingLevel(n:int):void {
            state.backtracking = n;
        }

        /** @deprecated Misspelled setter kept for backward compatibility with
         *  existing callers; use backtrackingLevel instead.
         */
        public function set backtrakingLevel(n:int):void {
            state.backtracking = n;
        }

        /** Return whether or not a backtracking attempt failed. */
        public function get failed():Boolean {
            return state.failed;
        }

        /** Used to print out token names like ID during debugging and
         *  error reporting.  The generated parsers implement a method
         *  that overrides this to point to their String[] tokenNames.
         */
        public function get tokenNames():Array {
            return null;
        }

        /** For debugging and other purposes, might want the grammar name.
         *  Have ANTLR generate an implementation for this method.
         */
        public function get grammarFileName():String {
            return null;
        }

        public function get sourceName():String {
            return null;
        }

        /** A convenience method for use most often with template rewrites.
         *  Convert a List<Token> to List<String>
         */
        public function toStrings(tokens:Array):Array {
            if ( tokens == null ) return null;
            var strings:Array = new Array();
            for (var i:int = 0; i < tokens.length; i++) {
                strings.push(tokens[i].text);
            }
            return strings;
        }

        /** Given a rule number and a start token index number, return
         *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
         *  start index.  If this rule has parsed input starting from the
         *  start index before, then return where the rule stopped parsing.
         *  It returns the index of the last token matched by the rule.
         *
         *  For now we use a hashtable and just the slow Object-based one.
         *  Later, we can make a special one for ints and also one that
         *  tosses out data after we commit past input position i.
         */
        public function getRuleMemoization(ruleIndex:int, ruleStartIndex:int):int {
            if ( state.ruleMemo[ruleIndex] == undefined ) {
                state.ruleMemo[ruleIndex] = new Array();
            }
            var stopIndex:* = state.ruleMemo[ruleIndex][ruleStartIndex];
            if ( stopIndex == undefined ) {
                return MEMO_RULE_UNKNOWN;
            }
            return stopIndex as int;
        }

        /** Has this rule already parsed input at the current index in the
         *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
         *  If we attempted but failed to parse properly before, return
         *  MEMO_RULE_FAILED.
         *
         *  This method has a side-effect: if we have seen this input for
         *  this rule and successfully parsed before, then seek ahead to
         *  1 past the stop token matched for this rule last time.
         */
        public function alreadyParsedRule(input:IntStream, ruleIndex:int):Boolean {
            var stopIndex:int = getRuleMemoization(ruleIndex, input.index);
            if ( stopIndex == MEMO_RULE_UNKNOWN ) {
                return false;
            }
            if ( stopIndex == MEMO_RULE_FAILED ) {
                // rule is known to fail at this position; flag it so the
                // caller bails out without re-parsing
                state.failed = true;
            }
            else {
                input.seek(stopIndex + 1); // jump to one past stop token
            }
            return true;
        }

        /** Record whether or not this rule parsed the input at this position
         *  successfully.
         */
        public function memoize(input:IntStream,
                                ruleIndex:int,
                                ruleStartIndex:int):void
        {
            var stopTokenIndex:int = state.failed ? MEMO_RULE_FAILED : input.index - 1;
            if ( state.ruleMemo == null ) {
                trace("!!!!!!!!! memo array is null for " + grammarFileName);
                return; // guard: dereferencing the null table below would throw
            }
            if ( ruleIndex >= state.ruleMemo.length ) {
                trace("!!!!!!!!! memo size is " + state.ruleMemo.length + ", but rule index is " + ruleIndex);
                return; // out-of-range rule index cannot be recorded
            }
            if ( state.ruleMemo[ruleIndex] != null ) {
                state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex;
            }
        }

        /** Return how many rule/input-index pairs there are in total.
         *  TODO: this includes synpreds. :(
         */
        public function getRuleMemoizationCacheSize():int {
            var n:int = 0;
            for (var i:int = 0; state.ruleMemo != null && i < state.ruleMemo.length; i++) {
                var ruleMap:Object = state.ruleMemo[i];
                if ( ruleMap != null ) {
                    // Count only the start indexes actually recorded.  The
                    // per-rule array is sparse (indexed by token start index),
                    // so .length would report highest-index+1, not pair count.
                    for (var key:String in ruleMap) {
                        n++;
                    }
                }
            }
            return n;
        }

        public function traceInSymbol(ruleName:String, ruleIndex:int, inputSymbol:Object):void {
            trace("enter " + ruleName + " " + inputSymbol);
            if ( state.backtracking > 0 ) {
                trace(" backtracking=" + state.backtracking);
            }
            trace();
        }

        public function traceOutSymbol(ruleName:String,
                                       ruleIndex:int,
                                       inputSymbol:Object):void
        {
            trace("exit " + ruleName + " " + inputSymbol);
            if ( state.backtracking > 0 ) {
                trace(" backtracking=" + state.backtracking);
                if ( state.failed ) trace(" failed");
                else trace(" succeeded");
            }
            trace();
        }

    }
}