1/** A generic recognizer that can handle recognizers generated from
2 *  lexer, parser, and tree grammars.  This is all the parsing
3 *  support code essentially; most of it is error recovery stuff and
4 *  backtracking.
5 *
6 *  <p>This class should not be instantiated directly.  Instead, use one of its
7 *  subclasses.</p>
8 *
9 *  @class
10 *  @param {org.antlr.runtime.RecognizerSharedState} [state] state object with
11 *      which to initialize this recognizer.
12 */
13org.antlr.runtime.BaseRecognizer = function(state) {
14    /** State of a lexer, parser, or tree parser are collected into a state
15     *  object so the state can be shared.  This sharing is needed to
16     *  have one grammar import others and share same error variables
17     *  and other state variables.  It's a kind of explicit multiple
18     *  inheritance via delegation of methods and shared state.
19     *  @type org.antlr.runtime.RecognizerSharedState
20     */
21    this.state = state || new org.antlr.runtime.RecognizerSharedState();
22};
23
24org.antlr.lang.augmentObject(org.antlr.runtime.BaseRecognizer, {
25    /**
26     * @memberOf org.antlr.runtime.BaseRecognizer
27     * @type Number
28     */
29    MEMO_RULE_FAILED: -2,
30
31    /**
32     * @memberOf org.antlr.runtime.BaseRecognizer
33     * @type Number
34     */
35    MEMO_RULE_UNKNOWN: -1,
36
37    /**
38     * @memberOf org.antlr.runtime.BaseRecognizer
39     * @type Number
40     */
41    INITIAL_FOLLOW_STACK_SIZE: 100,
42
43    /**
44     * @memberOf org.antlr.runtime.BaseRecognizer
45     * @type Number
46     */
47    MEMO_RULE_FAILED_I: -2,
48
49    /**
50     * @memberOf org.antlr.runtime.BaseRecognizer
51     * @type Number
52     */
53    DEFAULT_TOKEN_CHANNEL: org.antlr.runtime.Token.DEFAULT_CHANNEL,
54
55    /**
56     * @memberOf org.antlr.runtime.BaseRecognizer
57     * @type Number
58     */
59    HIDDEN: org.antlr.runtime.Token.HIDDEN_CHANNEL,
60
61    /**
62     * @memberOf org.antlr.runtime.BaseRecognizer
63     * @type String
64     */
65    NEXT_TOKEN_RULE_NAME: "nextToken"
66});
67
68org.antlr.runtime.BaseRecognizer.prototype = {
69    /** Reset the parser's state.  Subclasses must rewinds the input stream */
70    reset: function() {
71        var i, len;
72
73        // wack everything related to error recovery
74        if (!this.state) {
75            return; // no shared state work to do
76        }
77        this.state._fsp = -1;
78        this.state.errorRecovery = false;
79        this.state.lastErrorIndex = -1;
80        this.state.failed = false;
81        this.state.syntaxErrors = 0;
82        // wack everything related to backtracking and memoization
83        this.state.backtracking = 0;
84        // wipe cache
85        if (this.state.ruleMemo) {
86            for (i=0, len=this.state.ruleMemo.length; i<len; i++) {
87                this.state.ruleMemo[i] = null;
88            }
89        }
90    },
91
92    /** Match current input symbol against ttype.  Attempt
93     *  single token insertion or deletion error recovery.  If
94     *  that fails, throw {@link org.antlr.runtime.MismatchedTokenException}.
95     *
96     *  <p>To turn off single token insertion or deletion error
97     *  recovery, override {@link #mismatchRecover} and have it call
98     *  plain {@link #mismatch}, which does not recover.  Then any error
99     *  in a rule will cause an exception and immediate exit from
100     *  rule.  Rule would recover by resynchronizing to the set of
101     *  symbols that can follow rule ref.</p>
102     *
103     *  @param {org.antlr.runtime.IntStream} input input stream to match against.
104     *  @param {Number} ttype  input type to match.
105     *  @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the
106     *      matched token.
107     *  @returns {Object} the matched symbol
108     */
109    match: function(input, ttype, follow) {
110        var matchedSymbol = this.getCurrentInputSymbol(input);
111        if ( input.LA(1)===ttype ) {
112            input.consume();
113            this.state.errorRecovery = false;
114            this.state.failed = false;
115            return matchedSymbol;
116        }
117        if ( this.state.backtracking>0 ) {
118            this.state.failed = true;
119            return matchedSymbol;
120        }
121        matchedSymbol = this.recoverFromMismatchedToken(input, ttype, follow);
122        return matchedSymbol;
123    },
124
125    /**
126     * Match any token.
127     * @param {org.antlr.runtime.IntStream} input input stream to match against.
128     */
129    matchAny: function(input) {
130        this.state.errorRecovery = false;
131        this.state.failed = false;
132        input.consume();
133    },
134
135    /**
136     * Is the following token (LA(2)) the unwanted type (ttype)?
137     * @param {org.antlr.runtime.IntStream} input input stream to match against.
138     * @param {Number} ttype the undesired token type.
139     * @returns {Boolean} true if and only if the following token is the
140     *      unwanted type.
141     */
142    mismatchIsUnwantedToken: function(input, ttype) {
143        return input.LA(2)===ttype;
144    },
145
146    /**
147     * Does the stream appear to be missing a single token?
148     * @param {org.antlr.runtime.IntStream} input input stream to match against.
149     * @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the
150     *      matched token.
151     * @returns {Boolean} true if and only if it appears that the stream is
152     *      missing a single token.
153     */
154    mismatchIsMissingToken: function(input, follow) {
155        if ( !follow ) {
156            // we have no information about the follow; we can only consume
157            // a single token and hope for the best
158            return false;
159        }
160        // compute what can follow this grammar element reference
161        if ( follow.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) ) {
162            var viableTokensFollowingThisRule = this.computeContextSensitiveRuleFOLLOW();
163            follow = follow.or(this.viableTokensFollowingThisRule);
164            if ( this.state._fsp>=0 ) { // remove EOR if we're not the start symbol
165                follow.remove(org.antlr.runtime.Token.EOR_TOKEN_TYPE);
166            }
167        }
168        // if current token is consistent with what could come after set
169        // then we know we're missing a token; error recovery is free to
170        // "insert" the missing token
171
172        // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
173        // in follow set to indicate that the fall of the start symbol is
174        // in the set (EOF can follow).
175        if ( follow.member(input.LA(1)) ||
176             follow.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) )
177        {
178            return true;
179        }
180        return false;
181    },
182
183    /** Factor out what to do upon token mismatch so tree parsers can behave
184     *  differently.  Override and call {@link #mismatchRecover}
185     *  to get single token insertion and deletion.
186     *
187     *  @param {org.antlr.runtime.IntStream} input input stream to match against.
188     *  @param {Number} ttype  input type to match.
189     *  @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the
190     *      matched token.
191     */
192    mismatch: function(input, ttype, follow) {
193        if ( this.mismatchIsUnwantedToken(input, ttype) ) {
194            throw new org.antlr.runtime.UnwantedTokenException(ttype, input);
195        } else if ( this.mismatchIsMissingToken(input, follow) ) {
196            throw new org.antlr.runtime.MissingTokenException(ttype, input, null);
197        }
198        throw new org.antlr.runtime.MismatchedTokenException(ttype, input);
199    },
200
201    /** Report a recognition problem.
202     *
203     *  <p>This method sets errorRecovery to indicate the parser is recovering
204     *  not parsing.  Once in recovery mode, no errors are generated.
205     *  To get out of recovery mode, the parser must successfully match
206     *  a token (after a resync).  So it will go:</p>
207     *  <ol>
208     *      <li>error occurs</li>
209     *      <li>enter recovery mode, report error</li>
210     *      <li>consume until token found in resynch set</li>
211     *      <li>try to resume parsing</li>
212     *      <li>next match() will reset errorRecovery mode</li>
213     *  </ol>
214     *
215     *  <p>If you override, make sure to update this.state.syntaxErrors if you
216     *  care about that.</p>
217     *  @param {org.antlr.runtime.RecognitionException} e the error to be reported.
218     */
219    reportError: function(e) {
220        // if we've already reported an error and have not matched a token
221        // yet successfully, don't report any errors.
222        if ( this.state.errorRecovery ) {
223            return;
224        }
225        this.state.syntaxErrors++;
226        this.state.errorRecovery = true;
227
228        this.displayRecognitionError(this.getTokenNames(), e);
229    },
230
231    /**
232     * Assemble recognition error message.
233     * @param {Array} tokenNames array of token names (strings).
234     * @param {org.antlr.runtime.RecognitionException} e the error to be reported.
235     */
236    displayRecognitionError: function(tokenNames, e) {
237        var hdr = this.getErrorHeader(e),
238            msg = this.getErrorMessage(e, tokenNames);
239        this.emitErrorMessage(hdr+" "+msg);
240    },
241
242    /**
243     * Create error header message.  Format is <q>line
244     * lineNumber:positionInLine</q>.
245     * @param {org.antlr.runtime.RecognitionException} e the error to be reported.
246     * @returns {String} The error header.
247     */
248    getErrorHeader: function(e) {
249        /* handle null input */
250        if (!org.antlr.lang.isNumber(e.line)) {
251            e.line = 0;
252        }
253        return "line "+e.line+":"+e.charPositionInLine;
254    },
255
256    /**
257     * Override this method to change where error messages go.
258     * Defaults to "alert"-ing the error in browsers and "print"-ing the error
259     * in other environments (e.g. Rhino, SpiderMonkey).
260     * @param {String} msg the error message to be displayed.
261     */
262    emitErrorMessage: function(msg) {
263        if (typeof(window) != 'undefined' && window.alert) {
264            alert(msg);
265        } else {
266            print(msg);
267        }
268    },
269
270    /** What error message should be generated for the various
271     *  exception types?
272     *
273     *  <p>Not very object-oriented code, but I like having all error message
274     *  generation within one method rather than spread among all of the
275     *  exception classes. This also makes it much easier for the exception
276     *  handling because the exception classes do not have to have pointers back
277     *  to this object to access utility routines and so on. Also, changing
278     *  the message for an exception type would be difficult because you
279     *  would have to be subclassing exceptions, but then somehow get ANTLR
280     *  to make those kinds of exception objects instead of the default.</p>
281     *
282     *  <p>For grammar debugging, you will want to override this to add
283     *  more information such as the stack frame and no viable alts.</p>
284     *
285     *  <p>Override this to change the message generated for one or more
286     *  exception types.</p>
287     *
288     * @param {Array} tokenNames array of token names (strings).
289     * @param {org.antlr.runtime.RecognitionException} e the error to be reported.
290     * @returns {String} the error message to be emitted.
291     */
292    getErrorMessage: function(e, tokenNames) {
293        var msg = (e && e.getMessage) ? e.getMessage() : null,
294            mte,
295            tokenName;
296        if ( e instanceof org.antlr.runtime.UnwantedTokenException ) {
297            var ute = e;
298            tokenName="<unknown>";
299            if ( ute.expecting== org.antlr.runtime.Token.EOF ) {
300                tokenName = "EOF";
301            } else {
302                tokenName = tokenNames[ute.expecting];
303            }
304            msg = "extraneous input "+this.getTokenErrorDisplay(ute.getUnexpectedToken())+
305                " expecting "+tokenName;
306        }
307        else if ( e instanceof org.antlr.runtime.MissingTokenException ) {
308            mte = e;
309            tokenName="<unknown>";
310            if ( mte.expecting== org.antlr.runtime.Token.EOF ) {
311                tokenName = "EOF";
312            } else {
313                tokenName = tokenNames[mte.expecting];
314            }
315            msg = "missing "+tokenName+" at "+this.getTokenErrorDisplay(e.token);
316        }
317        else if ( e instanceof org.antlr.runtime.MismatchedTokenException ) {
318            mte = e;
319            tokenName="<unknown>";
320            if ( mte.expecting== org.antlr.runtime.Token.EOF ) {
321                tokenName = "EOF";
322            }
323            else {
324                tokenName = tokenNames[mte.expecting];
325            }
326            msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+
327                " expecting "+tokenName;
328        }
329        else if ( e instanceof org.antlr.runtime.NoViableAltException ) {
330            msg = "no viable alternative at input "+this.getTokenErrorDisplay(e.token);
331        }
332        else if ( e instanceof org.antlr.runtime.EarlyExitException ) {
333            msg = "required (...)+ loop did not match anything at input "+
334                this.getTokenErrorDisplay(e.token);
335        }
336        else if ( e instanceof org.antlr.runtime.MismatchedSetException ) {
337            msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+
338                " expecting set "+e.expecting;
339        }
340        else if ( e instanceof org.antlr.runtime.MismatchedNotSetException ) {
341            msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+
342                " expecting set "+e.expecting;
343        }
344        else if ( e instanceof org.antlr.runtime.FailedPredicateException ) {
345            msg = "rule "+e.ruleName+" failed predicate: {"+
346                e.predicateText+"}?";
347        }
348        return msg;
349    },
350
351    /** <p>Get number of recognition errors (lexer, parser, tree parser).  Each
352     *  recognizer tracks its own number.  So parser and lexer each have
353     *  separate count.  Does not count the spurious errors found between
354     *  an error and next valid token match.</p>
355     *
356     *  <p>See also {@link #reportError}()
357     *  @returns {Number} number of syntax errors encountered
358     */
359    getNumberOfSyntaxErrors: function() {
360        return this.state.syntaxErrors;
361    },
362
363    /** How should a token be displayed in an error message? The default
364     *  is to display just the text, but during development you might
365     *  want to have a lot of information spit out.  Override in that case
366     *  to use t.toString() (which, for CommonToken, dumps everything about
367     *  the token).
368     * @param {org.antlr.runtime.Token} t token that will be displayed in an error message
369     * @return {String} the string representation of the token
370     */
371    getTokenErrorDisplay: function(t) {
372        var s = t.getText();
373        if ( !org.antlr.lang.isValue(s) ) {
374            if ( t.getType()==org.antlr.runtime.Token.EOF ) {
375                s = "<EOF>";
376            }
377            else {
378                s = "<"+t.getType()+">";
379            }
380        }
381        s = s.replace(/\n/g,"\\n");
382        s = s.replace(/\r/g,"\\r");
383        s = s.replace(/\t/g,"\\t");
384        return "'"+s+"'";
385    },
386
387    /** Recover from an error found on the input stream.  This is
388     *  for NoViableAlt and mismatched symbol exceptions.  If you enable
389     *  single token insertion and deletion, this will usually not
390     *  handle mismatched symbol exceptions but there could be a mismatched
391     *  token that the match() routine could not recover from.
392     *  @param {org.antlr.runtime.IntStream} input the intput stream
393     *  @param {org.antlr.runtime.RecogntionException} the error found on the input stream
394     */
395    recover: function(input, re) {
396        if ( this.state.lastErrorIndex==input.index() ) {
397            // uh oh, another error at same token index; must be a case
398            // where LT(1) is in the recovery token set so nothing is
399            // consumed; consume a single token so at least to prevent
400            // an infinite loop; this is a failsafe.
401            input.consume();
402        }
403        this.state.lastErrorIndex = input.index();
404        var followSet = this.computeErrorRecoverySet();
405        this.beginResync();
406        this.consumeUntil(input, followSet);
407        this.endResync();
408    },
409
410    /** A hook to listen in on the token consumption during error recovery.
411     */
412    beginResync: function() {
413    },
414
415    /** A hook to listen in on the token consumption during error recovery.
416     */
417    endResync: function() {
418    },
419
420    /** Compute the error recovery set for the current rule.
421     *  <p>During rule invocation, the parser pushes the set of tokens that can
422     *  follow that rule reference on the stack; this amounts to
423     *  computing FIRST of what follows the rule reference in the
424     *  enclosing rule. This local follow set only includes tokens
425     *  from within the rule; i.e., the FIRST computation done by
426     *  ANTLR stops at the end of a rule.</p>
427     *
428     *  <p>EXAMPLE</p>
429     *
430     *  <p>When you find a "no viable alt exception", the input is not
431     *  consistent with any of the alternatives for rule r.  The best
432     *  thing to do is to consume tokens until you see something that
433     *  can legally follow a call to r *or* any rule that called r.
434     *  You don't want the exact set of viable next tokens because the
435     *  input might just be missing a token--you might consume the
436     *  rest of the input looking for one of the missing tokens.</p>
437     *
438     *  <p>Consider grammar:</p>
439     *  <code><pre>
440     *  a : '[' b ']'
441     *    | '(' b ')'
442     *    ;
443     *  b : c '^' INT ;
444     *  c : ID
445     *    | INT
446     *    ;
447     *  </pre></code>
448     *
449     *  <p>At each rule invocation, the set of tokens that could follow
450     *  that rule is pushed on a stack.  Here are the various "local"
451     *  follow sets:</p>
452     *
453     *  <code><pre>
454     *  FOLLOW(b1_in_a) = FIRST(']') = ']'
455     *  FOLLOW(b2_in_a) = FIRST(')') = ')'
456     *  FOLLOW(c_in_b) = FIRST('^') = '^'
457     *  </pre></code>
458     *
459     *  <p>Upon erroneous input "[]", the call chain is</p>
460     *
461     *  <code>a -> b -> c</code>
462     *
463     *  <p>and, hence, the follow context stack is:</p>
464     *
465     *  <code><pre>
466     *  depth  local follow set     after call to rule
467     *    0         <EOF>                    a (from main())
468     *    1          ']'                     b
469     *    3          '^'                     c
470     *  </pre></code>
471     *
472     *  <p>Notice that ')' is not included, because b would have to have
473     *  been called from a different context in rule a for ')' to be
474     *  included.</p>
475     *
476     *  <p>For error recovery, we cannot consider FOLLOW(c)
477     *  (context-sensitive or otherwise).  We need the combined set of
478     *  all context-sensitive FOLLOW sets--the set of all tokens that
479     *  could follow any reference in the call chain.  We need to
480     *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
481     *  we resync'd to that token, we'd consume until EOF.  We need to
482     *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
483     *  In this case, for input "[]", LA(1) is in this set so we would
484     *  not consume anything and after printing an error rule c would
485     *  return normally.  It would not find the required '^' though.
486     *  At this point, it gets a mismatched token error and throws an
487     *  exception (since LA(1) is not in the viable following token
488     *  set).  The rule exception handler tries to recover, but finds
489     *  the same recovery set and doesn't consume anything.  Rule b
490     *  exits normally returning to rule a.  Now it finds the ']' (and
491     *  with the successful match exits errorRecovery mode).</p>
492     *
493     *  <p>So, you cna see that the parser walks up call chain looking
494     *  for the token that was a member of the recovery set.</p>
495     *
496     *  <p>Errors are not generated in errorRecovery mode.</p>
497     *
498     *  <p>ANTLR's error recovery mechanism is based upon original ideas:</p>
499     *
500     *  <p>"Algorithms + Data Structures = Programs" by Niklaus Wirth</p>
501     *
502     *  <p>and</p>
503     *
504     *  <p>"A note on error recovery in recursive descent parsers":
505     *  http://portal.acm.org/citation.cfm?id=947902.947905</p>
506     *
507     *  <p>Later, Josef Grosch had some good ideas:</p>
508     *
509     *  <p>"Efficient and Comfortable Error Recovery in Recursive Descent
510     *  Parsers":
511     *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip</p>
512     *
513     *  <p>Like Grosch I implemented local FOLLOW sets that are combined
514     *  at run-time upon error to avoid overhead during parsing.</p>
515     *  @returns {org.antlr.runtime.BitSet}
516     */
517    computeErrorRecoverySet: function() {
518        return this.combineFollows(false);
519    },
520
521
522    /** Compute the context-sensitive FOLLOW set for current rule.
523     *  <p>This is set of token types that can follow a specific rule
524     *  reference given a specific call chain.  You get the set of
525     *  viable tokens that can possibly come next (lookahead depth 1)
526     *  given the current call chain.  Contrast this with the
527     *  definition of plain FOLLOW for rule r:</p>
528     *
529     *   <code>FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}</code>
530     *
531     *  <p>where x in T* and alpha, beta in V*; T is set of terminals and
532     *  V is the set of terminals and nonterminals.  In other words,
533     *  FOLLOW(r) is the set of all tokens that can possibly follow
534     *  references to r in *any* sentential form (context).  At
535     *  runtime, however, we know precisely which context applies as
536     *  we have the call chain.  We may compute the exact (rather
537     *  than covering superset) set of following tokens.</p>
538     *
539     *  <p>For example, consider grammar:</p>
540     *
541     *  <code><pre>
542     *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
543     *       | "return" expr '.'
544     *       ;
545     *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
546     *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
547     *       | '(' expr ')'
548     *       ;
549     *  </pre></code>
550     *
551     *  <p>The FOLLOW sets are all inclusive whereas context-sensitive
552     *  FOLLOW sets are precisely what could follow a rule reference.
553     *  For input input "i=(3);", here is the derivation:</p>
554     *
555     *  <code><pre>
556     *  stat => ID '=' expr ';'
557     *       => ID '=' atom ('+' atom)* ';'
558     *       => ID '=' '(' expr ')' ('+' atom)* ';'
559     *       => ID '=' '(' atom ')' ('+' atom)* ';'
560     *       => ID '=' '(' INT ')' ('+' atom)* ';'
561     *       => ID '=' '(' INT ')' ';'
562     *  </pre></code>
563     *
564     *  <p>At the "3" token, you'd have a call chain of</p>
565     *
566     *  <code>  stat -> expr -> atom -> expr -> atom</code>
567     *
568     *  <p>What can follow that specific nested ref to atom?  Exactly ')'
569     *  as you can see by looking at the derivation of this specific
570     *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.</p>
571     *
572     *  <p>You want the exact viable token set when recovering from a
573     *  token mismatch.  Upon token mismatch, if LA(1) is member of
574     *  the viable next token set, then you know there is most likely
575     *  a missing token in the input stream.  "Insert" one by just not
576     *  throwing an exception.</p>
577     *  @returns {org.antlr.runtime.BitSet}
578     */
579    computeContextSensitiveRuleFOLLOW: function() {
580        return this.combineFollows(true);
581    },
582
583    /**
584     * Helper method for {@link #computeErrorRecoverySet} and
585     * {@link computeContextSensitiveRuleFOLLO}.
586     * @param {Boolean} exact
587     * @returns {org.antlr.runtime.BitSet}
588     */
589    combineFollows: function(exact) {
590        var top = this.state._fsp,
591            i,
592            localFollowSet,
593            followSet = new org.antlr.runtime.BitSet();
594        for (i=top; i>=0; i--) {
595            localFollowSet = this.state.following[i];
596            followSet.orInPlace(localFollowSet);
597            if ( exact ) {
598                // can we see end of rule?
599                if ( localFollowSet.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) )
600                {
601                    // Only leave EOR in set if at top (start rule); this lets
602                    // us know if have to include follow(start rule); i.e., EOF
603                    if ( i>0 ) {
604                        followSet.remove(org.antlr.runtime.Token.EOR_TOKEN_TYPE);
605                    }
606                }
607                else { // can't see end of rule, quit
608                    break;
609                }
610            }
611        }
612        return followSet;
613    },
614
615    /** Attempt to recover from a single missing or extra token.
616     *
617     *  <p>EXTRA TOKEN</p>
618     *
619     *  <p>LA(1) is not what we are looking for.  If LA(2) has the right token,
620     *  however, then assume LA(1) is some extra spurious token.  Delete it
621     *  and LA(2) as if we were doing a normal match(), which advances the
622     *  input.</p>
623     *
624     *  <p>MISSING TOKEN</p>
625     *
626     *  <p>If current token is consistent with what could come after
627     *  ttype then it is ok to "insert" the missing token, else throw
628     *  exception For example, Input "i=(3;" is clearly missing the
629     *  ')'.  When the parser returns from the nested call to expr, it
630     *  will have call chain:</p>
631     *
632     *  <pre><code>  stat -> expr -> atom</code></pre>
633     *
634     *  <p>and it will be trying to match the ')' at this point in the
635     *  derivation:</p>
636     *
637     *  <pre><code>     => ID '=' '(' INT ')' ('+' atom)* ';'</code></pre>
638     *                          ^
639     *  <p>match() will see that ';' doesn't match ')' and report a
640     *  mismatched token error.  To recover, it sees that LA(1)==';'
641     *  is in the set of tokens that can follow the ')' token
642     *  reference in rule atom.  It can assume that you forgot the ')'.</p>
643     *
644     *  @param {org.antlr.runtime.IntStream} input
645     *  @param {Number} ttype
646     *  @param {org.antlr.runtime.BitSet} follow
647     *  @returns {Object}
648     */
649    recoverFromMismatchedToken: function(input,
650                                         ttype,
651                                         follow)
652    {
653        var e = null;
654        // if next token is what we are looking for then "delete" this token
655        if ( this.mismatchIsUnwantedToken(input, ttype) ) {
656            e = new org.antlr.runtime.UnwantedTokenException(ttype, input);
657            this.beginResync();
658            input.consume(); // simply delete extra token
659            this.endResync();
660            this.reportError(e);  // report after consuming so AW sees the token in the exception
661            // we want to return the token we're actually matching
662            var matchedSymbol = this.getCurrentInputSymbol(input);
663            input.consume(); // move past ttype token as if all were ok
664            return matchedSymbol;
665        }
666        // can't recover with single token deletion, try insertion
667        if ( this.mismatchIsMissingToken(input, follow) ) {
668            var inserted = this.getMissingSymbol(input, e, ttype, follow);
669            e = new org.antlr.runtime.MissingTokenException(ttype, input, inserted);
670            this.reportError(e);  // report after inserting so AW sees the token in the exception
671            return inserted;
672        }
673        // even that didn't work; must throw the exception
674        e = new org.antlr.runtime.MismatchedTokenException(ttype, input);
675        throw e;
676    },
677
678    /**
679     * Recover from a mismatched set exception.
680     * @param {org.antlr.runtime.IntStream} input
681     * @param {org.antlr.runtime.RecognitionException} e
682     * @param {org.antlr.runtime.BitSet} follow
683     * @returns {Object}
684     */
685    recoverFromMismatchedSet: function(input,
686                                       e,
687                                       follow)
688    {
689        if ( this.mismatchIsMissingToken(input, follow) ) {
690            // System.out.println("missing token");
691            this.reportError(e);
692            // we don't know how to conjure up a token for sets yet
693            return this.getMissingSymbol(input, e, org.antlr.runtime.Token.INVALID_TOKEN_TYPE, follow);
694        }
695        throw e;
696    },
697
698    /** Match needs to return the current input symbol, which gets put
699     *  into the label for the associated token ref; e.g., x=ID.  Token
700     *  and tree parsers need to return different objects. Rather than test
701     *  for input stream type or change the IntStream interface, I use
702     *  a simple method to ask the recognizer to tell me what the current
703     *  input symbol is.
704     *
705     *  <p>This is ignored for lexers.</p>
706     *  @param {org.antlr.runtime.IntStream} input
707     *  @returns {Object}
708     */
709    getCurrentInputSymbol: function(input) { return null; },
710
711    /** Conjure up a missing token during error recovery.
712     *
713     *  <p>The recognizer attempts to recover from single missing
714     *  symbols. But, actions might refer to that missing symbol.
715     *  For example, x=ID {f($x);}. The action clearly assumes
716     *  that there has been an identifier matched previously and that
717     *  $x points at that token. If that token is missing, but
718     *  the next token in the stream is what we want we assume that
719     *  this token is missing and we keep going. Because we
720     *  have to return some token to replace the missing token,
721     *  we have to conjure one up. This method gives the user control
722     *  over the tokens returned for missing tokens. Mostly,
723     *  you will want to create something special for identifier
724     *  tokens. For literals such as '{' and ',', the default
725     *  action in the parser or tree parser works. It simply creates
726     *  a CommonToken of the appropriate type. The text will be the token.
727     *  If you change what tokens must be created by the lexer,
728     *  override this method to create the appropriate tokens.</p>
729     *
730     *  @param {org.antlr.runtime.IntStream} input
731     *  @param {org.antlr.runtime.RecognitionException} e
732     *  @param {Number} expectedTokenType
733     *  @param {org.antlr.runtime.BitSet} follow
734     *  @returns {Object}
735     */
736    getMissingSymbol: function(input,
737                               e,
738                               expectedTokenType,
739                               follow)
740    {
741        return null;
742    },
743
744
745    /**
746     * Consume tokens until one matches the given token or token set
747     * @param {org.antlr.runtime.IntStream} input
748     * @param {Number|org.antlr.runtime.BitSet} set
749     */
750    consumeUntil: function(input, set) {
751        var ttype = input.LA(1);
752        while (ttype != org.antlr.runtime.Token.EOF && !set.member(ttype) ) {
753            input.consume();
754            ttype = input.LA(1);
755        }
756    },
757
758    /**
759     * Push a rule's follow set using our own hardcoded stack.
760     * @param {org.antlr.runtime.BitSet} fset
761     */
762    pushFollow: function(fset) {
763        if ( (this.state._fsp +1)>=this.state.following.length ) {
764            var f = [];
765            var i;
766            for (i=this.state.following.length-1; i>=0; i--) {
767                f[i] = this.state.following[i];
768            }
769            this.state.following = f;
770        }
771        this.state._fsp++;
772        this.state.following[this.state._fsp] = fset;
773    },
774
775    /**
776     * Sadly JavaScript doesn't provide a robust mechanism for runtime stack reflection.
777     * This makes implementing this function impossible without maintaining an auxillary
778     * stack data structure, which would be crazy expensive, especially in Lexers.  As such,
779     * this method remains unimplemented.
780     * @deprecated
781     */
782    getRuleInvocationStack: function(e, recognizerClassName)
783    {
784        throw new Error("Not implemented.");
785    },
786
787    /**
788     * Get this recognizer's backtracking level.
789     * @returns {Number} backtracking level
790     */
791    getBacktrackingLevel: function() {
792        return this.state.backtracking;
793    },
794
795    /** Used to print out token names like ID during debugging and
796     *  error reporting.  The generated parsers implement a method
797     *  that overrides this to point to their String[] tokenNames.
798     *  @returns {Array} String array of token names.
799     */
800    getTokenNames: function() {
801        return null;
802    },
803
804    /** For debugging and other purposes, might want the grammar name.
805     *  Have ANTLR generate an implementation for this method.
806     *  @returns {String} the grammar name.
807     */
808    getGrammarFileName: function() {
809        return null;
810    },
811
812    /** A convenience method for use most often with template rewrites.
813     *  Convert an array of Tokens to an array of Strings.
814     *  @param {Array} array of org.antlr.runtime.Token objects.
815     *  @returns {Array} array of string representations of the argument.
816     */
817    toStrings: function(tokens) {
818        if ( !tokens ) {
819            return null;
820        }
821        var strings = [];
822        var i;
823        for (i=0; i<tokens.length; i++) {
824            strings.push(tokens[i].getText());
825        }
826        return strings;
827    },
828
829    /** Given a rule number and a start token index number, return
830     *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
831     *  start index.  If this rule has parsed input starting from the
832     *  start index before, then return where the rule stopped parsing.
833     *  It returns the index of the last token matched by the rule.
834     *
835     *  <p>For now we use a hashtable and just the slow Object-based one.
836     *  Later, we can make a special one for ints and also one that
837     *  tosses out data after we commit past input position i.</p>
838     *  @param {Number} ruleIndex
839     *  @param {Number} ruleStartIndex
840     *  @returns {Number}
841     */
842    getRuleMemoization: function(ruleIndex, ruleStartIndex) {
843        if ( !this.state.ruleMemo[ruleIndex] ) {
844            this.state.ruleMemo[ruleIndex] = {};
845        }
846        var stopIndexI =
847            this.state.ruleMemo[ruleIndex][ruleStartIndex];
848        if ( !org.antlr.lang.isNumber(stopIndexI) ) {
849            return org.antlr.runtime.BaseRecognizer.MEMO_RULE_UNKNOWN;
850        }
851        return stopIndexI;
852    },
853
854    /** Has this rule already parsed input at the current index in the
855     *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
856     *  If we attempted but failed to parse properly before, return
857     *  MEMO_RULE_FAILED.
858     *
859     *  <p>This method has a side-effect: if we have seen this input for
860     *  this rule and successfully parsed before, then seek ahead to
861     *  1 past the stop token matched for this rule last time.</p>
862     *  @param {org.antlr.runtime.IntStream} input
863     *  @param {Number} ruleIndex
864     *  @returns {Boolean}
865     */
866    alreadyParsedRule: function(input, ruleIndex) {
867        var stopIndex = this.getRuleMemoization(ruleIndex, input.index());
868        if ( stopIndex==org.antlr.runtime.BaseRecognizer.MEMO_RULE_UNKNOWN ) {
869            return false;
870        }
871        if ( stopIndex==org.antlr.runtime.BaseRecognizer.MEMO_RULE_FAILED ) {
872            //System.out.println("rule "+ruleIndex+" will never succeed");
873            this.state.failed=true;
874        }
875        else {
876            input.seek(stopIndex+1); // jump to one past stop token
877        }
878        return true;
879    },
880
881    /** Record whether or not this rule parsed the input at this position
882     *  successfully.  Use a standard java hashtable for now.
883     *  @param {org.antlr.runtime.IntStream} input
884     *  @param {Number} ruleIndex
885     *  @param {Number} ruleStartIndex
886     */
887    memoize: function(input,
888                      ruleIndex,
889                      ruleStartIndex)
890    {
891        var stopTokenIndex = this.state.failed ?
892            org.antlr.runtime.BaseRecognizer.MEMO_RULE_FAILED : input.index()-1;
893        if ( !org.antlr.lang.isValue(this.state.ruleMemo) ) {
894            throw new Error("!!!!!!!!! memo array is null for "+ this.getGrammarFileName());
895        }
896        if ( ruleIndex >= this.state.ruleMemo.length ) {
897            throw new Error("!!!!!!!!! memo size is "+this.state.ruleMemo.length+", but rule index is "+ruleIndex);
898        }
899        if ( org.antlr.lang.isValue(this.state.ruleMemo[ruleIndex]) ) {
900            this.state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex;
901        }
902    },
903
904    /** return how many rule/input-index pairs there are in total.
905     *  TODO: this includes synpreds.
906     *  @returns {Number}
907     */
908    getRuleMemoizationCacheSize: function() {
909        var n = 0, i;
910        for (i = 0; this.state.ruleMemo && i < this.state.ruleMemo.length; i++) {
911            var ruleMap = this.state.ruleMemo[i];
912            if ( ruleMap ) {
913                // @todo need to get size of rulemap?
914                n += ruleMap.length; // how many input indexes are recorded?
915            }
916        }
917        return n;
918    },
919
920    /**
921     * When a grammar is compiled with the tracing flag enabled, this method is invoked
922     * at the start of each rule.
923     * @param {String} ruleName the current ruleName
924     * @param {Number} ruleIndex
925     * @param {Object} inputSymbol
926     */
927    traceIn: function(ruleName, ruleIndex, inputSymbol)  {
928        this.emitErrorMessage("enter "+ruleName+" "+inputSymbol);
929        if ( this.state.failed ) {
930            this.emitErrorMessage(" failed="+this.failed);
931        }
932        if ( this.state.backtracking>0 ) {
933            this.emitErrorMessage(" backtracking="+this.state.backtracking);
934        }
935        // System.out.println();
936    },
937
938    /**
939     * When a grammar is compiled with the tracing flag enabled, this method is invoked
940     * at the end of each rule.
941     * @param {String} ruleName the current ruleName
942     * @param {Number} ruleIndex
943     * @param {Object} inputSymbol
944     */
945    traceOut: function(ruleName, ruleIndex, inputSymbol) {
946        this.emitErrorMessage("exit "+ruleName+" "+inputSymbol);
947        if ( this.state.failed ) {
948            this.emitErrorMessage(" failed="+this.state.failed);
949        }
950        if ( this.state.backtracking>0 ) {
951            this.emitErrorMessage(" backtracking="+this.state.backtracking);
952        }
953    }
954};
955