BaseRecognizer.cs revision 324c4644fee44b9898524c09511bd33c3f12e2df
1/*
2 * [The "BSD licence"]
3 * Copyright (c) 2005-2008 Terence Parr
4 * All rights reserved.
5 *
6 * Conversion to C#:
7 * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33namespace Antlr.Runtime {
34    using System.Collections.Generic;
35
36    using ArgumentNullException = System.ArgumentNullException;
37    using Array = System.Array;
38    using Conditional = System.Diagnostics.ConditionalAttribute;
39    using Exception = System.Exception;
40    using IDebugEventListener = Antlr.Runtime.Debug.IDebugEventListener;
41    using MethodBase = System.Reflection.MethodBase;
42    using NotSupportedException = System.NotSupportedException;
43    using Regex = System.Text.RegularExpressions.Regex;
44    using StackFrame = System.Diagnostics.StackFrame;
45    using StackTrace = System.Diagnostics.StackTrace;
46    using TextWriter = System.IO.TextWriter;
47    using Type = System.Type;
48
49    /** <summary>
50     *  A generic recognizer that can handle recognizers generated from
51     *  lexer, parser, and tree grammars.  This is all the parsing
52     *  support code essentially; most of it is error recovery stuff and
53     *  backtracking.
54     *  </summary>
55     */
56    public abstract class BaseRecognizer {
57        public const int MemoRuleFailed = -2;
58        public const int MemoRuleUnknown = -1;
59        public const int InitialFollowStackSize = 100;
60
61        // copies from Token object for convenience in actions
62        public const int DefaultTokenChannel = TokenChannels.Default;
63        public const int Hidden = TokenChannels.Hidden;
64
65        public const string NextTokenRuleName = "nextToken";
66
67        /** <summary>
68         *  State of a lexer, parser, or tree parser are collected into a state
69         *  object so the state can be shared.  This sharing is needed to
70         *  have one grammar import others and share same error variables
71         *  and other state variables.  It's a kind of explicit multiple
72         *  inheritance via delegation of methods and shared state.
73         *  </summary>
74         */
75        protected internal RecognizerSharedState state;
76
77        public BaseRecognizer()
78            : this(new RecognizerSharedState()) {
79        }
80
81        public BaseRecognizer(RecognizerSharedState state) {
82            if (state == null) {
83                state = new RecognizerSharedState();
84            }
85            this.state = state;
86            InitDFAs();
87        }
88
89        public TextWriter TraceDestination {
90            get;
91            set;
92        }
93
94        protected virtual void InitDFAs() {
95        }
96
97        /** <summary>reset the parser's state; subclasses must rewinds the input stream</summary> */
98        public virtual void Reset() {
99            // wack everything related to error recovery
100            if (state == null) {
101                return; // no shared state work to do
102            }
103            state._fsp = -1;
104            state.errorRecovery = false;
105            state.lastErrorIndex = -1;
106            state.failed = false;
107            state.syntaxErrors = 0;
108            // wack everything related to backtracking and memoization
109            state.backtracking = 0;
110            for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) { // wipe cache
111                state.ruleMemo[i] = null;
112            }
113        }
114
115
116        /** <summary>
117         *  Match current input symbol against ttype.  Attempt
118         *  single token insertion or deletion error recovery.  If
119         *  that fails, throw MismatchedTokenException.
120         *  </summary>
121         *
122         *  <remarks>
123         *  To turn off single token insertion or deletion error
124         *  recovery, override recoverFromMismatchedToken() and have it
125         *  throw an exception. See TreeParser.recoverFromMismatchedToken().
126         *  This way any error in a rule will cause an exception and
127         *  immediate exit from rule.  Rule would recover by resynchronizing
128         *  to the set of symbols that can follow rule ref.
129         *  </remarks>
130         */
131        public virtual object Match(IIntStream input, int ttype, BitSet follow) {
132            //System.out.println("match "+((TokenStream)input).LT(1));
133            object matchedSymbol = GetCurrentInputSymbol(input);
134            if (input.LA(1) == ttype) {
135                input.Consume();
136                state.errorRecovery = false;
137                state.failed = false;
138                return matchedSymbol;
139            }
140            if (state.backtracking > 0) {
141                state.failed = true;
142                return matchedSymbol;
143            }
144            matchedSymbol = RecoverFromMismatchedToken(input, ttype, follow);
145            return matchedSymbol;
146        }
147
148        /** <summary>Match the wildcard: in a symbol</summary> */
149        public virtual void MatchAny(IIntStream input) {
150            state.errorRecovery = false;
151            state.failed = false;
152            input.Consume();
153        }
154
155        public virtual bool MismatchIsUnwantedToken(IIntStream input, int ttype) {
156            return input.LA(2) == ttype;
157        }
158
159        public virtual bool MismatchIsMissingToken(IIntStream input, BitSet follow) {
160            if (follow == null) {
161                // we have no information about the follow; we can only consume
162                // a single token and hope for the best
163                return false;
164            }
165            // compute what can follow this grammar element reference
166            if (follow.Member(TokenTypes.EndOfRule)) {
167                BitSet viableTokensFollowingThisRule = ComputeContextSensitiveRuleFOLLOW();
168                follow = follow.Or(viableTokensFollowingThisRule);
169                if (state._fsp >= 0) { // remove EOR if we're not the start symbol
170                    follow.Remove(TokenTypes.EndOfRule);
171                }
172            }
173            // if current token is consistent with what could come after set
174            // then we know we're missing a token; error recovery is free to
175            // "insert" the missing token
176
177            //System.out.println("viable tokens="+follow.toString(getTokenNames()));
178            //System.out.println("LT(1)="+((TokenStream)input).LT(1));
179
180            // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
181            // in follow set to indicate that the fall of the start symbol is
182            // in the set (EOF can follow).
183            if (follow.Member(input.LA(1)) || follow.Member(TokenTypes.EndOfRule)) {
184                //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
185                return true;
186            }
187            return false;
188        }
189
190        /** <summary>Report a recognition problem.</summary>
191         *
192         *  <remarks>
193         *  This method sets errorRecovery to indicate the parser is recovering
194         *  not parsing.  Once in recovery mode, no errors are generated.
195         *  To get out of recovery mode, the parser must successfully match
196         *  a token (after a resync).  So it will go:
197         *
198         * 		1. error occurs
199         * 		2. enter recovery mode, report error
200         * 		3. consume until token found in resynch set
201         * 		4. try to resume parsing
202         * 		5. next match() will reset errorRecovery mode
203         *
204         *  If you override, make sure to update syntaxErrors if you care about that.
205         *  </remarks>
206         */
207        public virtual void ReportError(RecognitionException e) {
208            // if we've already reported an error and have not matched a token
209            // yet successfully, don't report any errors.
210            if (state.errorRecovery) {
211                //System.err.print("[SPURIOUS] ");
212                return;
213            }
214            state.syntaxErrors++; // don't count spurious
215            state.errorRecovery = true;
216
217            DisplayRecognitionError(this.TokenNames, e);
218        }
219
220        public virtual void DisplayRecognitionError(string[] tokenNames,
221                                            RecognitionException e) {
222            string hdr = GetErrorHeader(e);
223            string msg = GetErrorMessage(e, tokenNames);
224            EmitErrorMessage(hdr + " " + msg);
225        }
226
227        /** <summary>What error message should be generated for the various exception types?</summary>
228         *
229         *  <remarks>
230         *  Not very object-oriented code, but I like having all error message
231         *  generation within one method rather than spread among all of the
232         *  exception classes. This also makes it much easier for the exception
233         *  handling because the exception classes do not have to have pointers back
234         *  to this object to access utility routines and so on. Also, changing
235         *  the message for an exception type would be difficult because you
236         *  would have to subclassing exception, but then somehow get ANTLR
237         *  to make those kinds of exception objects instead of the default.
238         *  This looks weird, but trust me--it makes the most sense in terms
239         *  of flexibility.
240         *
241         *  For grammar debugging, you will want to override this to add
242         *  more information such as the stack frame with
243         *  getRuleInvocationStack(e, this.getClass().getName()) and,
244         *  for no viable alts, the decision description and state etc...
245         *
246         *  Override this to change the message generated for one or more
247         *  exception types.
248         *  </remarks>
249         */
250        public virtual string GetErrorMessage(RecognitionException e, string[] tokenNames) {
251            string msg = e.Message;
252            if (e is UnwantedTokenException) {
253                UnwantedTokenException ute = (UnwantedTokenException)e;
254                string tokenName = "<unknown>";
255                if (ute.Expecting == TokenTypes.EndOfFile) {
256                    tokenName = "EndOfFile";
257                } else {
258                    tokenName = tokenNames[ute.Expecting];
259                }
260                msg = "extraneous input " + GetTokenErrorDisplay(ute.UnexpectedToken) +
261                    " expecting " + tokenName;
262            } else if (e is MissingTokenException) {
263                MissingTokenException mte = (MissingTokenException)e;
264                string tokenName = "<unknown>";
265                if (mte.Expecting == TokenTypes.EndOfFile) {
266                    tokenName = "EndOfFile";
267                } else {
268                    tokenName = tokenNames[mte.Expecting];
269                }
270                msg = "missing " + tokenName + " at " + GetTokenErrorDisplay(e.Token);
271            } else if (e is MismatchedTokenException) {
272                MismatchedTokenException mte = (MismatchedTokenException)e;
273                string tokenName = "<unknown>";
274                if (mte.Expecting == TokenTypes.EndOfFile) {
275                    tokenName = "EndOfFile";
276                } else {
277                    tokenName = tokenNames[mte.Expecting];
278                }
279                msg = "mismatched input " + GetTokenErrorDisplay(e.Token) +
280                    " expecting " + tokenName;
281            } else if (e is MismatchedTreeNodeException) {
282                MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e;
283                string tokenName = "<unknown>";
284                if (mtne.Expecting == TokenTypes.EndOfFile) {
285                    tokenName = "EndOfFile";
286                } else {
287                    tokenName = tokenNames[mtne.Expecting];
288                }
289                // workaround for a .NET framework bug (NullReferenceException)
290                string nodeText = (mtne.Node != null) ? mtne.Node.ToString() ?? string.Empty : string.Empty;
291                msg = "mismatched tree node: " + nodeText + " expecting " + tokenName;
292            } else if (e is NoViableAltException) {
293                //NoViableAltException nvae = (NoViableAltException)e;
294                // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
295                // and "(decision="+nvae.decisionNumber+") and
296                // "state "+nvae.stateNumber
297                msg = "no viable alternative at input " + GetTokenErrorDisplay(e.Token);
298            } else if (e is EarlyExitException) {
299                //EarlyExitException eee = (EarlyExitException)e;
300                // for development, can add "(decision="+eee.decisionNumber+")"
301                msg = "required (...)+ loop did not match anything at input " +
302                    GetTokenErrorDisplay(e.Token);
303            } else if (e is MismatchedSetException) {
304                MismatchedSetException mse = (MismatchedSetException)e;
305                msg = "mismatched input " + GetTokenErrorDisplay(e.Token) +
306                    " expecting set " + mse.Expecting;
307            } else if (e is MismatchedNotSetException) {
308                MismatchedNotSetException mse = (MismatchedNotSetException)e;
309                msg = "mismatched input " + GetTokenErrorDisplay(e.Token) +
310                    " expecting set " + mse.Expecting;
311            } else if (e is FailedPredicateException) {
312                FailedPredicateException fpe = (FailedPredicateException)e;
313                msg = "rule " + fpe.RuleName + " failed predicate: {" +
314                    fpe.PredicateText + "}?";
315            }
316            return msg;
317        }
318
319        /** <summary>
320         *  Get number of recognition errors (lexer, parser, tree parser).  Each
321         *  recognizer tracks its own number.  So parser and lexer each have
322         *  separate count.  Does not count the spurious errors found between
323         *  an error and next valid token match
324         *  </summary>
325         *
326         *  <seealso cref="reportError()"/>
327         */
328        public virtual int NumberOfSyntaxErrors {
329            get {
330                return state.syntaxErrors;
331            }
332        }
333
334        /** <summary>What is the error header, normally line/character position information?</summary> */
335        public virtual string GetErrorHeader(RecognitionException e) {
336            return "line " + e.Line + ":" + (e.CharPositionInLine + 1);
337        }
338
339        /** <summary>
340         *  How should a token be displayed in an error message? The default
341         *  is to display just the text, but during development you might
342         *  want to have a lot of information spit out.  Override in that case
343         *  to use t.ToString() (which, for CommonToken, dumps everything about
344         *  the token). This is better than forcing you to override a method in
345         *  your token objects because you don't have to go modify your lexer
346         *  so that it creates a new Java type.
347         *  </summary>
348         */
349        public virtual string GetTokenErrorDisplay(IToken t) {
350            string s = t.Text;
351            if (s == null) {
352                if (t.Type == TokenTypes.EndOfFile) {
353                    s = "<EOF>";
354                } else {
355                    s = "<" + t.Type + ">";
356                }
357            }
358            s = Regex.Replace(s, "\n", "\\\\n");
359            s = Regex.Replace(s, "\r", "\\\\r");
360            s = Regex.Replace(s, "\t", "\\\\t");
361            return "'" + s + "'";
362        }
363
364        /** <summary>Override this method to change where error messages go</summary> */
365        public virtual void EmitErrorMessage(string msg) {
366            if (TraceDestination != null)
367                TraceDestination.WriteLine(msg);
368        }
369
370        /** <summary>
371         *  Recover from an error found on the input stream.  This is
372         *  for NoViableAlt and mismatched symbol exceptions.  If you enable
373         *  single token insertion and deletion, this will usually not
374         *  handle mismatched symbol exceptions but there could be a mismatched
375         *  token that the match() routine could not recover from.
376         *  </summary>
377         */
378        public virtual void Recover(IIntStream input, RecognitionException re) {
379            if (state.lastErrorIndex == input.Index) {
380                // uh oh, another error at same token index; must be a case
381                // where LT(1) is in the recovery token set so nothing is
382                // consumed; consume a single token so at least to prevent
383                // an infinite loop; this is a failsafe.
384                input.Consume();
385            }
386            state.lastErrorIndex = input.Index;
387            BitSet followSet = ComputeErrorRecoverySet();
388            BeginResync();
389            ConsumeUntil(input, followSet);
390            EndResync();
391        }
392
393        /** <summary>
394         *  A hook to listen in on the token consumption during error recovery.
395         *  The DebugParser subclasses this to fire events to the listenter.
396         *  </summary>
397         */
398        public virtual void BeginResync() {
399        }
400
401        public virtual void EndResync() {
402        }
403
404        /*  Compute the error recovery set for the current rule.  During
405         *  rule invocation, the parser pushes the set of tokens that can
406         *  follow that rule reference on the stack; this amounts to
407         *  computing FIRST of what follows the rule reference in the
408         *  enclosing rule. This local follow set only includes tokens
409         *  from within the rule; i.e., the FIRST computation done by
410         *  ANTLR stops at the end of a rule.
411         *
412         *  EXAMPLE
413         *
414         *  When you find a "no viable alt exception", the input is not
415         *  consistent with any of the alternatives for rule r.  The best
416         *  thing to do is to consume tokens until you see something that
417         *  can legally follow a call to r *or* any rule that called r.
418         *  You don't want the exact set of viable next tokens because the
419         *  input might just be missing a token--you might consume the
420         *  rest of the input looking for one of the missing tokens.
421         *
422         *  Consider grammar:
423         *
424         *  a : '[' b ']'
425         *    | '(' b ')'
426         *    ;
427         *  b : c '^' INT ;
428         *  c : ID
429         *    | INT
430         *    ;
431         *
432         *  At each rule invocation, the set of tokens that could follow
433         *  that rule is pushed on a stack.  Here are the various "local"
434         *  follow sets:
435         *
436         *  FOLLOW(b1_in_a) = FIRST(']') = ']'
437         *  FOLLOW(b2_in_a) = FIRST(')') = ')'
438         *  FOLLOW(c_in_b) = FIRST('^') = '^'
439         *
440         *  Upon erroneous input "[]", the call chain is
441         *
442         *  a -> b -> c
443         *
444         *  and, hence, the follow context stack is:
445         *
446         *  depth  local follow set     after call to rule
447         *    0         <EOF>                    a (from main())
448         *    1          ']'                     b
449         *    3          '^'                     c
450         *
451         *  Notice that ')' is not included, because b would have to have
452         *  been called from a different context in rule a for ')' to be
453         *  included.
454         *
455         *  For error recovery, we cannot consider FOLLOW(c)
456         *  (context-sensitive or otherwise).  We need the combined set of
457         *  all context-sensitive FOLLOW sets--the set of all tokens that
458         *  could follow any reference in the call chain.  We need to
459         *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
460         *  we resync'd to that token, we'd consume until EOF.  We need to
461         *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
462         *  In this case, for input "[]", LA(1) is in this set so we would
463         *  not consume anything and after printing an error rule c would
464         *  return normally.  It would not find the required '^' though.
465         *  At this point, it gets a mismatched token error and throws an
466         *  exception (since LA(1) is not in the viable following token
467         *  set).  The rule exception handler tries to recover, but finds
468         *  the same recovery set and doesn't consume anything.  Rule b
469         *  exits normally returning to rule a.  Now it finds the ']' (and
470         *  with the successful match exits errorRecovery mode).
471         *
472         *  So, you cna see that the parser walks up call chain looking
473         *  for the token that was a member of the recovery set.
474         *
475         *  Errors are not generated in errorRecovery mode.
476         *
477         *  ANTLR's error recovery mechanism is based upon original ideas:
478         *
479         *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
480         *
481         *  and
482         *
483         *  "A note on error recovery in recursive descent parsers":
484         *  http://portal.acm.org/citation.cfm?id=947902.947905
485         *
486         *  Later, Josef Grosch had some good ideas:
487         *
488         *  "Efficient and Comfortable Error Recovery in Recursive Descent
489         *  Parsers":
490         *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
491         *
492         *  Like Grosch I implemented local FOLLOW sets that are combined
493         *  at run-time upon error to avoid overhead during parsing.
494         */
495        protected virtual BitSet ComputeErrorRecoverySet() {
496            return CombineFollows(false);
497        }
498
499        /** <summary>
500         *  Compute the context-sensitive FOLLOW set for current rule.
501         *  This is set of token types that can follow a specific rule
502         *  reference given a specific call chain.  You get the set of
503         *  viable tokens that can possibly come next (lookahead depth 1)
504         *  given the current call chain.  Contrast this with the
505         *  definition of plain FOLLOW for rule r:
506         *  </summary>
507         *
508         *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
509         *
510         *  where x in T* and alpha, beta in V*; T is set of terminals and
511         *  V is the set of terminals and nonterminals.  In other words,
512         *  FOLLOW(r) is the set of all tokens that can possibly follow
513         *  references to r in *any* sentential form (context).  At
514         *  runtime, however, we know precisely which context applies as
515         *  we have the call chain.  We may compute the exact (rather
516         *  than covering superset) set of following tokens.
517         *
518         *  For example, consider grammar:
519         *
520         *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
521         *       | "return" expr '.'
522         *       ;
523         *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
524         *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
525         *       | '(' expr ')'
526         *       ;
527         *
528         *  The FOLLOW sets are all inclusive whereas context-sensitive
529         *  FOLLOW sets are precisely what could follow a rule reference.
530         *  For input input "i=(3);", here is the derivation:
531         *
532         *  stat => ID '=' expr ';'
533         *       => ID '=' atom ('+' atom)* ';'
534         *       => ID '=' '(' expr ')' ('+' atom)* ';'
535         *       => ID '=' '(' atom ')' ('+' atom)* ';'
536         *       => ID '=' '(' INT ')' ('+' atom)* ';'
537         *       => ID '=' '(' INT ')' ';'
538         *
539         *  At the "3" token, you'd have a call chain of
540         *
541         *    stat -> expr -> atom -> expr -> atom
542         *
543         *  What can follow that specific nested ref to atom?  Exactly ')'
544         *  as you can see by looking at the derivation of this specific
545         *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
546         *
547         *  You want the exact viable token set when recovering from a
548         *  token mismatch.  Upon token mismatch, if LA(1) is member of
549         *  the viable next token set, then you know there is most likely
550         *  a missing token in the input stream.  "Insert" one by just not
551         *  throwing an exception.
552         */
553        protected virtual BitSet ComputeContextSensitiveRuleFOLLOW() {
554            return CombineFollows(true);
555        }
556
557        // what is exact? it seems to only add sets from above on stack
558        // if EOR is in set i.  When it sees a set w/o EOR, it stops adding.
559        // Why would we ever want them all?  Maybe no viable alt instead of
560        // mismatched token?
561        protected virtual BitSet CombineFollows(bool exact) {
562            int top = state._fsp;
563            BitSet followSet = new BitSet();
564            for (int i = top; i >= 0; i--) {
565                BitSet localFollowSet = (BitSet)state.following[i];
566                /*
567                System.out.println("local follow depth "+i+"="+
568                                   localFollowSet.toString(getTokenNames())+")");
569                 */
570                followSet.OrInPlace(localFollowSet);
571                if (exact) {
572                    // can we see end of rule?
573                    if (localFollowSet.Member(TokenTypes.EndOfRule)) {
574                        // Only leave EOR in set if at top (start rule); this lets
575                        // us know if have to include follow(start rule); i.e., EOF
576                        if (i > 0) {
577                            followSet.Remove(TokenTypes.EndOfRule);
578                        }
579                    } else { // can't see end of rule, quit
580                        break;
581                    }
582                }
583            }
584            return followSet;
585        }
586
587        /** <summary>Attempt to recover from a single missing or extra token.</summary>
588         *
589         *  EXTRA TOKEN
590         *
591         *  LA(1) is not what we are looking for.  If LA(2) has the right token,
592         *  however, then assume LA(1) is some extra spurious token.  Delete it
593         *  and LA(2) as if we were doing a normal match(), which advances the
594         *  input.
595         *
596         *  MISSING TOKEN
597         *
598         *  If current token is consistent with what could come after
599         *  ttype then it is ok to "insert" the missing token, else throw
600         *  exception For example, Input "i=(3;" is clearly missing the
601         *  ')'.  When the parser returns from the nested call to expr, it
602         *  will have call chain:
603         *
604         *    stat -> expr -> atom
605         *
606         *  and it will be trying to match the ')' at this point in the
607         *  derivation:
608         *
609         *       => ID '=' '(' INT ')' ('+' atom)* ';'
610         *                          ^
611         *  match() will see that ';' doesn't match ')' and report a
612         *  mismatched token error.  To recover, it sees that LA(1)==';'
613         *  is in the set of tokens that can follow the ')' token
614         *  reference in rule atom.  It can assume that you forgot the ')'.
615         */
616        protected virtual object RecoverFromMismatchedToken(IIntStream input, int ttype, BitSet follow) {
617            RecognitionException e = null;
618            // if next token is what we are looking for then "delete" this token
619            if (MismatchIsUnwantedToken(input, ttype)) {
620                e = new UnwantedTokenException(ttype, input, TokenNames);
621                /*
622                System.err.println("recoverFromMismatchedToken deleting "+
623                                   ((TokenStream)input).LT(1)+
624                                   " since "+((TokenStream)input).LT(2)+" is what we want");
625                 */
626                BeginResync();
627                input.Consume(); // simply delete extra token
628                EndResync();
629                ReportError(e);  // report after consuming so AW sees the token in the exception
630                // we want to return the token we're actually matching
631                object matchedSymbol = GetCurrentInputSymbol(input);
632                input.Consume(); // move past ttype token as if all were ok
633                return matchedSymbol;
634            }
635            // can't recover with single token deletion, try insertion
636            if (MismatchIsMissingToken(input, follow)) {
637                object inserted = GetMissingSymbol(input, e, ttype, follow);
638                e = new MissingTokenException(ttype, input, inserted);
639                ReportError(e);  // report after inserting so AW sees the token in the exception
640                return inserted;
641            }
642            // even that didn't work; must throw the exception
643            e = new MismatchedTokenException(ttype, input, TokenNames);
644            throw e;
645        }
646
647        /** Not currently used */
648        public virtual object RecoverFromMismatchedSet(IIntStream input,
649                                               RecognitionException e,
650                                               BitSet follow) {
651            if (MismatchIsMissingToken(input, follow)) {
652                // System.out.println("missing token");
653                ReportError(e);
654                // we don't know how to conjure up a token for sets yet
655                return GetMissingSymbol(input, e, TokenTypes.Invalid, follow);
656            }
657            // TODO do single token deletion like above for Token mismatch
658            throw e;
659        }
660
661        /** <summary>
662         *  Match needs to return the current input symbol, which gets put
663         *  into the label for the associated token ref; e.g., x=ID.  Token
664         *  and tree parsers need to return different objects. Rather than test
665         *  for input stream type or change the IntStream interface, I use
666         *  a simple method to ask the recognizer to tell me what the current
667         *  input symbol is.
668         *  </summary>
669         *
670         *  <remarks>This is ignored for lexers.</remarks>
671         */
672        protected virtual object GetCurrentInputSymbol(IIntStream input) {
673            return null;
674        }
675
676        /** <summary>Conjure up a missing token during error recovery.</summary>
677         *
678         *  <remarks>
679         *  The recognizer attempts to recover from single missing
680         *  symbols. But, actions might refer to that missing symbol.
681         *  For example, x=ID {f($x);}. The action clearly assumes
682         *  that there has been an identifier matched previously and that
683         *  $x points at that token. If that token is missing, but
684         *  the next token in the stream is what we want we assume that
685         *  this token is missing and we keep going. Because we
686         *  have to return some token to replace the missing token,
687         *  we have to conjure one up. This method gives the user control
688         *  over the tokens returned for missing tokens. Mostly,
689         *  you will want to create something special for identifier
690         *  tokens. For literals such as '{' and ',', the default
691         *  action in the parser or tree parser works. It simply creates
692         *  a CommonToken of the appropriate type. The text will be the token.
693         *  If you change what tokens must be created by the lexer,
694         *  override this method to create the appropriate tokens.
695         *  </remarks>
696         */
697        protected virtual object GetMissingSymbol(IIntStream input,
698                                          RecognitionException e,
699                                          int expectedTokenType,
700                                          BitSet follow) {
701            return null;
702        }
703
704        public virtual void ConsumeUntil(IIntStream input, int tokenType) {
705            //System.out.println("consumeUntil "+tokenType);
706            int ttype = input.LA(1);
707            while (ttype != TokenTypes.EndOfFile && ttype != tokenType) {
708                input.Consume();
709                ttype = input.LA(1);
710            }
711        }
712
713        /** <summary>Consume tokens until one matches the given token set</summary> */
714        public virtual void ConsumeUntil(IIntStream input, BitSet set) {
715            //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
716            int ttype = input.LA(1);
717            while (ttype != TokenTypes.EndOfFile && !set.Member(ttype)) {
718                //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
719                input.Consume();
720                ttype = input.LA(1);
721            }
722        }
723
724        /** <summary>Push a rule's follow set using our own hardcoded stack</summary> */
725        protected void PushFollow(BitSet fset) {
726            if ((state._fsp + 1) >= state.following.Length) {
727                Array.Resize(ref state.following, state.following.Length * 2);
728            }
729            state.following[++state._fsp] = fset;
730        }
731
732        protected void PopFollow() {
733            state._fsp--;
734        }
735
736        /** <summary>
737         *  Return List<String> of the rules in your parser instance
738         *  leading up to a call to this method.  You could override if
739         *  you want more details such as the file/line info of where
740         *  in the parser java code a rule is invoked.
741         *  </summary>
742         *
743         *  <remarks>
744         *  This is very useful for error messages and for context-sensitive
745         *  error recovery.
746         *  </remarks>
747         */
748        public virtual IList<string> GetRuleInvocationStack() {
749            return GetRuleInvocationStack(new StackTrace(true));
750        }
751
752        /** <summary>
753         *  A more general version of GetRuleInvocationStack where you can
754         *  pass in the StackTrace of, for example, a RecognitionException
755         *  to get it's rule stack trace.
756         *  </summary>
757         */
758        public static IList<string> GetRuleInvocationStack(StackTrace trace) {
759            if (trace == null)
760                throw new ArgumentNullException("trace");
761
762            List<string> rules = new List<string>();
763            StackFrame[] stack = trace.GetFrames() ?? new StackFrame[0];
764
765            for (int i = stack.Length - 1; i >= 0; i--) {
766                StackFrame frame = stack[i];
767                MethodBase method = frame.GetMethod();
768                GrammarRuleAttribute[] attributes = (GrammarRuleAttribute[])method.GetCustomAttributes(typeof(GrammarRuleAttribute), true);
769                if (attributes != null && attributes.Length > 0)
770                    rules.Add(attributes[0].Name);
771            }
772
773            return rules;
774        }
775
776        public virtual int BacktrackingLevel {
777            get {
778                return state.backtracking;
779            }
780            set {
781                state.backtracking = value;
782            }
783        }
784
785        /** <summary>Return whether or not a backtracking attempt failed.</summary> */
786        public virtual bool Failed {
787            get {
788                return state.failed;
789            }
790        }
791
792        /** <summary>
793         *  Used to print out token names like ID during debugging and
794         *  error reporting.  The generated parsers implement a method
795         *  that overrides this to point to their String[] tokenNames.
796         *  </summary>
797         */
798        public virtual string[] TokenNames {
799            get {
800                return null;
801            }
802        }
803
804        /** <summary>
805         *  For debugging and other purposes, might want the grammar name.
806         *  Have ANTLR generate an implementation for this method.
807         *  </summary>
808         */
809        public virtual string GrammarFileName {
810            get {
811                return null;
812            }
813        }
814
815        public abstract string SourceName {
816            get;
817        }
818
819        /** <summary>
820         *  A convenience method for use most often with template rewrites.
821         *  Convert a List<Token> to List<String>
822         *  </summary>
823         */
824        public virtual List<string> ToStrings(ICollection<IToken> tokens) {
825            if (tokens == null)
826                return null;
827
828            List<string> strings = new List<string>(tokens.Count);
829            foreach (IToken token in tokens) {
830                strings.Add(token.Text);
831            }
832
833            return strings;
834        }
835
836        /** <summary>
837         *  Given a rule number and a start token index number, return
838         *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
839         *  start index.  If this rule has parsed input starting from the
840         *  start index before, then return where the rule stopped parsing.
841         *  It returns the index of the last token matched by the rule.
842         *  </summary>
843         *
844         *  <remarks>
845         *  For now we use a hashtable and just the slow Object-based one.
846         *  Later, we can make a special one for ints and also one that
847         *  tosses out data after we commit past input position i.
848         *  </remarks>
849         */
850        public virtual int GetRuleMemoization(int ruleIndex, int ruleStartIndex) {
851            if (state.ruleMemo[ruleIndex] == null) {
852                state.ruleMemo[ruleIndex] = new Dictionary<int, int>();
853            }
854
855            int stopIndex;
856            if (!state.ruleMemo[ruleIndex].TryGetValue(ruleStartIndex, out stopIndex))
857                return MemoRuleUnknown;
858
859            return stopIndex;
860        }
861
862        /** <summary>
863         *  Has this rule already parsed input at the current index in the
864         *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
865         *  If we attempted but failed to parse properly before, return
866         *  MEMO_RULE_FAILED.
867         *  </summary>
868         *
869         *  <remarks>
870         *  This method has a side-effect: if we have seen this input for
871         *  this rule and successfully parsed before, then seek ahead to
872         *  1 past the stop token matched for this rule last time.
873         *  </remarks>
874         */
875        public virtual bool AlreadyParsedRule(IIntStream input, int ruleIndex) {
876            int stopIndex = GetRuleMemoization(ruleIndex, input.Index);
877            if (stopIndex == MemoRuleUnknown) {
878                return false;
879            }
880            if (stopIndex == MemoRuleFailed) {
881                //System.out.println("rule "+ruleIndex+" will never succeed");
882                state.failed = true;
883            } else {
884                //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed);
885                input.Seek(stopIndex + 1); // jump to one past stop token
886            }
887            return true;
888        }
889
890        /** <summary>
891         *  Record whether or not this rule parsed the input at this position
892         *  successfully.  Use a standard java hashtable for now.
893         *  </summary>
894         */
895        public virtual void Memoize(IIntStream input,
896                            int ruleIndex,
897                            int ruleStartIndex) {
898            int stopTokenIndex = state.failed ? MemoRuleFailed : input.Index - 1;
899            if (state.ruleMemo == null) {
900                if (TraceDestination != null)
901                    TraceDestination.WriteLine("!!!!!!!!! memo array is null for " + GrammarFileName);
902            }
903            if (ruleIndex >= state.ruleMemo.Length) {
904                if (TraceDestination != null)
905                    TraceDestination.WriteLine("!!!!!!!!! memo size is " + state.ruleMemo.Length + ", but rule index is " + ruleIndex);
906            }
907            if (state.ruleMemo[ruleIndex] != null) {
908                state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex;
909            }
910        }
911
912        /** <summary>return how many rule/input-index pairs there are in total.</summary>
913         *  TODO: this includes synpreds. :(
914         */
915        public virtual int GetRuleMemoizationCacheSize() {
916            int n = 0;
917            for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) {
918                var ruleMap = state.ruleMemo[i];
919                if (ruleMap != null) {
920                    n += ruleMap.Count; // how many input indexes are recorded?
921                }
922            }
923            return n;
924        }
925
926        [Conditional("ANTLR_TRACE")]
927        public virtual void TraceIn(string ruleName, int ruleIndex, object inputSymbol) {
928            if (TraceDestination == null)
929                return;
930
931            TraceDestination.Write("enter " + ruleName + " " + inputSymbol);
932            if (state.backtracking > 0) {
933                TraceDestination.Write(" backtracking=" + state.backtracking);
934            }
935            TraceDestination.WriteLine();
936        }
937
938        [Conditional("ANTLR_TRACE")]
939        public virtual void TraceOut(string ruleName, int ruleIndex, object inputSymbol) {
940            if (TraceDestination == null)
941                return;
942
943            TraceDestination.Write("exit " + ruleName + " " + inputSymbol);
944            if (state.backtracking > 0) {
945                TraceDestination.Write(" backtracking=" + state.backtracking);
946                if (state.failed)
947                    TraceDestination.Write(" failed");
948                else
949                    TraceDestination.Write(" succeeded");
950            }
951            TraceDestination.WriteLine();
952        }
953
954        #region Debugging support
955        public virtual IDebugEventListener DebugListener {
956            get {
957                return null;
958            }
959        }
960
961        [Conditional("ANTLR_DEBUG")]
962        protected virtual void DebugEnterRule(string grammarFileName, string ruleName) {
963            IDebugEventListener dbg = DebugListener;
964            if (dbg != null)
965                dbg.EnterRule(grammarFileName, ruleName);
966        }
967
968        [Conditional("ANTLR_DEBUG")]
969        protected virtual void DebugExitRule(string grammarFileName, string ruleName) {
970            IDebugEventListener dbg = DebugListener;
971            if (dbg != null)
972                dbg.ExitRule(grammarFileName, ruleName);
973        }
974
975        [Conditional("ANTLR_DEBUG")]
976        protected virtual void DebugEnterSubRule(int decisionNumber) {
977            IDebugEventListener dbg = DebugListener;
978            if (dbg != null)
979                dbg.EnterSubRule(decisionNumber);
980        }
981
982        [Conditional("ANTLR_DEBUG")]
983        protected virtual void DebugExitSubRule(int decisionNumber) {
984            IDebugEventListener dbg = DebugListener;
985            if (dbg != null)
986                dbg.ExitSubRule(decisionNumber);
987        }
988
989        [Conditional("ANTLR_DEBUG")]
990        protected virtual void DebugEnterAlt(int alt) {
991            IDebugEventListener dbg = DebugListener;
992            if (dbg != null)
993                dbg.EnterAlt(alt);
994        }
995
996        [Conditional("ANTLR_DEBUG")]
997        protected virtual void DebugEnterDecision(int decisionNumber, bool couldBacktrack) {
998            IDebugEventListener dbg = DebugListener;
999            if (dbg != null)
1000                dbg.EnterDecision(decisionNumber, couldBacktrack);
1001        }
1002
1003        [Conditional("ANTLR_DEBUG")]
1004        protected virtual void DebugExitDecision(int decisionNumber) {
1005            IDebugEventListener dbg = DebugListener;
1006            if (dbg != null)
1007                dbg.ExitDecision(decisionNumber);
1008        }
1009
1010        [Conditional("ANTLR_DEBUG")]
1011        protected virtual void DebugLocation(int line, int charPositionInLine) {
1012            IDebugEventListener dbg = DebugListener;
1013            if (dbg != null)
1014                dbg.Location(line, charPositionInLine);
1015        }
1016
1017        [Conditional("ANTLR_DEBUG")]
1018        protected virtual void DebugSemanticPredicate(bool result, string predicate) {
1019            IDebugEventListener dbg = DebugListener;
1020            if (dbg != null)
1021                dbg.SemanticPredicate(result, predicate);
1022        }
1023
1024        [Conditional("ANTLR_DEBUG")]
1025        protected virtual void DebugBeginBacktrack(int level) {
1026            IDebugEventListener dbg = DebugListener;
1027            if (dbg != null)
1028                dbg.BeginBacktrack(level);
1029        }
1030
1031        [Conditional("ANTLR_DEBUG")]
1032        protected virtual void DebugEndBacktrack(int level, bool successful) {
1033            IDebugEventListener dbg = DebugListener;
1034            if (dbg != null)
1035                dbg.EndBacktrack(level, successful);
1036        }
1037
1038        [Conditional("ANTLR_DEBUG")]
1039        protected virtual void DebugRecognitionException(RecognitionException ex) {
1040            IDebugEventListener dbg = DebugListener;
1041            if (dbg != null)
1042                dbg.RecognitionException(ex);
1043        }
1044        #endregion
1045    }
1046}
1047