1/*
2 * [The "BSD licence"]
3 * Copyright (c) 2005-2008 Terence Parr
4 * All rights reserved.
5 *
6 * Conversion to C#:
7 * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33namespace Antlr.Runtime
34{
35    using System.Collections.Generic;
36
37    using ArgumentNullException = System.ArgumentNullException;
38    using Array = System.Array;
39    using Conditional = System.Diagnostics.ConditionalAttribute;
40    using Exception = System.Exception;
41    using IDebugEventListener = Antlr.Runtime.Debug.IDebugEventListener;
42    using MethodBase = System.Reflection.MethodBase;
43    using NotSupportedException = System.NotSupportedException;
44    using Regex = System.Text.RegularExpressions.Regex;
45    using StackFrame = System.Diagnostics.StackFrame;
46    using StackTrace = System.Diagnostics.StackTrace;
47    using TextWriter = System.IO.TextWriter;
48    using Type = System.Type;
49
50    /** <summary>
51     *  A generic recognizer that can handle recognizers generated from
52     *  lexer, parser, and tree grammars.  This is all the parsing
53     *  support code essentially; most of it is error recovery stuff and
54     *  backtracking.
55     *  </summary>
56     */
57    public abstract class BaseRecognizer
58    {
59        public const int MemoRuleFailed = -2;
60        public const int MemoRuleUnknown = -1;
61        public const int InitialFollowStackSize = 100;
62
63        // copies from Token object for convenience in actions
64        public const int DefaultTokenChannel = TokenChannels.Default;
65        public const int Hidden = TokenChannels.Hidden;
66
67        public const string NextTokenRuleName = "nextToken";
68
69        /** <summary>
70         *  State of a lexer, parser, or tree parser are collected into a state
71         *  object so the state can be shared.  This sharing is needed to
72         *  have one grammar import others and share same error variables
73         *  and other state variables.  It's a kind of explicit multiple
74         *  inheritance via delegation of methods and shared state.
75         *  </summary>
76         */
77        protected internal RecognizerSharedState state;
78
79        public BaseRecognizer()
80            : this(new RecognizerSharedState())
81        {
82        }
83
84        public BaseRecognizer( RecognizerSharedState state )
85        {
86            if ( state == null )
87            {
88                state = new RecognizerSharedState();
89            }
90            this.state = state;
91            InitDFAs();
92        }
93
94        public TextWriter TraceDestination
95        {
96            get;
97            set;
98        }
99
100        protected virtual void InitDFAs()
101        {
102        }
103
104        /** <summary>reset the parser's state; subclasses must rewinds the input stream</summary> */
105        public virtual void Reset()
106        {
107            // wack everything related to error recovery
108            if ( state == null )
109            {
110                return; // no shared state work to do
111            }
112            state._fsp = -1;
113            state.errorRecovery = false;
114            state.lastErrorIndex = -1;
115            state.failed = false;
116            state.syntaxErrors = 0;
117            // wack everything related to backtracking and memoization
118            state.backtracking = 0;
119            for ( int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++ )
120            { // wipe cache
121                state.ruleMemo[i] = null;
122            }
123        }
124
125
126        /** <summary>
127         *  Match current input symbol against ttype.  Attempt
128         *  single token insertion or deletion error recovery.  If
129         *  that fails, throw MismatchedTokenException.
130         *  </summary>
131         *
132         *  <remarks>
133         *  To turn off single token insertion or deletion error
134         *  recovery, override recoverFromMismatchedToken() and have it
135         *  throw an exception. See TreeParser.recoverFromMismatchedToken().
136         *  This way any error in a rule will cause an exception and
137         *  immediate exit from rule.  Rule would recover by resynchronizing
138         *  to the set of symbols that can follow rule ref.
139         *  </remarks>
140         */
141        public virtual object Match( IIntStream input, int ttype, BitSet follow )
142        {
143            //System.out.println("match "+((TokenStream)input).LT(1));
144            object matchedSymbol = GetCurrentInputSymbol( input );
145            if ( input.LA( 1 ) == ttype )
146            {
147                input.Consume();
148                state.errorRecovery = false;
149                state.failed = false;
150                return matchedSymbol;
151            }
152            if ( state.backtracking > 0 )
153            {
154                state.failed = true;
155                return matchedSymbol;
156            }
157            matchedSymbol = RecoverFromMismatchedToken( input, ttype, follow );
158            return matchedSymbol;
159        }
160
161        /** <summary>Match the wildcard: in a symbol</summary> */
162        public virtual void MatchAny( IIntStream input )
163        {
164            state.errorRecovery = false;
165            state.failed = false;
166            input.Consume();
167        }
168
169        public virtual bool MismatchIsUnwantedToken( IIntStream input, int ttype )
170        {
171            return input.LA( 2 ) == ttype;
172        }
173
174        public virtual bool MismatchIsMissingToken( IIntStream input, BitSet follow )
175        {
176            if ( follow == null )
177            {
178                // we have no information about the follow; we can only consume
179                // a single token and hope for the best
180                return false;
181            }
182            // compute what can follow this grammar element reference
183            if ( follow.Member( TokenTypes.EndOfRule ) )
184            {
185                BitSet viableTokensFollowingThisRule = ComputeContextSensitiveRuleFOLLOW();
186                follow = follow.Or( viableTokensFollowingThisRule );
187                if ( state._fsp >= 0 )
188                { // remove EOR if we're not the start symbol
189                    follow.Remove( TokenTypes.EndOfRule );
190                }
191            }
192            // if current token is consistent with what could come after set
193            // then we know we're missing a token; error recovery is free to
194            // "insert" the missing token
195
196            //System.out.println("viable tokens="+follow.toString(getTokenNames()));
197            //System.out.println("LT(1)="+((TokenStream)input).LT(1));
198
199            // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
200            // in follow set to indicate that the fall of the start symbol is
201            // in the set (EOF can follow).
202            if ( follow.Member( input.LA( 1 ) ) || follow.Member( TokenTypes.EndOfRule ) )
203            {
204                //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
205                return true;
206            }
207            return false;
208        }
209
210        /** <summary>Report a recognition problem.</summary>
211         *
212         *  <remarks>
213         *  This method sets errorRecovery to indicate the parser is recovering
214         *  not parsing.  Once in recovery mode, no errors are generated.
215         *  To get out of recovery mode, the parser must successfully match
216         *  a token (after a resync).  So it will go:
217         *
218         * 		1. error occurs
219         * 		2. enter recovery mode, report error
220         * 		3. consume until token found in resynch set
221         * 		4. try to resume parsing
222         * 		5. next match() will reset errorRecovery mode
223         *
224         *  If you override, make sure to update syntaxErrors if you care about that.
225         *  </remarks>
226         */
227        public virtual void ReportError( RecognitionException e )
228        {
229            // if we've already reported an error and have not matched a token
230            // yet successfully, don't report any errors.
231            if ( state.errorRecovery )
232            {
233                //System.err.print("[SPURIOUS] ");
234                return;
235            }
236            state.syntaxErrors++; // don't count spurious
237            state.errorRecovery = true;
238
239            DisplayRecognitionError( this.TokenNames, e );
240        }
241
242        public virtual void DisplayRecognitionError( string[] tokenNames,
243                                            RecognitionException e )
244        {
245            string hdr = GetErrorHeader( e );
246            string msg = GetErrorMessage( e, tokenNames );
247            EmitErrorMessage( hdr + " " + msg );
248        }
249
250        /** <summary>What error message should be generated for the various exception types?</summary>
251         *
252         *  <remarks>
253         *  Not very object-oriented code, but I like having all error message
254         *  generation within one method rather than spread among all of the
255         *  exception classes. This also makes it much easier for the exception
256         *  handling because the exception classes do not have to have pointers back
257         *  to this object to access utility routines and so on. Also, changing
258         *  the message for an exception type would be difficult because you
259         *  would have to subclassing exception, but then somehow get ANTLR
260         *  to make those kinds of exception objects instead of the default.
261         *  This looks weird, but trust me--it makes the most sense in terms
262         *  of flexibility.
263         *
264         *  For grammar debugging, you will want to override this to add
265         *  more information such as the stack frame with
266         *  getRuleInvocationStack(e, this.getClass().getName()) and,
267         *  for no viable alts, the decision description and state etc...
268         *
269         *  Override this to change the message generated for one or more
270         *  exception types.
271         *  </remarks>
272         */
273        public virtual string GetErrorMessage( RecognitionException e, string[] tokenNames )
274        {
275            string msg = e.Message;
276            if ( e is UnwantedTokenException )
277            {
278                UnwantedTokenException ute = (UnwantedTokenException)e;
279                string tokenName = "<unknown>";
280                if ( ute.Expecting == TokenTypes.EndOfFile )
281                {
282                    tokenName = "EndOfFile";
283                }
284                else
285                {
286                    tokenName = tokenNames[ute.Expecting];
287                }
288                msg = "extraneous input " + GetTokenErrorDisplay( ute.UnexpectedToken ) +
289                    " expecting " + tokenName;
290            }
291            else if ( e is MissingTokenException )
292            {
293                MissingTokenException mte = (MissingTokenException)e;
294                string tokenName = "<unknown>";
295                if ( mte.Expecting == TokenTypes.EndOfFile )
296                {
297                    tokenName = "EndOfFile";
298                }
299                else
300                {
301                    tokenName = tokenNames[mte.Expecting];
302                }
303                msg = "missing " + tokenName + " at " + GetTokenErrorDisplay( e.Token );
304            }
305            else if ( e is MismatchedTokenException )
306            {
307                MismatchedTokenException mte = (MismatchedTokenException)e;
308                string tokenName = "<unknown>";
309                if ( mte.Expecting == TokenTypes.EndOfFile )
310                {
311                    tokenName = "EndOfFile";
312                }
313                else
314                {
315                    tokenName = tokenNames[mte.Expecting];
316                }
317                msg = "mismatched input " + GetTokenErrorDisplay( e.Token ) +
318                    " expecting " + tokenName;
319            }
320            else if ( e is MismatchedTreeNodeException )
321            {
322                MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e;
323                string tokenName = "<unknown>";
324                if ( mtne.Expecting == TokenTypes.EndOfFile )
325                {
326                    tokenName = "EndOfFile";
327                }
328                else
329                {
330                    tokenName = tokenNames[mtne.Expecting];
331                }
332                // workaround for a .NET framework bug (NullReferenceException)
333                string nodeText = ( mtne.Node != null ) ? mtne.Node.ToString() ?? string.Empty : string.Empty;
334                msg = "mismatched tree node: " + nodeText + " expecting " + tokenName;
335            }
336            else if ( e is NoViableAltException )
337            {
338                //NoViableAltException nvae = (NoViableAltException)e;
339                // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
340                // and "(decision="+nvae.decisionNumber+") and
341                // "state "+nvae.stateNumber
342                msg = "no viable alternative at input " + GetTokenErrorDisplay( e.Token );
343            }
344            else if ( e is EarlyExitException )
345            {
346                //EarlyExitException eee = (EarlyExitException)e;
347                // for development, can add "(decision="+eee.decisionNumber+")"
348                msg = "required (...)+ loop did not match anything at input " +
349                    GetTokenErrorDisplay( e.Token );
350            }
351            else if ( e is MismatchedSetException )
352            {
353                MismatchedSetException mse = (MismatchedSetException)e;
354                msg = "mismatched input " + GetTokenErrorDisplay( e.Token ) +
355                    " expecting set " + mse.Expecting;
356            }
357            else if ( e is MismatchedNotSetException )
358            {
359                MismatchedNotSetException mse = (MismatchedNotSetException)e;
360                msg = "mismatched input " + GetTokenErrorDisplay( e.Token ) +
361                    " expecting set " + mse.Expecting;
362            }
363            else if ( e is FailedPredicateException )
364            {
365                FailedPredicateException fpe = (FailedPredicateException)e;
366                msg = "rule " + fpe.RuleName + " failed predicate: {" +
367                    fpe.PredicateText + "}?";
368            }
369            return msg;
370        }
371
372        /** <summary>
373         *  Get number of recognition errors (lexer, parser, tree parser).  Each
374         *  recognizer tracks its own number.  So parser and lexer each have
375         *  separate count.  Does not count the spurious errors found between
376         *  an error and next valid token match
377         *  </summary>
378         *
379         *  <seealso cref="reportError()"/>
380         */
381        public virtual int NumberOfSyntaxErrors
382        {
383            get
384            {
385                return state.syntaxErrors;
386            }
387        }
388
389        /** <summary>What is the error header, normally line/character position information?</summary> */
390        public virtual string GetErrorHeader( RecognitionException e )
391        {
392            string prefix = SourceName ?? string.Empty;
393            if (prefix.Length > 0)
394                prefix += ' ';
395
396            return string.Format("{0}line {1}:{2}", prefix, e.Line, e.CharPositionInLine + 1);
397        }
398
399        /** <summary>
400         *  How should a token be displayed in an error message? The default
401         *  is to display just the text, but during development you might
402         *  want to have a lot of information spit out.  Override in that case
403         *  to use t.ToString() (which, for CommonToken, dumps everything about
404         *  the token). This is better than forcing you to override a method in
405         *  your token objects because you don't have to go modify your lexer
406         *  so that it creates a new Java type.
407         *  </summary>
408         */
409        public virtual string GetTokenErrorDisplay( IToken t )
410        {
411            string s = t.Text;
412            if ( s == null )
413            {
414                if ( t.Type == TokenTypes.EndOfFile )
415                {
416                    s = "<EOF>";
417                }
418                else
419                {
420                    s = "<" + t.Type + ">";
421                }
422            }
423            s = Regex.Replace( s, "\n", "\\\\n" );
424            s = Regex.Replace( s, "\r", "\\\\r" );
425            s = Regex.Replace( s, "\t", "\\\\t" );
426            return "'" + s + "'";
427        }
428
429        /** <summary>Override this method to change where error messages go</summary> */
430        public virtual void EmitErrorMessage( string msg )
431        {
432            if (TraceDestination != null)
433                TraceDestination.WriteLine( msg );
434        }
435
436        /** <summary>
437         *  Recover from an error found on the input stream.  This is
438         *  for NoViableAlt and mismatched symbol exceptions.  If you enable
439         *  single token insertion and deletion, this will usually not
440         *  handle mismatched symbol exceptions but there could be a mismatched
441         *  token that the match() routine could not recover from.
442         *  </summary>
443         */
444        public virtual void Recover( IIntStream input, RecognitionException re )
445        {
446            if ( state.lastErrorIndex == input.Index )
447            {
448                // uh oh, another error at same token index; must be a case
449                // where LT(1) is in the recovery token set so nothing is
450                // consumed; consume a single token so at least to prevent
451                // an infinite loop; this is a failsafe.
452                input.Consume();
453            }
454            state.lastErrorIndex = input.Index;
455            BitSet followSet = ComputeErrorRecoverySet();
456            BeginResync();
457            ConsumeUntil( input, followSet );
458            EndResync();
459        }
460
461        /** <summary>
462         *  A hook to listen in on the token consumption during error recovery.
463         *  The DebugParser subclasses this to fire events to the listenter.
464         *  </summary>
465         */
466        public virtual void BeginResync()
467        {
468        }
469
470        public virtual void EndResync()
471        {
472        }
473
474        /*  Compute the error recovery set for the current rule.  During
475         *  rule invocation, the parser pushes the set of tokens that can
476         *  follow that rule reference on the stack; this amounts to
477         *  computing FIRST of what follows the rule reference in the
478         *  enclosing rule. This local follow set only includes tokens
479         *  from within the rule; i.e., the FIRST computation done by
480         *  ANTLR stops at the end of a rule.
481         *
482         *  EXAMPLE
483         *
484         *  When you find a "no viable alt exception", the input is not
485         *  consistent with any of the alternatives for rule r.  The best
486         *  thing to do is to consume tokens until you see something that
487         *  can legally follow a call to r *or* any rule that called r.
488         *  You don't want the exact set of viable next tokens because the
489         *  input might just be missing a token--you might consume the
490         *  rest of the input looking for one of the missing tokens.
491         *
492         *  Consider grammar:
493         *
494         *  a : '[' b ']'
495         *    | '(' b ')'
496         *    ;
497         *  b : c '^' INT ;
498         *  c : ID
499         *    | INT
500         *    ;
501         *
502         *  At each rule invocation, the set of tokens that could follow
503         *  that rule is pushed on a stack.  Here are the various "local"
504         *  follow sets:
505         *
506         *  FOLLOW(b1_in_a) = FIRST(']') = ']'
507         *  FOLLOW(b2_in_a) = FIRST(')') = ')'
508         *  FOLLOW(c_in_b) = FIRST('^') = '^'
509         *
510         *  Upon erroneous input "[]", the call chain is
511         *
512         *  a -> b -> c
513         *
514         *  and, hence, the follow context stack is:
515         *
516         *  depth  local follow set     after call to rule
517         *    0         <EOF>                    a (from main())
518         *    1          ']'                     b
519         *    3          '^'                     c
520         *
521         *  Notice that ')' is not included, because b would have to have
522         *  been called from a different context in rule a for ')' to be
523         *  included.
524         *
525         *  For error recovery, we cannot consider FOLLOW(c)
526         *  (context-sensitive or otherwise).  We need the combined set of
527         *  all context-sensitive FOLLOW sets--the set of all tokens that
528         *  could follow any reference in the call chain.  We need to
529         *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
530         *  we resync'd to that token, we'd consume until EOF.  We need to
531         *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
532         *  In this case, for input "[]", LA(1) is in this set so we would
533         *  not consume anything and after printing an error rule c would
534         *  return normally.  It would not find the required '^' though.
535         *  At this point, it gets a mismatched token error and throws an
536         *  exception (since LA(1) is not in the viable following token
537         *  set).  The rule exception handler tries to recover, but finds
538         *  the same recovery set and doesn't consume anything.  Rule b
539         *  exits normally returning to rule a.  Now it finds the ']' (and
540         *  with the successful match exits errorRecovery mode).
541         *
542         *  So, you cna see that the parser walks up call chain looking
543         *  for the token that was a member of the recovery set.
544         *
545         *  Errors are not generated in errorRecovery mode.
546         *
547         *  ANTLR's error recovery mechanism is based upon original ideas:
548         *
549         *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
550         *
551         *  and
552         *
553         *  "A note on error recovery in recursive descent parsers":
554         *  http://portal.acm.org/citation.cfm?id=947902.947905
555         *
556         *  Later, Josef Grosch had some good ideas:
557         *
558         *  "Efficient and Comfortable Error Recovery in Recursive Descent
559         *  Parsers":
560         *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
561         *
562         *  Like Grosch I implemented local FOLLOW sets that are combined
563         *  at run-time upon error to avoid overhead during parsing.
564         */
565        protected virtual BitSet ComputeErrorRecoverySet()
566        {
567            return CombineFollows( false );
568        }
569
570        /** <summary>
571         *  Compute the context-sensitive FOLLOW set for current rule.
572         *  This is set of token types that can follow a specific rule
573         *  reference given a specific call chain.  You get the set of
574         *  viable tokens that can possibly come next (lookahead depth 1)
575         *  given the current call chain.  Contrast this with the
576         *  definition of plain FOLLOW for rule r:
577         *  </summary>
578         *
579         *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
580         *
581         *  where x in T* and alpha, beta in V*; T is set of terminals and
582         *  V is the set of terminals and nonterminals.  In other words,
583         *  FOLLOW(r) is the set of all tokens that can possibly follow
584         *  references to r in *any* sentential form (context).  At
585         *  runtime, however, we know precisely which context applies as
586         *  we have the call chain.  We may compute the exact (rather
587         *  than covering superset) set of following tokens.
588         *
589         *  For example, consider grammar:
590         *
591         *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
592         *       | "return" expr '.'
593         *       ;
594         *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
595         *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
596         *       | '(' expr ')'
597         *       ;
598         *
599         *  The FOLLOW sets are all inclusive whereas context-sensitive
600         *  FOLLOW sets are precisely what could follow a rule reference.
601         *  For input input "i=(3);", here is the derivation:
602         *
603         *  stat => ID '=' expr ';'
604         *       => ID '=' atom ('+' atom)* ';'
605         *       => ID '=' '(' expr ')' ('+' atom)* ';'
606         *       => ID '=' '(' atom ')' ('+' atom)* ';'
607         *       => ID '=' '(' INT ')' ('+' atom)* ';'
608         *       => ID '=' '(' INT ')' ';'
609         *
610         *  At the "3" token, you'd have a call chain of
611         *
612         *    stat -> expr -> atom -> expr -> atom
613         *
614         *  What can follow that specific nested ref to atom?  Exactly ')'
615         *  as you can see by looking at the derivation of this specific
616         *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
617         *
618         *  You want the exact viable token set when recovering from a
619         *  token mismatch.  Upon token mismatch, if LA(1) is member of
620         *  the viable next token set, then you know there is most likely
621         *  a missing token in the input stream.  "Insert" one by just not
622         *  throwing an exception.
623         */
624        protected virtual BitSet ComputeContextSensitiveRuleFOLLOW()
625        {
626            return CombineFollows( true );
627        }
628
629        // what is exact? it seems to only add sets from above on stack
630        // if EOR is in set i.  When it sees a set w/o EOR, it stops adding.
631        // Why would we ever want them all?  Maybe no viable alt instead of
632        // mismatched token?
633        protected virtual BitSet CombineFollows(bool exact)
634        {
635            int top = state._fsp;
636            BitSet followSet = new BitSet();
637            for ( int i = top; i >= 0; i-- )
638            {
639                BitSet localFollowSet = (BitSet)state.following[i];
640                /*
641                System.out.println("local follow depth "+i+"="+
642                                   localFollowSet.toString(getTokenNames())+")");
643                 */
644                followSet.OrInPlace( localFollowSet );
645                if ( exact )
646                {
647                    // can we see end of rule?
648                    if ( localFollowSet.Member( TokenTypes.EndOfRule ) )
649                    {
650                        // Only leave EOR in set if at top (start rule); this lets
651                        // us know if have to include follow(start rule); i.e., EOF
652                        if ( i > 0 )
653                        {
654                            followSet.Remove( TokenTypes.EndOfRule );
655                        }
656                    }
657                    else
658                    { // can't see end of rule, quit
659                        break;
660                    }
661                }
662            }
663            return followSet;
664        }
665
666        /** <summary>Attempt to recover from a single missing or extra token.</summary>
667         *
668         *  EXTRA TOKEN
669         *
670         *  LA(1) is not what we are looking for.  If LA(2) has the right token,
671         *  however, then assume LA(1) is some extra spurious token.  Delete it
672         *  and LA(2) as if we were doing a normal match(), which advances the
673         *  input.
674         *
675         *  MISSING TOKEN
676         *
677         *  If current token is consistent with what could come after
678         *  ttype then it is ok to "insert" the missing token, else throw
679         *  exception For example, Input "i=(3;" is clearly missing the
680         *  ')'.  When the parser returns from the nested call to expr, it
681         *  will have call chain:
682         *
683         *    stat -> expr -> atom
684         *
685         *  and it will be trying to match the ')' at this point in the
686         *  derivation:
687         *
688         *       => ID '=' '(' INT ')' ('+' atom)* ';'
689         *                          ^
690         *  match() will see that ';' doesn't match ')' and report a
691         *  mismatched token error.  To recover, it sees that LA(1)==';'
692         *  is in the set of tokens that can follow the ')' token
693         *  reference in rule atom.  It can assume that you forgot the ')'.
694         */
695        protected virtual object RecoverFromMismatchedToken( IIntStream input, int ttype, BitSet follow )
696        {
697            RecognitionException e = null;
698            // if next token is what we are looking for then "delete" this token
699            if ( MismatchIsUnwantedToken( input, ttype ) )
700            {
701                e = new UnwantedTokenException( ttype, input, TokenNames );
702                /*
703                System.err.println("recoverFromMismatchedToken deleting "+
704                                   ((TokenStream)input).LT(1)+
705                                   " since "+((TokenStream)input).LT(2)+" is what we want");
706                 */
707                BeginResync();
708                input.Consume(); // simply delete extra token
709                EndResync();
710                ReportError( e );  // report after consuming so AW sees the token in the exception
711                // we want to return the token we're actually matching
712                object matchedSymbol = GetCurrentInputSymbol( input );
713                input.Consume(); // move past ttype token as if all were ok
714                return matchedSymbol;
715            }
716            // can't recover with single token deletion, try insertion
717            if ( MismatchIsMissingToken( input, follow ) )
718            {
719                object inserted = GetMissingSymbol( input, e, ttype, follow );
720                e = new MissingTokenException( ttype, input, inserted );
721                ReportError( e );  // report after inserting so AW sees the token in the exception
722                return inserted;
723            }
724            // even that didn't work; must throw the exception
725            e = new MismatchedTokenException(ttype, input, TokenNames);
726            throw e;
727        }
728
729        /** Not currently used */
730        public virtual object RecoverFromMismatchedSet( IIntStream input,
731                                               RecognitionException e,
732                                               BitSet follow )
733        {
734            if ( MismatchIsMissingToken( input, follow ) )
735            {
736                // System.out.println("missing token");
737                ReportError( e );
738                // we don't know how to conjure up a token for sets yet
739                return GetMissingSymbol( input, e, TokenTypes.Invalid, follow );
740            }
741            // TODO do single token deletion like above for Token mismatch
742            throw e;
743        }
744
745        /** <summary>
746         *  Match needs to return the current input symbol, which gets put
747         *  into the label for the associated token ref; e.g., x=ID.  Token
748         *  and tree parsers need to return different objects. Rather than test
749         *  for input stream type or change the IntStream interface, I use
750         *  a simple method to ask the recognizer to tell me what the current
751         *  input symbol is.
752         *  </summary>
753         *
754         *  <remarks>This is ignored for lexers.</remarks>
755         */
756        protected virtual object GetCurrentInputSymbol( IIntStream input )
757        {
758            return null;
759        }
760
761        /** <summary>Conjure up a missing token during error recovery.</summary>
762         *
763         *  <remarks>
764         *  The recognizer attempts to recover from single missing
765         *  symbols. But, actions might refer to that missing symbol.
766         *  For example, x=ID {f($x);}. The action clearly assumes
767         *  that there has been an identifier matched previously and that
768         *  $x points at that token. If that token is missing, but
769         *  the next token in the stream is what we want we assume that
770         *  this token is missing and we keep going. Because we
771         *  have to return some token to replace the missing token,
772         *  we have to conjure one up. This method gives the user control
773         *  over the tokens returned for missing tokens. Mostly,
774         *  you will want to create something special for identifier
775         *  tokens. For literals such as '{' and ',', the default
776         *  action in the parser or tree parser works. It simply creates
777         *  a CommonToken of the appropriate type. The text will be the token.
778         *  If you change what tokens must be created by the lexer,
779         *  override this method to create the appropriate tokens.
780         *  </remarks>
781         */
782        protected virtual object GetMissingSymbol( IIntStream input,
783                                          RecognitionException e,
784                                          int expectedTokenType,
785                                          BitSet follow )
786        {
787            return null;
788        }
789
790        public virtual void ConsumeUntil( IIntStream input, int tokenType )
791        {
792            //System.out.println("consumeUntil "+tokenType);
793            int ttype = input.LA( 1 );
794            while ( ttype != TokenTypes.EndOfFile && ttype != tokenType )
795            {
796                input.Consume();
797                ttype = input.LA( 1 );
798            }
799        }
800
801        /** <summary>Consume tokens until one matches the given token set</summary> */
802        public virtual void ConsumeUntil( IIntStream input, BitSet set )
803        {
804            //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
805            int ttype = input.LA( 1 );
806            while ( ttype != TokenTypes.EndOfFile && !set.Member( ttype ) )
807            {
808                //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
809                input.Consume();
810                ttype = input.LA( 1 );
811            }
812        }
813
814        /** <summary>Push a rule's follow set using our own hardcoded stack</summary> */
815        protected void PushFollow( BitSet fset )
816        {
817            if ( ( state._fsp + 1 ) >= state.following.Length )
818            {
819                Array.Resize(ref state.following, state.following.Length * 2);
820            }
821            state.following[++state._fsp] = fset;
822        }
823
824        protected void PopFollow()
825        {
826            state._fsp--;
827        }
828
829        /** <summary>
830         *  Return List<String> of the rules in your parser instance
831         *  leading up to a call to this method.  You could override if
832         *  you want more details such as the file/line info of where
833         *  in the parser java code a rule is invoked.
834         *  </summary>
835         *
836         *  <remarks>
837         *  This is very useful for error messages and for context-sensitive
838         *  error recovery.
839         *  </remarks>
840         */
841        public virtual IList<string> GetRuleInvocationStack()
842        {
843            return GetRuleInvocationStack( new StackTrace(true) );
844        }
845
846        /** <summary>
847         *  A more general version of GetRuleInvocationStack where you can
848         *  pass in the StackTrace of, for example, a RecognitionException
849         *  to get it's rule stack trace.
850         *  </summary>
851         */
852        public static IList<string> GetRuleInvocationStack(StackTrace trace)
853        {
854            if (trace == null)
855                throw new ArgumentNullException("trace");
856
857            List<string> rules = new List<string>();
858            StackFrame[] stack = trace.GetFrames() ?? new StackFrame[0];
859
860            for (int i = stack.Length - 1; i >= 0; i--)
861            {
862                StackFrame frame = stack[i];
863                MethodBase method = frame.GetMethod();
864                GrammarRuleAttribute[] attributes = (GrammarRuleAttribute[])method.GetCustomAttributes(typeof(GrammarRuleAttribute), true);
865                if (attributes != null && attributes.Length > 0)
866                    rules.Add(attributes[0].Name);
867            }
868
869            return rules;
870        }
871
872        public virtual int BacktrackingLevel
873        {
874            get
875            {
876                return state.backtracking;
877            }
878            set
879            {
880                state.backtracking = value;
881            }
882        }
883
884        /** <summary>Return whether or not a backtracking attempt failed.</summary> */
885        public virtual bool Failed
886        {
887            get
888            {
889                return state.failed;
890            }
891        }
892
893        /** <summary>
894         *  Used to print out token names like ID during debugging and
895         *  error reporting.  The generated parsers implement a method
896         *  that overrides this to point to their String[] tokenNames.
897         *  </summary>
898         */
899        public virtual string[] TokenNames
900        {
901            get
902            {
903                return null;
904            }
905        }
906
907        /** <summary>
908         *  For debugging and other purposes, might want the grammar name.
909         *  Have ANTLR generate an implementation for this method.
910         *  </summary>
911         */
912        public virtual string GrammarFileName
913        {
914            get
915            {
916                return null;
917            }
918        }
919
920        public abstract string SourceName
921        {
922            get;
923        }
924
925        /** <summary>
926         *  A convenience method for use most often with template rewrites.
927         *  Convert a List<Token> to List<String>
928         *  </summary>
929         */
930        public virtual List<string> ToStrings( ICollection<IToken> tokens )
931        {
932            if ( tokens == null )
933                return null;
934
935            List<string> strings = new List<string>( tokens.Count );
936            foreach ( IToken token in tokens )
937            {
938                strings.Add( token.Text );
939            }
940
941            return strings;
942        }
943
944        /** <summary>
945         *  Given a rule number and a start token index number, return
946         *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
947         *  start index.  If this rule has parsed input starting from the
948         *  start index before, then return where the rule stopped parsing.
949         *  It returns the index of the last token matched by the rule.
950         *  </summary>
951         *
952         *  <remarks>
953         *  For now we use a hashtable and just the slow Object-based one.
954         *  Later, we can make a special one for ints and also one that
955         *  tosses out data after we commit past input position i.
956         *  </remarks>
957         */
958        public virtual int GetRuleMemoization( int ruleIndex, int ruleStartIndex )
959        {
960            if ( state.ruleMemo[ruleIndex] == null )
961            {
962                state.ruleMemo[ruleIndex] = new Dictionary<int, int>();
963            }
964
965            int stopIndex;
966            if ( !state.ruleMemo[ruleIndex].TryGetValue( ruleStartIndex, out stopIndex ) )
967                return MemoRuleUnknown;
968
969            return stopIndex;
970        }
971
972        /** <summary>
973         *  Has this rule already parsed input at the current index in the
974         *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
975         *  If we attempted but failed to parse properly before, return
976         *  MEMO_RULE_FAILED.
977         *  </summary>
978         *
979         *  <remarks>
980         *  This method has a side-effect: if we have seen this input for
981         *  this rule and successfully parsed before, then seek ahead to
982         *  1 past the stop token matched for this rule last time.
983         *  </remarks>
984         */
985        public virtual bool AlreadyParsedRule( IIntStream input, int ruleIndex )
986        {
987            int stopIndex = GetRuleMemoization( ruleIndex, input.Index );
988            if ( stopIndex == MemoRuleUnknown )
989            {
990                return false;
991            }
992            if ( stopIndex == MemoRuleFailed )
993            {
994                //System.out.println("rule "+ruleIndex+" will never succeed");
995                state.failed = true;
996            }
997            else
998            {
999                //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed);
1000                input.Seek( stopIndex + 1 ); // jump to one past stop token
1001            }
1002            return true;
1003        }
1004
1005        /** <summary>
1006         *  Record whether or not this rule parsed the input at this position
1007         *  successfully.  Use a standard java hashtable for now.
1008         *  </summary>
1009         */
1010        public virtual void Memoize( IIntStream input,
1011                            int ruleIndex,
1012                            int ruleStartIndex )
1013        {
1014            int stopTokenIndex = state.failed ? MemoRuleFailed : input.Index - 1;
1015            if ( state.ruleMemo == null )
1016            {
1017                if (TraceDestination != null)
1018                    TraceDestination.WriteLine( "!!!!!!!!! memo array is null for " + GrammarFileName );
1019            }
1020            if ( ruleIndex >= state.ruleMemo.Length )
1021            {
1022                if (TraceDestination != null)
1023                    TraceDestination.WriteLine("!!!!!!!!! memo size is " + state.ruleMemo.Length + ", but rule index is " + ruleIndex);
1024            }
1025            if ( state.ruleMemo[ruleIndex] != null )
1026            {
1027                state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex;
1028            }
1029        }
1030
1031        /** <summary>return how many rule/input-index pairs there are in total.</summary>
1032         *  TODO: this includes synpreds. :(
1033         */
1034        public virtual int GetRuleMemoizationCacheSize()
1035        {
1036            int n = 0;
1037            for ( int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++ )
1038            {
1039                var ruleMap = state.ruleMemo[i];
1040                if ( ruleMap != null )
1041                {
1042                    n += ruleMap.Count; // how many input indexes are recorded?
1043                }
1044            }
1045            return n;
1046        }
1047
1048        public virtual void TraceIn(string ruleName, int ruleIndex, object inputSymbol)
1049        {
1050            if (TraceDestination == null)
1051                return;
1052
1053            TraceDestination.Write("enter " + ruleName + " " + inputSymbol);
1054            if (state.backtracking > 0)
1055            {
1056                TraceDestination.Write(" backtracking=" + state.backtracking);
1057            }
1058            TraceDestination.WriteLine();
1059        }
1060
1061        public virtual void TraceOut(string ruleName, int ruleIndex, object inputSymbol)
1062        {
1063            if (TraceDestination == null)
1064                return;
1065
1066            TraceDestination.Write("exit " + ruleName + " " + inputSymbol);
1067            if (state.backtracking > 0)
1068            {
1069                TraceDestination.Write(" backtracking=" + state.backtracking);
1070                if (state.failed)
1071                    TraceDestination.Write(" failed");
1072                else
1073                    TraceDestination.Write(" succeeded");
1074            }
1075            TraceDestination.WriteLine();
1076        }
1077
1078        #region Debugging support
1079        public virtual IDebugEventListener DebugListener
1080        {
1081            get
1082            {
1083                return null;
1084            }
1085        }
1086
1087        [Conditional("ANTLR_DEBUG")]
1088        protected virtual void DebugEnterRule(string grammarFileName, string ruleName)
1089        {
1090            IDebugEventListener dbg = DebugListener;
1091            if (dbg != null)
1092                dbg.EnterRule(grammarFileName, ruleName);
1093        }
1094
1095        [Conditional("ANTLR_DEBUG")]
1096        protected virtual void DebugExitRule(string grammarFileName, string ruleName)
1097        {
1098            IDebugEventListener dbg = DebugListener;
1099            if (dbg != null)
1100                dbg.ExitRule(grammarFileName, ruleName);
1101        }
1102
1103        [Conditional("ANTLR_DEBUG")]
1104        protected virtual void DebugEnterSubRule(int decisionNumber)
1105        {
1106            IDebugEventListener dbg = DebugListener;
1107            if (dbg != null)
1108                dbg.EnterSubRule(decisionNumber);
1109        }
1110
1111        [Conditional("ANTLR_DEBUG")]
1112        protected virtual void DebugExitSubRule(int decisionNumber)
1113        {
1114            IDebugEventListener dbg = DebugListener;
1115            if (dbg != null)
1116                dbg.ExitSubRule(decisionNumber);
1117        }
1118
1119        [Conditional("ANTLR_DEBUG")]
1120        protected virtual void DebugEnterAlt(int alt)
1121        {
1122            IDebugEventListener dbg = DebugListener;
1123            if (dbg != null)
1124                dbg.EnterAlt(alt);
1125        }
1126
1127        [Conditional("ANTLR_DEBUG")]
1128        protected virtual void DebugEnterDecision(int decisionNumber, bool couldBacktrack)
1129        {
1130            IDebugEventListener dbg = DebugListener;
1131            if (dbg != null)
1132                dbg.EnterDecision(decisionNumber, couldBacktrack);
1133        }
1134
1135        [Conditional("ANTLR_DEBUG")]
1136        protected virtual void DebugExitDecision(int decisionNumber)
1137        {
1138            IDebugEventListener dbg = DebugListener;
1139            if (dbg != null)
1140                dbg.ExitDecision(decisionNumber);
1141        }
1142
1143        [Conditional("ANTLR_DEBUG")]
1144        protected virtual void DebugLocation(int line, int charPositionInLine)
1145        {
1146            IDebugEventListener dbg = DebugListener;
1147            if (dbg != null)
1148                dbg.Location(line, charPositionInLine);
1149        }
1150
1151        [Conditional("ANTLR_DEBUG")]
1152        protected virtual void DebugSemanticPredicate(bool result, string predicate)
1153        {
1154            IDebugEventListener dbg = DebugListener;
1155            if (dbg != null)
1156                dbg.SemanticPredicate(result, predicate);
1157        }
1158
1159        [Conditional("ANTLR_DEBUG")]
1160        protected virtual void DebugBeginBacktrack(int level)
1161        {
1162            IDebugEventListener dbg = DebugListener;
1163            if (dbg != null)
1164                dbg.BeginBacktrack(level);
1165        }
1166
1167        [Conditional("ANTLR_DEBUG")]
1168        protected virtual void DebugEndBacktrack(int level, bool successful)
1169        {
1170            IDebugEventListener dbg = DebugListener;
1171            if (dbg != null)
1172                dbg.EndBacktrack(level, successful);
1173        }
1174
1175        [Conditional("ANTLR_DEBUG")]
1176        protected virtual void DebugRecognitionException(RecognitionException ex)
1177        {
1178            IDebugEventListener dbg = DebugListener;
1179            if (dbg != null)
1180                dbg.RecognitionException(ex);
1181        }
1182        #endregion
1183    }
1184}
1185