1/*
2 [The "BSD license"]
3 Copyright (c) 2005-2009 Terence Parr
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 1. Redistributions of source code must retain the above copyright
10     notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12     notice, this list of conditions and the following disclaimer in the
13     documentation and/or other materials provided with the distribution.
14 3. The name of the author may not be used to endorse or promote products
15     derived from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28package org.antlr.runtime;
29
30import java.util.ArrayList;
31import java.util.HashMap;
32import java.util.List;
33import java.util.Map;
34
35/** A generic recognizer that can handle recognizers generated from
36 *  lexer, parser, and tree grammars.  This is all the parsing
37 *  support code essentially; most of it is error recovery stuff and
38 *  backtracking.
39 */
40public abstract class BaseRecognizer {
41	public static final int MEMO_RULE_FAILED = -2;
42	public static final int MEMO_RULE_UNKNOWN = -1;
43	public static final int INITIAL_FOLLOW_STACK_SIZE = 100;
44
45	// copies from Token object for convenience in actions
46	public static final int DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL;
47	public static final int HIDDEN = Token.HIDDEN_CHANNEL;
48
49	public static final String NEXT_TOKEN_RULE_NAME = "nextToken";
50
51	/** State of a lexer, parser, or tree parser are collected into a state
52	 *  object so the state can be shared.  This sharing is needed to
53	 *  have one grammar import others and share same error variables
54	 *  and other state variables.  It's a kind of explicit multiple
55	 *  inheritance via delegation of methods and shared state.
56	 */
57	protected RecognizerSharedState state;
58
59	public BaseRecognizer() {
60		state = new RecognizerSharedState();
61	}
62
63	public BaseRecognizer(RecognizerSharedState state) {
64		if ( state==null ) {
65			state = new RecognizerSharedState();
66		}
67		this.state = state;
68	}
69
	/** reset the parser's state; subclasses must rewind the input stream */
71	public void reset() {
72		// wack everything related to error recovery
73		if ( state==null ) {
74			return; // no shared state work to do
75		}
76		state._fsp = -1;
77		state.errorRecovery = false;
78		state.lastErrorIndex = -1;
79		state.failed = false;
80		state.syntaxErrors = 0;
81		// wack everything related to backtracking and memoization
82		state.backtracking = 0;
83		for (int i = 0; state.ruleMemo!=null && i < state.ruleMemo.length; i++) { // wipe cache
84			state.ruleMemo[i] = null;
85		}
86	}
87
88
89	/** Match current input symbol against ttype.  Attempt
90	 *  single token insertion or deletion error recovery.  If
91	 *  that fails, throw MismatchedTokenException.
92	 *
93	 *  To turn off single token insertion or deletion error
94	 *  recovery, override recoverFromMismatchedToken() and have it
95     *  throw an exception. See TreeParser.recoverFromMismatchedToken().
96     *  This way any error in a rule will cause an exception and
97     *  immediate exit from rule.  Rule would recover by resynchronizing
98     *  to the set of symbols that can follow rule ref.
99	 */
100	public Object match(IntStream input, int ttype, BitSet follow)
101		throws RecognitionException
102	{
103		//System.out.println("match "+((TokenStream)input).LT(1));
104		Object matchedSymbol = getCurrentInputSymbol(input);
105		if ( input.LA(1)==ttype ) {
106			input.consume();
107			state.errorRecovery = false;
108			state.failed = false;
109			return matchedSymbol;
110		}
111		if ( state.backtracking>0 ) {
112			state.failed = true;
113			return matchedSymbol;
114		}
115		matchedSymbol = recoverFromMismatchedToken(input, ttype, follow);
116		return matchedSymbol;
117	}
118
119	/** Match the wildcard: in a symbol */
	public void matchAny(IntStream input) {
		// A wildcard always matches, so treat this as a successful match:
		// clear recovery/backtracking failure flags and advance.
		state.errorRecovery = false;
		state.failed = false;
		input.consume();
	}
125
	/** Is the mismatched LA(1) an extra token, i.e., would deleting it
	 *  make the next symbol (LA(2)) match the expected ttype?
	 */
	public boolean mismatchIsUnwantedToken(IntStream input, int ttype) {
		return input.LA(2)==ttype;
	}
129
	/** Is the mismatch explained by a token missing from the input?
	 *  True when the current LA(1) could legally come after the expected
	 *  token, given the follow set; error recovery may then "insert" the
	 *  missing token and continue.  With no follow information we must
	 *  answer false.
	 */
	public boolean mismatchIsMissingToken(IntStream input, BitSet follow) {
		if ( follow==null ) {
			// we have no information about the follow; we can only consume
			// a single token and hope for the best
			return false;
		}
		// compute what can follow this grammar element reference
		if ( follow.member(Token.EOR_TOKEN_TYPE) ) {
			// EOR means the follow extends past this rule; fold in what can
			// follow the rule itself, given the current call chain.
			BitSet viableTokensFollowingThisRule = computeContextSensitiveRuleFOLLOW();
			follow = follow.or(viableTokensFollowingThisRule);
            if ( state._fsp>=0 ) { // remove EOR if we're not the start symbol
                follow.remove(Token.EOR_TOKEN_TYPE);
            }
		}
		// if current token is consistent with what could come after set
		// then we know we're missing a token; error recovery is free to
		// "insert" the missing token

		//System.out.println("viable tokens="+follow.toString(getTokenNames()));
		//System.out.println("LT(1)="+((TokenStream)input).LT(1));

		// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
		// in follow set to indicate that the fall of the start symbol is
		// in the set (EOF can follow).
		if ( follow.member(input.LA(1)) || follow.member(Token.EOR_TOKEN_TYPE) ) {
			//System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
			return true;
		}
		return false;
	}
160
161	/** Report a recognition problem.
162	 *
163	 *  This method sets errorRecovery to indicate the parser is recovering
164	 *  not parsing.  Once in recovery mode, no errors are generated.
165	 *  To get out of recovery mode, the parser must successfully match
166	 *  a token (after a resync).  So it will go:
167	 *
168	 * 		1. error occurs
169	 * 		2. enter recovery mode, report error
170	 * 		3. consume until token found in resynch set
171	 * 		4. try to resume parsing
172	 * 		5. next match() will reset errorRecovery mode
173	 *
174	 *  If you override, make sure to update syntaxErrors if you care about that.
175	 */
176	public void reportError(RecognitionException e) {
177		// if we've already reported an error and have not matched a token
178		// yet successfully, don't report any errors.
179		if ( state.errorRecovery ) {
180			//System.err.print("[SPURIOUS] ");
181			return;
182		}
183		state.syntaxErrors++; // don't count spurious
184		state.errorRecovery = true;
185
186		displayRecognitionError(this.getTokenNames(), e);
187	}
188
189	public void displayRecognitionError(String[] tokenNames,
190										RecognitionException e)
191	{
192		String hdr = getErrorHeader(e);
193		String msg = getErrorMessage(e, tokenNames);
194		emitErrorMessage(hdr+" "+msg);
195	}
196
197	/** What error message should be generated for the various
198	 *  exception types?
199	 *
200	 *  Not very object-oriented code, but I like having all error message
201	 *  generation within one method rather than spread among all of the
202	 *  exception classes. This also makes it much easier for the exception
203	 *  handling because the exception classes do not have to have pointers back
204	 *  to this object to access utility routines and so on. Also, changing
205	 *  the message for an exception type would be difficult because you
	 *  would have to subclass the exception, but then somehow get ANTLR
207	 *  to make those kinds of exception objects instead of the default.
208	 *  This looks weird, but trust me--it makes the most sense in terms
209	 *  of flexibility.
210	 *
211	 *  For grammar debugging, you will want to override this to add
212	 *  more information such as the stack frame with
213	 *  getRuleInvocationStack(e, this.getClass().getName()) and,
214	 *  for no viable alts, the decision description and state etc...
215	 *
216	 *  Override this to change the message generated for one or more
217	 *  exception types.
218	 */
219	public String getErrorMessage(RecognitionException e, String[] tokenNames) {
220		String msg = e.getMessage();
221		if ( e instanceof UnwantedTokenException ) {
222			UnwantedTokenException ute = (UnwantedTokenException)e;
223			String tokenName="<unknown>";
224			if ( ute.expecting== Token.EOF ) {
225				tokenName = "EOF";
226			}
227			else {
228				tokenName = tokenNames[ute.expecting];
229			}
230			msg = "extraneous input "+getTokenErrorDisplay(ute.getUnexpectedToken())+
231				" expecting "+tokenName;
232		}
233		else if ( e instanceof MissingTokenException ) {
234			MissingTokenException mte = (MissingTokenException)e;
235			String tokenName="<unknown>";
236			if ( mte.expecting== Token.EOF ) {
237				tokenName = "EOF";
238			}
239			else {
240				tokenName = tokenNames[mte.expecting];
241			}
242			msg = "missing "+tokenName+" at "+getTokenErrorDisplay(e.token);
243		}
244		else if ( e instanceof MismatchedTokenException ) {
245			MismatchedTokenException mte = (MismatchedTokenException)e;
246			String tokenName="<unknown>";
247			if ( mte.expecting== Token.EOF ) {
248				tokenName = "EOF";
249			}
250			else {
251				tokenName = tokenNames[mte.expecting];
252			}
253			msg = "mismatched input "+getTokenErrorDisplay(e.token)+
254				" expecting "+tokenName;
255		}
256		else if ( e instanceof MismatchedTreeNodeException ) {
257			MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e;
258			String tokenName="<unknown>";
259			if ( mtne.expecting==Token.EOF ) {
260				tokenName = "EOF";
261			}
262			else {
263				tokenName = tokenNames[mtne.expecting];
264			}
265			msg = "mismatched tree node: "+mtne.node+
266				" expecting "+tokenName;
267		}
268		else if ( e instanceof NoViableAltException ) {
269			//NoViableAltException nvae = (NoViableAltException)e;
270			// for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
271			// and "(decision="+nvae.decisionNumber+") and
272			// "state "+nvae.stateNumber
273			msg = "no viable alternative at input "+getTokenErrorDisplay(e.token);
274		}
275		else if ( e instanceof EarlyExitException ) {
276			//EarlyExitException eee = (EarlyExitException)e;
277			// for development, can add "(decision="+eee.decisionNumber+")"
278			msg = "required (...)+ loop did not match anything at input "+
279				getTokenErrorDisplay(e.token);
280		}
281		else if ( e instanceof MismatchedSetException ) {
282			MismatchedSetException mse = (MismatchedSetException)e;
283			msg = "mismatched input "+getTokenErrorDisplay(e.token)+
284				" expecting set "+mse.expecting;
285		}
286		else if ( e instanceof MismatchedNotSetException ) {
287			MismatchedNotSetException mse = (MismatchedNotSetException)e;
288			msg = "mismatched input "+getTokenErrorDisplay(e.token)+
289				" expecting set "+mse.expecting;
290		}
291		else if ( e instanceof FailedPredicateException ) {
292			FailedPredicateException fpe = (FailedPredicateException)e;
293			msg = "rule "+fpe.ruleName+" failed predicate: {"+
294				fpe.predicateText+"}?";
295		}
296		return msg;
297	}
298
299	/** Get number of recognition errors (lexer, parser, tree parser).  Each
300	 *  recognizer tracks its own number.  So parser and lexer each have
301	 *  separate count.  Does not count the spurious errors found between
302	 *  an error and next valid token match
303	 *
304	 *  See also reportError()
305	 */
	public int getNumberOfSyntaxErrors() {
		// Spurious errors (reported while already in recovery mode) are
		// never counted; see reportError().
		return state.syntaxErrors;
	}
309
310	/** What is the error header, normally line/character position information? */
311	public String getErrorHeader(RecognitionException e) {
312		if ( getSourceName()!=null )
313			return getSourceName()+" line "+e.line+":"+e.charPositionInLine;
314
315		return "line "+e.line+":"+e.charPositionInLine;
316	}
317
318	/** How should a token be displayed in an error message? The default
319	 *  is to display just the text, but during development you might
320	 *  want to have a lot of information spit out.  Override in that case
321	 *  to use t.toString() (which, for CommonToken, dumps everything about
322	 *  the token). This is better than forcing you to override a method in
323	 *  your token objects because you don't have to go modify your lexer
324	 *  so that it creates a new Java type.
325	 */
326	public String getTokenErrorDisplay(Token t) {
327		String s = t.getText();
328		if ( s==null ) {
329			if ( t.getType()==Token.EOF ) {
330				s = "<EOF>";
331			}
332			else {
333				s = "<"+t.getType()+">";
334			}
335		}
336		s = s.replaceAll("\n","\\\\n");
337		s = s.replaceAll("\r","\\\\r");
338		s = s.replaceAll("\t","\\\\t");
339		return "'"+s+"'";
340	}
341
342	/** Override this method to change where error messages go */
	public void emitErrorMessage(String msg) {
		// Default destination is stderr; override to redirect.
		System.err.println(msg);
	}
346
347	/** Recover from an error found on the input stream.  This is
348	 *  for NoViableAlt and mismatched symbol exceptions.  If you enable
349	 *  single token insertion and deletion, this will usually not
350	 *  handle mismatched symbol exceptions but there could be a mismatched
351	 *  token that the match() routine could not recover from.
352	 */
	public void recover(IntStream input, RecognitionException re) {
		if ( state.lastErrorIndex==input.index() ) {
			// uh oh, another error at same token index; must be a case
			// where LT(1) is in the recovery token set so nothing is
			// consumed; consume a single token so at least to prevent
			// an infinite loop; this is a failsafe.
			input.consume();
		}
		state.lastErrorIndex = input.index();
		// Resync to something in the combined follow sets of every rule on
		// the invocation stack; see computeErrorRecoverySet().
		BitSet followSet = computeErrorRecoverySet();
		beginResync();
		consumeUntil(input, followSet);
		endResync();
	}
367
368	/** A hook to listen in on the token consumption during error recovery.
	 *  The DebugParser subclasses this to fire events to the listener.
370	 */
	public void beginResync() {
		// Intentionally empty; DebugParser overrides to fire an event.
	}

	/** Companion hook to beginResync(), called when error-recovery token
	 *  consumption completes.
	 */
	public void endResync() {
		// Intentionally empty; DebugParser overrides to fire an event.
	}
376
377	/*  Compute the error recovery set for the current rule.  During
378	 *  rule invocation, the parser pushes the set of tokens that can
379	 *  follow that rule reference on the stack; this amounts to
380	 *  computing FIRST of what follows the rule reference in the
381	 *  enclosing rule. This local follow set only includes tokens
382	 *  from within the rule; i.e., the FIRST computation done by
383	 *  ANTLR stops at the end of a rule.
384	 *
385	 *  EXAMPLE
386	 *
387	 *  When you find a "no viable alt exception", the input is not
388	 *  consistent with any of the alternatives for rule r.  The best
389	 *  thing to do is to consume tokens until you see something that
390	 *  can legally follow a call to r *or* any rule that called r.
391	 *  You don't want the exact set of viable next tokens because the
392	 *  input might just be missing a token--you might consume the
393	 *  rest of the input looking for one of the missing tokens.
394	 *
395	 *  Consider grammar:
396	 *
397	 *  a : '[' b ']'
398	 *    | '(' b ')'
399	 *    ;
400	 *  b : c '^' INT ;
401	 *  c : ID
402	 *    | INT
403	 *    ;
404	 *
405	 *  At each rule invocation, the set of tokens that could follow
406	 *  that rule is pushed on a stack.  Here are the various "local"
407	 *  follow sets:
408	 *
409	 *  FOLLOW(b1_in_a) = FIRST(']') = ']'
410	 *  FOLLOW(b2_in_a) = FIRST(')') = ')'
411	 *  FOLLOW(c_in_b) = FIRST('^') = '^'
412	 *
413	 *  Upon erroneous input "[]", the call chain is
414	 *
415	 *  a -> b -> c
416	 *
417	 *  and, hence, the follow context stack is:
418	 *
419	 *  depth  local follow set     after call to rule
420	 *    0         <EOF>                    a (from main())
421	 *    1          ']'                     b
422	 *    3          '^'                     c
423	 *
424	 *  Notice that ')' is not included, because b would have to have
425	 *  been called from a different context in rule a for ')' to be
426	 *  included.
427	 *
428	 *  For error recovery, we cannot consider FOLLOW(c)
429	 *  (context-sensitive or otherwise).  We need the combined set of
430	 *  all context-sensitive FOLLOW sets--the set of all tokens that
431	 *  could follow any reference in the call chain.  We need to
432	 *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
433	 *  we resync'd to that token, we'd consume until EOF.  We need to
434	 *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
435	 *  In this case, for input "[]", LA(1) is in this set so we would
436	 *  not consume anything and after printing an error rule c would
437	 *  return normally.  It would not find the required '^' though.
438	 *  At this point, it gets a mismatched token error and throws an
439	 *  exception (since LA(1) is not in the viable following token
440	 *  set).  The rule exception handler tries to recover, but finds
441	 *  the same recovery set and doesn't consume anything.  Rule b
442	 *  exits normally returning to rule a.  Now it finds the ']' (and
443	 *  with the successful match exits errorRecovery mode).
444	 *
	 *  So, you can see that the parser walks up the call chain looking
446	 *  for the token that was a member of the recovery set.
447	 *
448	 *  Errors are not generated in errorRecovery mode.
449	 *
450	 *  ANTLR's error recovery mechanism is based upon original ideas:
451	 *
452	 *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
453	 *
454	 *  and
455	 *
456	 *  "A note on error recovery in recursive descent parsers":
457	 *  http://portal.acm.org/citation.cfm?id=947902.947905
458	 *
459	 *  Later, Josef Grosch had some good ideas:
460	 *
461	 *  "Efficient and Comfortable Error Recovery in Recursive Descent
462	 *  Parsers":
463	 *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
464	 *
465	 *  Like Grosch I implemented local FOLLOW sets that are combined
466	 *  at run-time upon error to avoid overhead during parsing.
467	 */
	protected BitSet computeErrorRecoverySet() {
		// exact=false: union every local follow set on the stack; see the
		// long discussion above for why the combined set is needed.
		return combineFollows(false);
	}
471
472	/** Compute the context-sensitive FOLLOW set for current rule.
473	 *  This is set of token types that can follow a specific rule
474	 *  reference given a specific call chain.  You get the set of
475	 *  viable tokens that can possibly come next (lookahead depth 1)
476	 *  given the current call chain.  Contrast this with the
477	 *  definition of plain FOLLOW for rule r:
478	 *
479	 *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
480	 *
481	 *  where x in T* and alpha, beta in V*; T is set of terminals and
482	 *  V is the set of terminals and nonterminals.  In other words,
483	 *  FOLLOW(r) is the set of all tokens that can possibly follow
484	 *  references to r in *any* sentential form (context).  At
485	 *  runtime, however, we know precisely which context applies as
486	 *  we have the call chain.  We may compute the exact (rather
487	 *  than covering superset) set of following tokens.
488	 *
489	 *  For example, consider grammar:
490	 *
491	 *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
492	 *       | "return" expr '.'
493	 *       ;
494	 *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
495	 *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
496	 *       | '(' expr ')'
497	 *       ;
498	 *
499	 *  The FOLLOW sets are all inclusive whereas context-sensitive
500	 *  FOLLOW sets are precisely what could follow a rule reference.
	 *  For input "i=(3);", here is the derivation:
502	 *
503	 *  stat => ID '=' expr ';'
504	 *       => ID '=' atom ('+' atom)* ';'
505	 *       => ID '=' '(' expr ')' ('+' atom)* ';'
506	 *       => ID '=' '(' atom ')' ('+' atom)* ';'
507	 *       => ID '=' '(' INT ')' ('+' atom)* ';'
508	 *       => ID '=' '(' INT ')' ';'
509	 *
510	 *  At the "3" token, you'd have a call chain of
511	 *
512	 *    stat -> expr -> atom -> expr -> atom
513	 *
514	 *  What can follow that specific nested ref to atom?  Exactly ')'
515	 *  as you can see by looking at the derivation of this specific
516	 *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
517	 *
518	 *  You want the exact viable token set when recovering from a
519	 *  token mismatch.  Upon token mismatch, if LA(1) is member of
520	 *  the viable next token set, then you know there is most likely
521	 *  a missing token in the input stream.  "Insert" one by just not
522	 *  throwing an exception.
523	 */
	protected BitSet computeContextSensitiveRuleFOLLOW() {
		// exact=true: stop combining once a rule's end is no longer viable,
		// yielding the precise follow for the current call chain.
		return combineFollows(true);
	}
527
528	// what is exact? it seems to only add sets from above on stack
529	// if EOR is in set i.  When it sees a set w/o EOR, it stops adding.
530	// Why would we ever want them all?  Maybe no viable alt instead of
531	// mismatched token?
	/** Union the local follow sets currently on the stack, walking from the
	 *  top (innermost rule) down.  When exact is true, stop combining as
	 *  soon as a set without EOR is seen (the end of that rule is not
	 *  viable, so outer contexts cannot apply), and strip EOR from sets
	 *  above the start rule so only the start rule can signal "EOF may
	 *  follow".
	 */
	protected BitSet combineFollows(boolean exact) {
		int top = state._fsp;
		BitSet followSet = new BitSet();
		for (int i=top; i>=0; i--) {
			BitSet localFollowSet = (BitSet)state.following[i];
			/*
			System.out.println("local follow depth "+i+"="+
							   localFollowSet.toString(getTokenNames())+")");
			 */
			followSet.orInPlace(localFollowSet);
			if ( exact ) {
				// can we see end of rule?
				if ( localFollowSet.member(Token.EOR_TOKEN_TYPE) ) {
					// Only leave EOR in set if at top (start rule); this lets
					// us know if have to include follow(start rule); i.e., EOF
					if ( i>0 ) {
						followSet.remove(Token.EOR_TOKEN_TYPE);
					}
				}
				else { // can't see end of rule, quit
					break;
				}
			}
		}
		return followSet;
	}
558
559	/** Attempt to recover from a single missing or extra token.
560	 *
561	 *  EXTRA TOKEN
562	 *
563	 *  LA(1) is not what we are looking for.  If LA(2) has the right token,
564	 *  however, then assume LA(1) is some extra spurious token.  Delete it
565	 *  and LA(2) as if we were doing a normal match(), which advances the
566	 *  input.
567	 *
568	 *  MISSING TOKEN
569	 *
570	 *  If current token is consistent with what could come after
571	 *  ttype then it is ok to "insert" the missing token, else throw
572	 *  exception For example, Input "i=(3;" is clearly missing the
573	 *  ')'.  When the parser returns from the nested call to expr, it
574	 *  will have call chain:
575	 *
576	 *    stat -> expr -> atom
577	 *
578	 *  and it will be trying to match the ')' at this point in the
579	 *  derivation:
580	 *
581	 *       => ID '=' '(' INT ')' ('+' atom)* ';'
582	 *                          ^
583	 *  match() will see that ';' doesn't match ')' and report a
584	 *  mismatched token error.  To recover, it sees that LA(1)==';'
585	 *  is in the set of tokens that can follow the ')' token
586	 *  reference in rule atom.  It can assume that you forgot the ')'.
587	 */
	protected Object recoverFromMismatchedToken(IntStream input, int ttype, BitSet follow)
		throws RecognitionException
	{
		RecognitionException e = null;
		// if next token is what we are looking for then "delete" this token
		if ( mismatchIsUnwantedToken(input, ttype) ) {
			e = new UnwantedTokenException(ttype, input);
			/*
			System.err.println("recoverFromMismatchedToken deleting "+
							   ((TokenStream)input).LT(1)+
							   " since "+((TokenStream)input).LT(2)+" is what we want");
			 */
			beginResync();
			input.consume(); // simply delete extra token
			endResync();
			reportError(e);  // report after consuming so AW sees the token in the exception
			// we want to return the token we're actually matching
			Object matchedSymbol = getCurrentInputSymbol(input);
			input.consume(); // move past ttype token as if all were ok
			return matchedSymbol;
		}
		// can't recover with single token deletion, try insertion
		if ( mismatchIsMissingToken(input, follow) ) {
			// NOTE: e is still null on this path; getMissingSymbol must
			// tolerate a null exception argument.
			Object inserted = getMissingSymbol(input, e, ttype, follow);
			e = new MissingTokenException(ttype, input, inserted);
			reportError(e);  // report after inserting so AW sees the token in the exception
			return inserted;
		}
		// even that didn't work; must throw the exception
		e = new MismatchedTokenException(ttype, input);
		throw e;
	}
620
621	/** Not currently used */
	public Object recoverFromMismatchedSet(IntStream input,
										   RecognitionException e,
										   BitSet follow)
		throws RecognitionException
	{
		// Only missing-token ("insertion") recovery is attempted for sets.
		if ( mismatchIsMissingToken(input, follow) ) {
			// System.out.println("missing token");
			reportError(e);
			// we don't know how to conjure up a token for sets yet
			return getMissingSymbol(input, e, Token.INVALID_TOKEN_TYPE, follow);
		}
		// TODO do single token deletion like above for Token mismatch
		throw e;
	}
636
637	/** Match needs to return the current input symbol, which gets put
638	 *  into the label for the associated token ref; e.g., x=ID.  Token
639	 *  and tree parsers need to return different objects. Rather than test
640	 *  for input stream type or change the IntStream interface, I use
641	 *  a simple method to ask the recognizer to tell me what the current
642	 *  input symbol is.
643	 *
644	 *  This is ignored for lexers.
645	 */
	// Default: no symbol object (suitable for lexers, per the note above);
	// token and tree parser subclasses return their own symbol types.
	protected Object getCurrentInputSymbol(IntStream input) { return null; }
647
648	/** Conjure up a missing token during error recovery.
649	 *
650	 *  The recognizer attempts to recover from single missing
651	 *  symbols. But, actions might refer to that missing symbol.
652	 *  For example, x=ID {f($x);}. The action clearly assumes
653	 *  that there has been an identifier matched previously and that
654	 *  $x points at that token. If that token is missing, but
655	 *  the next token in the stream is what we want we assume that
656	 *  this token is missing and we keep going. Because we
657	 *  have to return some token to replace the missing token,
658	 *  we have to conjure one up. This method gives the user control
659	 *  over the tokens returned for missing tokens. Mostly,
660	 *  you will want to create something special for identifier
661	 *  tokens. For literals such as '{' and ',', the default
662	 *  action in the parser or tree parser works. It simply creates
663	 *  a CommonToken of the appropriate type. The text will be the token.
664	 *  If you change what tokens must be created by the lexer,
665	 *  override this method to create the appropriate tokens.
666	 */
	protected Object getMissingSymbol(IntStream input,
									  RecognitionException e,
									  int expectedTokenType,
									  BitSet follow)
	{
		// Default conjures nothing; per the javadoc above, parser/tree
		// parser subclasses create a token of expectedTokenType.
		return null;
	}
674
675	public void consumeUntil(IntStream input, int tokenType) {
676		//System.out.println("consumeUntil "+tokenType);
677		int ttype = input.LA(1);
678		while (ttype != Token.EOF && ttype != tokenType) {
679			input.consume();
680			ttype = input.LA(1);
681		}
682	}
683
684	/** Consume tokens until one matches the given token set */
685	public void consumeUntil(IntStream input, BitSet set) {
686		//System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
687		int ttype = input.LA(1);
688		while (ttype != Token.EOF && !set.member(ttype) ) {
689			//System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
690			input.consume();
691			ttype = input.LA(1);
692		}
693	}
694
695	/** Push a rule's follow set using our own hardcoded stack */
696	protected void pushFollow(BitSet fset) {
697		if ( (state._fsp +1)>=state.following.length ) {
698			BitSet[] f = new BitSet[state.following.length*2];
699			System.arraycopy(state.following, 0, f, 0, state.following.length);
700			state.following = f;
701		}
702		state.following[++state._fsp] = fset;
703	}
704
705	/** Return List<String> of the rules in your parser instance
706	 *  leading up to a call to this method.  You could override if
707	 *  you want more details such as the file/line info of where
708	 *  in the parser java code a rule is invoked.
709	 *
710	 *  This is very useful for error messages and for context-sensitive
711	 *  error recovery.
712	 */
713	public List getRuleInvocationStack() {
714		String parserClassName = getClass().getName();
715		return getRuleInvocationStack(new Throwable(), parserClassName);
716	}
717
718	/** A more general version of getRuleInvocationStack where you can
719	 *  pass in, for example, a RecognitionException to get it's rule
720	 *  stack trace.  This routine is shared with all recognizers, hence,
721	 *  static.
722	 *
723	 *  TODO: move to a utility class or something; weird having lexer call this
724	 */
725	public static List getRuleInvocationStack(Throwable e,
726											  String recognizerClassName)
727	{
728		List rules = new ArrayList();
729		StackTraceElement[] stack = e.getStackTrace();
730		int i = 0;
731		for (i=stack.length-1; i>=0; i--) {
732			StackTraceElement t = stack[i];
733			if ( t.getClassName().startsWith("org.antlr.runtime.") ) {
734				continue; // skip support code such as this method
735			}
736			if ( t.getMethodName().equals(NEXT_TOKEN_RULE_NAME) ) {
737				continue;
738			}
739			if ( !t.getClassName().equals(recognizerClassName) ) {
740				continue; // must not be part of this parser
741			}
742            rules.add(t.getMethodName());
743		}
744		return rules;
745	}
746
    /** Current backtracking (guessing) depth; 0 when parsing normally. */
    public int getBacktrackingLevel() { return state.backtracking; }

    /** Force the backtracking depth to n. */
    public void setBacktrackingLevel(int n) { state.backtracking = n; }

    /** Return whether or not a backtracking attempt failed. */
    public boolean failed() { return state.failed; }
753
754	/** Used to print out token names like ID during debugging and
755	 *  error reporting.  The generated parsers implement a method
756	 *  that overrides this to point to their String[] tokenNames.
757	 */
	public String[] getTokenNames() {
		// Base implementation has no token table; generated recognizers
		// override this to return their String[] tokenNames.
		return null;
	}
761
762	/** For debugging and other purposes, might want the grammar name.
763	 *  Have ANTLR generate an implementation for this method.
764	 */
	public String getGrammarFileName() {
		// Unknown at this level; ANTLR-generated subclasses override.
		return null;
	}
768
769	public abstract String getSourceName();
770
771	/** A convenience method for use most often with template rewrites.
772	 *  Convert a List<Token> to List<String>
773	 */
774	public List toStrings(List tokens) {
775		if ( tokens==null ) return null;
776		List strings = new ArrayList(tokens.size());
777		for (int i=0; i<tokens.size(); i++) {
778			strings.add(((Token)tokens.get(i)).getText());
779		}
780		return strings;
781	}
782
783	/** Given a rule number and a start token index number, return
784	 *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
785	 *  start index.  If this rule has parsed input starting from the
786	 *  start index before, then return where the rule stopped parsing.
787	 *  It returns the index of the last token matched by the rule.
788	 *
789	 *  For now we use a hashtable and just the slow Object-based one.
790	 *  Later, we can make a special one for ints and also one that
791	 *  tosses out data after we commit past input position i.
792	 */
	public int getRuleMemoization(int ruleIndex, int ruleStartIndex) {
		// Lazily create the per-rule map on first query for this rule.
		if ( state.ruleMemo[ruleIndex]==null ) {
			state.ruleMemo[ruleIndex] = new HashMap();
		}
		Integer stopIndexI =
			(Integer)state.ruleMemo[ruleIndex].get(new Integer(ruleStartIndex));
		if ( stopIndexI==null ) {
			// No memo entry: this rule has not been tried at this position.
			return MEMO_RULE_UNKNOWN;
		}
		// Stored value is the stop token index, or MEMO_RULE_FAILED.
		return stopIndexI.intValue();
	}
804
805	/** Has this rule already parsed input at the current index in the
806	 *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
807	 *  If we attempted but failed to parse properly before, return
808	 *  MEMO_RULE_FAILED.
809	 *
810	 *  This method has a side-effect: if we have seen this input for
811	 *  this rule and successfully parsed before, then seek ahead to
812	 *  1 past the stop token matched for this rule last time.
813	 */
	public boolean alreadyParsedRule(IntStream input, int ruleIndex) {
		int stopIndex = getRuleMemoization(ruleIndex, input.index());
		if ( stopIndex==MEMO_RULE_UNKNOWN ) {
			// Never tried here before; caller must actually parse the rule.
			return false;
		}
		if ( stopIndex==MEMO_RULE_FAILED ) {
			// Known failure at this position: flag it so backtracking aborts.
			//System.out.println("rule "+ruleIndex+" will never succeed");
			state.failed=true;
		}
		else {
			//System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed);
			input.seek(stopIndex+1); // jump to one past stop token
		}
		return true;
	}
829
830	/** Record whether or not this rule parsed the input at this position
831	 *  successfully.  Use a standard java hashtable for now.
832	 */
833	public void memoize(IntStream input,
834						int ruleIndex,
835						int ruleStartIndex)
836	{
837		int stopTokenIndex = state.failed?MEMO_RULE_FAILED:input.index()-1;
838		if ( state.ruleMemo==null ) {
839			System.err.println("!!!!!!!!! memo array is null for "+ getGrammarFileName());
840		}
841		if ( ruleIndex >= state.ruleMemo.length ) {
842			System.err.println("!!!!!!!!! memo size is "+state.ruleMemo.length+", but rule index is "+ruleIndex);
843		}
844		if ( state.ruleMemo[ruleIndex]!=null ) {
845			state.ruleMemo[ruleIndex].put(
846				new Integer(ruleStartIndex), new Integer(stopTokenIndex)
847			);
848		}
849	}
850
851	/** return how many rule/input-index pairs there are in total.
852	 *  TODO: this includes synpreds. :(
853	 */
854	public int getRuleMemoizationCacheSize() {
855		int n = 0;
856		for (int i = 0; state.ruleMemo!=null && i < state.ruleMemo.length; i++) {
857			Map ruleMap = state.ruleMemo[i];
858			if ( ruleMap!=null ) {
859				n += ruleMap.size(); // how many input indexes are recorded?
860			}
861		}
862		return n;
863	}
864
865	public void traceIn(String ruleName, int ruleIndex, Object inputSymbol)  {
866		System.out.print("enter "+ruleName+" "+inputSymbol);
867		if ( state.backtracking>0 ) {
868			System.out.print(" backtracking="+state.backtracking);
869		}
870		System.out.println();
871	}
872
873	public void traceOut(String ruleName,
874						 int ruleIndex,
875						 Object inputSymbol)
876	{
877		System.out.print("exit "+ruleName+" "+inputSymbol);
878		if ( state.backtracking>0 ) {
879            System.out.print(" backtracking="+state.backtracking);
880            if ( state.failed ) System.out.print(" failed");
881            else System.out.print(" succeeded");
882        }
883		System.out.println();
884	}
885
886}
887