1//
2//  ANTLRBaseRecognizer.m
3//  ANTLR
4//
5//  Created by Alan Condit on 6/16/10.
6// [The "BSD licence"]
7// Copyright (c) 2010 Alan Condit
8// All rights reserved.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions
12// are met:
13// 1. Redistributions of source code must retain the above copyright
14//    notice, this list of conditions and the following disclaimer.
15// 2. Redistributions in binary form must reproduce the above copyright
16//    notice, this list of conditions and the following disclaimer in the
17//    documentation and/or other materials provided with the distribution.
18// 3. The name of the author may not be used to endorse or promote products
19//    derived from this software without specific prior written permission.
20//
21// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32#import "ANTLRBaseRecognizer.h"
33#import "ANTLRHashRule.h"
34#import "ANTLRRuleMemo.h"
35#import "ANTLRCommonToken.h"
36#import "ANTLRMap.h"
37
38extern NSInteger debug;
39
40@implementation ANTLRBaseRecognizer
41
42static AMutableArray *_tokenNames;
43static NSString *_grammarFileName;
44static NSString *NEXT_TOKEN_RULE_NAME;
45
46@synthesize state;
47@synthesize grammarFileName;
48//@synthesize failed;
49@synthesize sourceName;
50//@synthesize numberOfSyntaxErrors;
51@synthesize tokenNames;
52
53+ (void) initialize
54{
55    NEXT_TOKEN_RULE_NAME = [NSString stringWithString:@"nextToken"];
56    [NEXT_TOKEN_RULE_NAME retain];
57}
58
59+ (ANTLRBaseRecognizer *) newANTLRBaseRecognizer
60{
61    return [[ANTLRBaseRecognizer alloc] init];
62}
63
64+ (ANTLRBaseRecognizer *) newANTLRBaseRecognizerWithRuleLen:(NSInteger)aLen
65{
66    return [[ANTLRBaseRecognizer alloc] initWithLen:aLen];
67}
68
69+ (ANTLRBaseRecognizer *) newANTLRBaseRecognizer:(ANTLRRecognizerSharedState *)aState
70{
71	return [[ANTLRBaseRecognizer alloc] initWithState:aState];
72}
73
74+ (AMutableArray *)getTokenNames
75{
76    return _tokenNames;
77}
78
79+ (void)setTokenNames:(AMutableArray *)theTokNams
80{
81    if ( _tokenNames != theTokNams ) {
82        if ( _tokenNames ) [_tokenNames release];
83        [theTokNams retain];
84    }
85    _tokenNames = theTokNams;
86}
87
88+ (void)setGrammarFileName:(NSString *)aFileName
89{
90    if ( _grammarFileName != aFileName ) {
91        if ( _grammarFileName ) [_grammarFileName release];
92        [aFileName retain];
93    }
94    [_grammarFileName retain];
95}
96
97- (id) init
98{
99	if ((self = [super init]) != nil) {
100        if (state == nil) {
101            state = [[ANTLRRecognizerSharedState newANTLRRecognizerSharedState] retain];
102        }
103        tokenNames = _tokenNames;
104        if ( tokenNames ) [tokenNames retain];
105        grammarFileName = _grammarFileName;
106        if ( grammarFileName ) [grammarFileName retain];
107        state._fsp = -1;
108        state.errorRecovery = NO;		// are we recovering?
109        state.lastErrorIndex = -1;
110        state.failed = NO;				// indicate that some match failed
111        state.syntaxErrors = 0;
112        state.backtracking = 0;			// the level of backtracking
113        state.tokenStartCharIndex = -1;
114	}
115	return self;
116}
117
118- (id) initWithLen:(NSInteger)aLen
119{
120	if ((self = [super init]) != nil) {
121        if (state == nil) {
122            state = [[ANTLRRecognizerSharedState newANTLRRecognizerSharedStateWithRuleLen:aLen] retain];
123        }
124        tokenNames = _tokenNames;
125        if ( tokenNames ) [tokenNames retain];
126        grammarFileName = _grammarFileName;
127        if ( grammarFileName ) [grammarFileName retain];
128        state._fsp = -1;
129        state.errorRecovery = NO;		// are we recovering?
130        state.lastErrorIndex = -1;
131        state.failed = NO;				// indicate that some match failed
132        state.syntaxErrors = 0;
133        state.backtracking = 0;			// the level of backtracking
134        state.tokenStartCharIndex = -1;
135	}
136	return self;
137}
138
139- (id) initWithState:(ANTLRRecognizerSharedState *)aState
140{
141	if ((self = [super init]) != nil) {
142		state = aState;
143        if (state == nil) {
144            state = [ANTLRRecognizerSharedState newANTLRRecognizerSharedState];
145        }
146        [state retain];
147        tokenNames = _tokenNames;
148        if ( tokenNames ) [tokenNames retain];
149        grammarFileName = _grammarFileName;
150        if ( grammarFileName ) [grammarFileName retain];
151        state._fsp = -1;
152        state.errorRecovery = NO;		// are we recovering?
153        state.lastErrorIndex = -1;
154        state.failed = NO;				// indicate that some match failed
155        state.syntaxErrors = 0;
156        state.backtracking = 0;			// the level of backtracking
157        state.tokenStartCharIndex = -1;
158	}
159	return self;
160}
161
162- (void)dealloc
163{
164#ifdef DEBUG_DEALLOC
165    NSLog( @"called dealloc in ANTLRBaseRecognizer" );
166#endif
167	if ( grammarFileName ) [grammarFileName release];
168	if ( tokenNames ) [tokenNames release];
169	if ( state ) [state release];
170	[super dealloc];
171}
172
173// reset the recognizer to the initial state. does not touch the token source!
174// this can be extended by the grammar writer to reset custom ivars
175- (void) reset
176{
177    if ( state == nil )
178        return; 
179    if ( state.following != nil ) {
180        if ( [state.following count] )
181            [state.following removeAllObjects];
182    }
183    state._fsp = -1;
184    state.errorRecovery = NO;		// are we recovering?
185    state.lastErrorIndex = -1;
186    state.failed = NO;				// indicate that some match failed
187    state.syntaxErrors = 0;
188    state.backtracking = 0;			// the level of backtracking
189    state.tokenStartCharIndex = -1;
190    if ( state.ruleMemo != nil ) {
191        if ( [state.ruleMemo count] )
192            [state.ruleMemo removeAllObjects];
193    }
194}
195
196- (BOOL) getFailed
197{
198	return [state getFailed];
199}
200
201- (void) setFailed:(BOOL)flag
202{
203	[state setFailed:flag];
204}
205
206- (ANTLRRecognizerSharedState *) getState
207{
208	return state;
209}
210
211- (void) setState:(ANTLRRecognizerSharedState *) theState
212{
213	if (state != theState) {
214		if ( state ) [state release];
215		state = theState;
216		[state retain];
217	}
218}
219
220- (id)input
221{
222    return nil; // Must be overriden in inheriting class
223}
224
225- (void)skip // override in inheriting class
226{
227    return;
228}
229
230-(id) match:(id<ANTLRIntStream>)anInput TokenType:(NSInteger)ttype Follow:(ANTLRBitSet *)follow
231{
232	id matchedSymbol = [self getCurrentInputSymbol:anInput];
233	if ([anInput LA:1] == ttype) {
234		[anInput consume];
235		state.errorRecovery = NO;
236		state.failed = NO;
237		return matchedSymbol;
238	}
239	if (state.backtracking > 0) {
240		state.failed = YES;
241		return matchedSymbol;
242	}
243	matchedSymbol = [self recoverFromMismatchedToken:anInput TokenType:ttype Follow:follow];
244	return matchedSymbol;
245}
246
247-(void) matchAny:(id<ANTLRIntStream>)anInput
248{
249    state.errorRecovery = NO;
250    state.failed = NO;
251    [anInput consume];
252}
253
254-(BOOL) mismatchIsUnwantedToken:(id<ANTLRIntStream>)anInput TokenType:(NSInteger)ttype
255{
256    return [anInput LA:2] == ttype;
257}
258
259-(BOOL) mismatchIsMissingToken:(id<ANTLRIntStream>)anInput Follow:(ANTLRBitSet *) follow
260{
261    if ( follow == nil ) {
262        // we have no information about the follow; we can only consume
263        // a single token and hope for the best
264        return NO;
265    }
266    // compute what can follow this grammar element reference
267    if ( [follow member:ANTLRTokenTypeEOR] ) {
268        ANTLRBitSet *viableTokensFollowingThisRule = [self computeContextSensitiveRuleFOLLOW];
269        follow = [follow or:viableTokensFollowingThisRule];
270        if ( state._fsp >= 0 ) { // remove EOR if we're not the start symbol
271            [follow remove:(ANTLRTokenTypeEOR)];
272        }
273    }
274    // if current token is consistent with what could come after set
275    // then we know we're missing a token; error recovery is free to
276    // "insert" the missing token
277    
278    //System.out.println("viable tokens="+follow.toString(getTokenNames()));
279    //System.out.println("LT(1)="+((TokenStream)input).LT(1));
280    
281    // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
282    // in follow set to indicate that the fall of the start symbol is
283    // in the set (EOF can follow).
284    if ( [follow member:[anInput LA:1]] || [follow member:ANTLRTokenTypeEOR] ) {
285        //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
286        return YES;
287    }
288    return NO;
289}
290
291/** Report a recognition problem.
292 *
293 *  This method sets errorRecovery to indicate the parser is recovering
294 *  not parsing.  Once in recovery mode, no errors are generated.
295 *  To get out of recovery mode, the parser must successfully match
296 *  a token (after a resync).  So it will go:
297 *
298 * 		1. error occurs
299 * 		2. enter recovery mode, report error
300 * 		3. consume until token found in resynch set
301 * 		4. try to resume parsing
302 * 		5. next match() will reset errorRecovery mode
303 *
304 *  If you override, make sure to update syntaxErrors if you care about that.
305 */
306-(void) reportError:(ANTLRRecognitionException *) e
307{
308    // if we've already reported an error and have not matched a token
309    // yet successfully, don't report any errors.
310    if ( state.errorRecovery ) {
311        //System.err.print("[SPURIOUS] ");
312        return;
313    }
314    state.syntaxErrors++; // don't count spurious
315    state.errorRecovery = YES;
316    
317    [self displayRecognitionError:[self getTokenNames] Exception:e];
318}
319
320-(void) displayRecognitionError:(AMutableArray *)theTokNams Exception:(ANTLRRecognitionException *)e
321{
322    NSString *hdr = [self getErrorHeader:e];
323    NSString *msg = [self getErrorMessage:e TokenNames:theTokNams];
324    [self emitErrorMessage:[NSString stringWithFormat:@" %@ %@", hdr, msg]];
325}
326
327/** What error message should be generated for the various
328 *  exception types?
329 *
330 *  Not very object-oriented code, but I like having all error message
331 *  generation within one method rather than spread among all of the
332 *  exception classes. This also makes it much easier for the exception
333 *  handling because the exception classes do not have to have pointers back
334 *  to this object to access utility routines and so on. Also, changing
335 *  the message for an exception type would be difficult because you
336 *  would have to subclassing exception, but then somehow get ANTLR
337 *  to make those kinds of exception objects instead of the default.
338 *  This looks weird, but trust me--it makes the most sense in terms
339 *  of flexibility.
340 *
341 *  For grammar debugging, you will want to override this to add
342 *  more information such as the stack frame with
343 *  getRuleInvocationStack(e, this.getClass().getName()) and,
344 *  for no viable alts, the decision description and state etc...
345 *
346 *  Override this to change the message generated for one or more
347 *  exception types.
348 */
349- (NSString *)getErrorMessage:(ANTLRRecognitionException *)e TokenNames:(AMutableArray *)theTokNams
350{
351    // NSString *msg = [e getMessage];
352    NSString *msg;
353    if ( [e isKindOfClass:[ANTLRUnwantedTokenException class]] ) {
354        ANTLRUnwantedTokenException *ute = (ANTLRUnwantedTokenException *)e;
355        NSString *tokenName=@"<unknown>";
356        if ( ute.expecting == ANTLRTokenTypeEOF ) {
357            tokenName = @"EOF";
358        }
359        else {
360            tokenName = (NSString *)[theTokNams objectAtIndex:ute.expecting];
361        }
362        msg = [NSString stringWithFormat:@"extraneous input %@ expecting %@", [self getTokenErrorDisplay:[ute getUnexpectedToken]],
363               tokenName];
364    }
365    else if ( [e isKindOfClass:[ANTLRMissingTokenException class] ] ) {
366        ANTLRMissingTokenException *mte = (ANTLRMissingTokenException *)e;
367        NSString *tokenName=@"<unknown>";
368        if ( mte.expecting== ANTLRTokenTypeEOF ) {
369            tokenName = @"EOF";
370        }
371        else {
372            tokenName = [theTokNams objectAtIndex:mte.expecting];
373        }
374        msg = [NSString stringWithFormat:@"missing %@ at %@", tokenName, [self getTokenErrorDisplay:(e.token)] ];
375    }
376    else if ( [e isKindOfClass:[ANTLRMismatchedTokenException class]] ) {
377        ANTLRMismatchedTokenException *mte = (ANTLRMismatchedTokenException *)e;
378        NSString *tokenName=@"<unknown>";
379        if ( mte.expecting== ANTLRTokenTypeEOF ) {
380            tokenName = @"EOF";
381        }
382        else {
383            tokenName = [theTokNams objectAtIndex:mte.expecting];
384        }
385        msg = [NSString stringWithFormat:@"mismatched input %@ expecting %@",[self getTokenErrorDisplay:(e.token)], tokenName];
386    }
387    else if ( [e isKindOfClass:[ANTLRMismatchedTreeNodeException class]] ) {
388        ANTLRMismatchedTreeNodeException *mtne = (ANTLRMismatchedTreeNodeException *)e;
389        NSString *tokenName=@"<unknown>";
390        if ( mtne.expecting==ANTLRTokenTypeEOF ) {
391            tokenName = @"EOF";
392        }
393        else {
394            tokenName = [theTokNams objectAtIndex:mtne.expecting];
395        }
396        msg = [NSString stringWithFormat:@"mismatched tree node: %@ expecting %@", mtne.node, tokenName];
397    }
398    else if ( [e isKindOfClass:[ANTLRNoViableAltException class]] ) {
399        //NoViableAltException *nvae = (NoViableAltException *)e;
400        // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
401        // and "(decision="+nvae.decisionNumber+") and
402        // "state "+nvae.stateNumber
403        msg = [NSString stringWithFormat:@"no viable alternative at input %@", [self getTokenErrorDisplay:e.token]];
404    }
405    else if ( [e isKindOfClass:[ANTLREarlyExitException class]] ) {
406        //ANTLREarlyExitException *eee = (ANTLREarlyExitException *)e;
407        // for development, can add "(decision="+eee.decisionNumber+")"
408        msg =[NSString stringWithFormat: @"required (...)+ loop did not match anything at input ", [self getTokenErrorDisplay:e.token]];
409    }
410    else if ( [e isKindOfClass:[ANTLRMismatchedSetException class]] ) {
411        ANTLRMismatchedSetException *mse = (ANTLRMismatchedSetException *)e;
412        msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
413               [self getTokenErrorDisplay:(e.token)],
414               mse.expecting];
415    }
416#pragma warning NotSet not yet implemented.
417    else if ( [e isKindOfClass:[ANTLRMismatchedNotSetException class] ] ) {
418        ANTLRMismatchedNotSetException *mse = (ANTLRMismatchedNotSetException *)e;
419        msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
420               [self getTokenErrorDisplay:(e.token)],
421               mse.expecting];
422    }
423    else if ( [e isKindOfClass:[ANTLRFailedPredicateException class]] ) {
424        ANTLRFailedPredicateException *fpe = (ANTLRFailedPredicateException *)e;
425        msg = [NSString stringWithFormat:@"rule %@ failed predicate: { %@ }?", fpe.ruleName, fpe.predicate];
426    }
427    else {
428        msg = [NSString stringWithFormat:@"Exception= %@\n", e.name];
429    }
430    return msg;
431}
432
433/** Get number of recognition errors (lexer, parser, tree parser).  Each
434 *  recognizer tracks its own number.  So parser and lexer each have
435 *  separate count.  Does not count the spurious errors found between
436 *  an error and next valid token match
437 *
438 *  See also reportError()
439 */
440- (NSInteger) getNumberOfSyntaxErrors
441{
442    return state.syntaxErrors;
443}
444
445/** What is the error header, normally line/character position information? */
446- (NSString *)getErrorHeader:(ANTLRRecognitionException *)e
447{
448    return [NSString stringWithFormat:@"line %d:%d", e.line, e.charPositionInLine];
449}
450
451/** How should a token be displayed in an error message? The default
452 *  is to display just the text, but during development you might
453 *  want to have a lot of information spit out.  Override in that case
454 *  to use t.toString() (which, for CommonToken, dumps everything about
455 *  the token). This is better than forcing you to override a method in
456 *  your token objects because you don't have to go modify your lexer
457 *  so that it creates a new Java type.
458 */
459- (NSString *)getTokenErrorDisplay:(id<ANTLRToken>)t
460{
461    NSString *s = t.text;
462    if ( s == nil ) {
463        if ( t.type == ANTLRTokenTypeEOF ) {
464            s = @"<EOF>";
465        }
466        else {
467            s = [NSString stringWithFormat:@"<%@>", t.type];
468        }
469    }
470    s = [s stringByReplacingOccurrencesOfString:@"\n" withString:@"\\\\n"];
471    s = [s stringByReplacingOccurrencesOfString:@"\r" withString:@"\\\\r"];
472    s = [s stringByReplacingOccurrencesOfString:@"\t" withString:@"\\\\t"];
473    return [NSString stringWithFormat:@"\'%@\'", s];
474}
475                                        
476/** Override this method to change where error messages go */
477- (void) emitErrorMessage:(NSString *) msg
478{
479//    System.err.println(msg);
480    NSLog(@"%@", msg);
481}
482
483/** Recover from an error found on the input stream.  This is
484 *  for NoViableAlt and mismatched symbol exceptions.  If you enable
485 *  single token insertion and deletion, this will usually not
486 *  handle mismatched symbol exceptions but there could be a mismatched
487 *  token that the match() routine could not recover from.
488 */
489- (void)recover:(id<ANTLRIntStream>)anInput Exception:(ANTLRRecognitionException *)re
490{
491    if ( state.lastErrorIndex == anInput.index ) {
492        // uh oh, another error at same token index; must be a case
493        // where LT(1) is in the recovery token set so nothing is
494        // consumed; consume a single token so at least to prevent
495        // an infinite loop; this is a failsafe.
496        [anInput consume];
497    }
498    state.lastErrorIndex = anInput.index;
499    ANTLRBitSet *followSet = [self computeErrorRecoverySet];
500    [self beginResync];
501    [self consumeUntilFollow:anInput Follow:followSet];
502    [self endResync];
503}
504
505- (void) beginResync
506{
507    
508}
509
510- (void) endResync
511{
512    
513}
514                            
515/*  Compute the error recovery set for the current rule.  During
516 *  rule invocation, the parser pushes the set of tokens that can
517 *  follow that rule reference on the stack; this amounts to
518 *  computing FIRST of what follows the rule reference in the
519 *  enclosing rule. This local follow set only includes tokens
520 *  from within the rule; i.e., the FIRST computation done by
521 *  ANTLR stops at the end of a rule.
522 *
523 *  EXAMPLE
524 *
525 *  When you find a "no viable alt exception", the input is not
526 *  consistent with any of the alternatives for rule r.  The best
527 *  thing to do is to consume tokens until you see something that
528 *  can legally follow a call to r *or* any rule that called r.
529 *  You don't want the exact set of viable next tokens because the
530 *  input might just be missing a token--you might consume the
531 *  rest of the input looking for one of the missing tokens.
532 *
533 *  Consider grammar:
534 *
535 *  a : '[' b ']'
536 *    | '(' b ')'
537 *    ;
538 *  b : c '^' INT ;
539 *  c : ID
540 *    | INT
541 *    ;
542 *
543 *  At each rule invocation, the set of tokens that could follow
544 *  that rule is pushed on a stack.  Here are the various "local"
545 *  follow sets:
546 *
547 *  FOLLOW(b1_in_a) = FIRST(']') = ']'
548 *  FOLLOW(b2_in_a) = FIRST(')') = ')'
549 *  FOLLOW(c_in_b) = FIRST('^') = '^'
550 *
551 *  Upon erroneous input "[]", the call chain is
552 *
553 *  a -> b -> c
554 *
555 *  and, hence, the follow context stack is:
556 *
557 *  depth  local follow set     after call to rule
558 *    0         <EOF>                    a (from main())
559 *    1          ']'                     b
560 *    3          '^'                     c
561 *
562 *  Notice that ')' is not included, because b would have to have
563 *  been called from a different context in rule a for ')' to be
564 *  included.
565 *
566 *  For error recovery, we cannot consider FOLLOW(c)
567 *  (context-sensitive or otherwise).  We need the combined set of
568 *  all context-sensitive FOLLOW sets--the set of all tokens that
569 *  could follow any reference in the call chain.  We need to
570 *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
571 *  we resync'd to that token, we'd consume until EOF.  We need to
572 *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
573 *  In this case, for input "[]", LA(1) is in this set so we would
574 *  not consume anything and after printing an error rule c would
575 *  return normally.  It would not find the required '^' though.
576 *  At this point, it gets a mismatched token error and throws an
577 *  exception (since LA(1) is not in the viable following token
578 *  set).  The rule exception handler tries to recover, but finds
579 *  the same recovery set and doesn't consume anything.  Rule b
580 *  exits normally returning to rule a.  Now it finds the ']' (and
581 *  with the successful match exits errorRecovery mode).
582 *
583 *  So, you cna see that the parser walks up call chain looking
584 *  for the token that was a member of the recovery set.
585 *
586 *  Errors are not generated in errorRecovery mode.
587 *
588 *  ANTLR's error recovery mechanism is based upon original ideas:
589 *
590 *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
591 *
592 *  and
593 *
594 *  "A note on error recovery in recursive descent parsers":
595 *  http://portal.acm.org/citation.cfm?id=947902.947905
596 *
597 *  Later, Josef Grosch had some good ideas:
598 *
599 *  "Efficient and Comfortable Error Recovery in Recursive Descent
600 *  Parsers":
601 *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
602 *
603 *  Like Grosch I implemented local FOLLOW sets that are combined
604 *  at run-time upon error to avoid overhead during parsing.
605 */
606- (ANTLRBitSet *) computeErrorRecoverySet
607{
608    return [self combineFollows:NO];
609}
610
611/** Compute the context-sensitive FOLLOW set for current rule.
612 *  This is set of token types that can follow a specific rule
613 *  reference given a specific call chain.  You get the set of
614 *  viable tokens that can possibly come next (lookahead depth 1)
615 *  given the current call chain.  Contrast this with the
616 *  definition of plain FOLLOW for rule r:
617 *
618 *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
619 *
620 *  where x in T* and alpha, beta in V*; T is set of terminals and
621 *  V is the set of terminals and nonterminals.  In other words,
622 *  FOLLOW(r) is the set of all tokens that can possibly follow
623 *  references to r in *any* sentential form (context).  At
624 *  runtime, however, we know precisely which context applies as
625 *  we have the call chain.  We may compute the exact (rather
626 *  than covering superset) set of following tokens.
627 *
628 *  For example, consider grammar:
629 *
630 *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
631 *       | "return" expr '.'
632 *       ;
633 *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
634 *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
635 *       | '(' expr ')'
636 *       ;
637 *
638 *  The FOLLOW sets are all inclusive whereas context-sensitive
639 *  FOLLOW sets are precisely what could follow a rule reference.
640 *  For input input "i=(3);", here is the derivation:
641 *
642 *  stat => ID '=' expr ';'
643 *       => ID '=' atom ('+' atom)* ';'
644 *       => ID '=' '(' expr ')' ('+' atom)* ';'
645 *       => ID '=' '(' atom ')' ('+' atom)* ';'
646 *       => ID '=' '(' INT ')' ('+' atom)* ';'
647 *       => ID '=' '(' INT ')' ';'
648 *
649 *  At the "3" token, you'd have a call chain of
650 *
651 *    stat -> expr -> atom -> expr -> atom
652 *
653 *  What can follow that specific nested ref to atom?  Exactly ')'
654 *  as you can see by looking at the derivation of this specific
655 *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
656 *
657 *  You want the exact viable token set when recovering from a
658 *  token mismatch.  Upon token mismatch, if LA(1) is member of
659 *  the viable next token set, then you know there is most likely
660 *  a missing token in the input stream.  "Insert" one by just not
661 *  throwing an exception.
662 */
663- (ANTLRBitSet *)computeContextSensitiveRuleFOLLOW
664{
665    return [self combineFollows:YES];
666}
667
668// what is exact? it seems to only add sets from above on stack
669// if EOR is in set i.  When it sees a set w/o EOR, it stops adding.
670// Why would we ever want them all?  Maybe no viable alt instead of
671// mismatched token?
672- (ANTLRBitSet *)combineFollows:(BOOL) exact
673{
674    NSInteger top = state._fsp;
675    ANTLRBitSet *followSet = [[ANTLRBitSet newANTLRBitSet] retain];
676    for (int i = top; i >= 0; i--) {
677        ANTLRBitSet *localFollowSet = (ANTLRBitSet *)[state.following objectAtIndex:i];
678        /*
679         System.out.println("local follow depth "+i+"="+
680         localFollowSet.toString(getTokenNames())+")");
681         */
682        [followSet orInPlace:localFollowSet];
683        if ( exact ) {
684            // can we see end of rule?
685            if ( [localFollowSet member:ANTLRTokenTypeEOR] ) {
686                // Only leave EOR in set if at top (start rule); this lets
687                // us know if have to include follow(start rule); i.e., EOF
688                if ( i > 0 ) {
689                    [followSet remove:ANTLRTokenTypeEOR];
690                }
691            }
692            else { // can't see end of rule, quit
693                break;
694            }
695        }
696    }
697    return followSet;
698}
699
700/** Attempt to recover from a single missing or extra token.
701 *
702 *  EXTRA TOKEN
703 *
704 *  LA(1) is not what we are looking for.  If LA(2) has the right token,
705 *  however, then assume LA(1) is some extra spurious token.  Delete it
706 *  and LA(2) as if we were doing a normal match(), which advances the
707 *  input.
708 *
709 *  MISSING TOKEN
710 *
711 *  If current token is consistent with what could come after
712 *  ttype then it is ok to "insert" the missing token, else throw
713 *  exception For example, Input "i=(3;" is clearly missing the
714 *  ')'.  When the parser returns from the nested call to expr, it
715 *  will have call chain:
716 *
717 *    stat -> expr -> atom
718 *
719 *  and it will be trying to match the ')' at this point in the
720 *  derivation:
721 *
722 *       => ID '=' '(' INT ')' ('+' atom)* ';'
723 *                          ^
724 *  match() will see that ';' doesn't match ')' and report a
725 *  mismatched token error.  To recover, it sees that LA(1)==';'
726 *  is in the set of tokens that can follow the ')' token
727 *  reference in rule atom.  It can assume that you forgot the ')'.
728 */
729- (id<ANTLRToken>)recoverFromMismatchedToken:(id<ANTLRIntStream>)anInput
730                       TokenType:(NSInteger)ttype
731                          Follow:(ANTLRBitSet *)follow
732{
733    ANTLRRecognitionException *e = nil;
734    // if next token is what we are looking for then "delete" this token
735    if ( [self mismatchIsUnwantedToken:anInput TokenType:ttype] ) {
736        e = [ANTLRUnwantedTokenException newException:ttype Stream:anInput];
737        /*
738         System.err.println("recoverFromMismatchedToken deleting "+
739         ((TokenStream)input).LT(1)+
740         " since "+((TokenStream)input).LT(2)+" is what we want");
741         */
742        [self beginResync];
743        [anInput consume]; // simply delete extra token
744        [self endResync];
745        [self reportError:e];  // report after consuming so AW sees the token in the exception
746                         // we want to return the token we're actually matching
747        id matchedSymbol = [self getCurrentInputSymbol:anInput];
748        [anInput consume]; // move past ttype token as if all were ok
749        return matchedSymbol;
750    }
751    // can't recover with single token deletion, try insertion
752    if ( [self mismatchIsMissingToken:anInput Follow:follow] ) {
753        id<ANTLRToken> inserted = [self getMissingSymbol:anInput Exception:e TokenType:ttype Follow:follow];
754        e = [ANTLRMissingTokenException newException:ttype Stream:anInput With:inserted];
755        [self reportError:e];  // report after inserting so AW sees the token in the exception
756        return inserted;
757    }
758    // even that didn't work; must throw the exception
759    e = [ANTLRMismatchedTokenException newException:ttype Stream:anInput];
760    @throw e;
761}
762
763/** Not currently used */
764-(id) recoverFromMismatchedSet:(id<ANTLRIntStream>)anInput
765                     Exception:(ANTLRRecognitionException *)e
766                        Follow:(ANTLRBitSet *) follow
767{
768    if ( [self mismatchIsMissingToken:anInput Follow:follow] ) {
769        // System.out.println("missing token");
770        [self reportError:e];
771        // we don't know how to conjure up a token for sets yet
772        return [self getMissingSymbol:anInput Exception:e TokenType:ANTLRTokenTypeInvalid Follow:follow];
773    }
774    // TODO do single token deletion like above for Token mismatch
775    @throw e;
776}
777
778/** Match needs to return the current input symbol, which gets put
779 *  into the label for the associated token ref; e.g., x=ID.  Token
780 *  and tree parsers need to return different objects. Rather than test
781 *  for input stream type or change the IntStream interface, I use
782 *  a simple method to ask the recognizer to tell me what the current
783 *  input symbol is.
784 * 
785 *  This is ignored for lexers.
786 */
787- (id) getCurrentInputSymbol:(id<ANTLRIntStream>)anInput
788{
789    return nil;
790}
791
792/** Conjure up a missing token during error recovery.
793 *
794 *  The recognizer attempts to recover from single missing
795 *  symbols. But, actions might refer to that missing symbol.
796 *  For example, x=ID {f($x);}. The action clearly assumes
797 *  that there has been an identifier matched previously and that
798 *  $x points at that token. If that token is missing, but
799 *  the next token in the stream is what we want we assume that
800 *  this token is missing and we keep going. Because we
801 *  have to return some token to replace the missing token,
802 *  we have to conjure one up. This method gives the user control
803 *  over the tokens returned for missing tokens. Mostly,
804 *  you will want to create something special for identifier
805 *  tokens. For literals such as '{' and ',', the default
806 *  action in the parser or tree parser works. It simply creates
807 *  a CommonToken of the appropriate type. The text will be the token.
808 *  If you change what tokens must be created by the lexer,
809 *  override this method to create the appropriate tokens.
810 */
811- (id)getMissingSymbol:(id<ANTLRIntStream>)anInput
812             Exception:(ANTLRRecognitionException *)e
813             TokenType:(NSInteger)expectedTokenType
814                Follow:(ANTLRBitSet *)follow
815{
816    return nil;
817}
818
819
820-(void) consumeUntilTType:(id<ANTLRIntStream>)anInput TokenType:(NSInteger)tokenType
821{
822    //System.out.println("consumeUntil "+tokenType);
823    int ttype = [anInput LA:1];
824    while (ttype != ANTLRTokenTypeEOF && ttype != tokenType) {
825        [anInput consume];
826        ttype = [anInput LA:1];
827    }
828}
829
830/** Consume tokens until one matches the given token set */
831-(void) consumeUntilFollow:(id<ANTLRIntStream>)anInput Follow:(ANTLRBitSet *)set
832{
833    //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
834    int ttype = [anInput LA:1];
835    while (ttype != ANTLRTokenTypeEOF && ![set member:ttype] ) {
836        //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
837        [anInput consume];
838        ttype = [anInput LA:1];
839    }
840}
841
842/** Push a rule's follow set using our own hardcoded stack */
843- (void)pushFollow:(ANTLRBitSet *)fset
844{
845    if ( (state._fsp +1) >= [state.following count] ) {
846        //        AMutableArray *f = [AMutableArray arrayWithCapacity:[[state.following] count]*2];
847        //        System.arraycopy(state.following, 0, f, 0, state.following.length);
848        //        state.following = f;
849        [state.following addObject:fset];
850        [fset retain];
851        state._fsp++;
852    }
853    else {
854        [state.following replaceObjectAtIndex:++state._fsp withObject:fset];
855    }
856}
857
858- (ANTLRBitSet *)popFollow
859{
860    ANTLRBitSet *fset;
861
862    if ( state._fsp >= 0 && [state.following count] > 0 ) {
863        fset = [state.following objectAtIndex:state._fsp--];
864        [state.following removeLastObject];
865        return fset;
866    }
867    else {
868        NSLog( @"Attempted to pop a follow when none exists on the stack\n" );
869    }
870    return nil;
871}
872
873/** Return List<String> of the rules in your parser instance
874 *  leading up to a call to this method.  You could override if
875 *  you want more details such as the file/line info of where
876 *  in the parser java code a rule is invoked.
877 *
878 *  This is very useful for error messages and for context-sensitive
879 *  error recovery.
880 */
881- (AMutableArray *)getRuleInvocationStack
882{
883    NSString *parserClassName = [[self className] retain];
884    return [self getRuleInvocationStack:[ANTLRRecognitionException newException] Recognizer:parserClassName];
885}
886
887/** A more general version of getRuleInvocationStack where you can
888 *  pass in, for example, a RecognitionException to get it's rule
889 *  stack trace.  This routine is shared with all recognizers, hence,
890 *  static.
891 *
892 *  TODO: move to a utility class or something; weird having lexer call this
893 */
894- (AMutableArray *)getRuleInvocationStack:(ANTLRRecognitionException *)e
895                                Recognizer:(NSString *)recognizerClassName
896{
897    // char *name;
898    AMutableArray *rules = [[AMutableArray arrayWithCapacity:20] retain];
899    NSArray *stack = [e callStackSymbols];
900    int i = 0;
901    for (i = [stack count]-1; i >= 0; i--) {
902        NSString *t = [stack objectAtIndex:i];
903        // NSLog(@"stack %d = %@\n", i, t);
904        if ( [t commonPrefixWithString:@"org.antlr.runtime." options:NSLiteralSearch] ) {
905            // id aClass = objc_getClass( [t UTF8String] );
906            continue; // skip support code such as this method
907        }
908        if ( [t isEqualTo:NEXT_TOKEN_RULE_NAME] ) {
909            // name = sel_getName(method_getName(method));
910            // NSString *aMethod = [NSString stringWithFormat:@"%s", name];
911            continue;
912        }
913        if ( ![t isEqualTo:recognizerClassName] ) {
914            // name = class_getName( [t UTF8String] );
915            continue; // must not be part of this parser
916        }
917        [rules addObject:t];
918    }
919#ifdef DONTUSEYET
920    StackTraceElement[] stack = e.getStackTrace();
921    int i = 0;
922    for (i=stack.length-1; i>=0; i--) {
923        StackTraceElement t = stack[i];
924        if ( [t getClassName().startsWith("org.antlr.runtime.") ) {
925            continue; // skip support code such as this method
926        }
927              if ( [[t getMethodName] equals:NEXT_TOKEN_RULE_NAME] ) {
928            continue;
929        }
930              if ( ![[t getClassName] equals:recognizerClassName] ) {
931            continue; // must not be part of this parser
932        }
933              [rules addObject:[t getMethodName]];
934    }
935#endif
936    [stack release];
937    return rules;
938}
939
940- (NSInteger) getBacktrackingLevel
941{
942    return [state getBacktracking];
943}
944      
945- (void) setBacktrackingLevel:(NSInteger)level
946{
947    [state setBacktracking:level];
948}
949      
950        /** Used to print out token names like ID during debugging and
951 *  error reporting.  The generated parsers implement a method
952 *  that overrides this to point to their String[] tokenNames.
953 */
954- (NSArray *)getTokenNames
955{
956    return tokenNames;
957}
958
959/** For debugging and other purposes, might want the grammar name.
960 *  Have ANTLR generate an implementation for this method.
961 */
962- (NSString *)getGrammarFileName
963{
964    return grammarFileName;
965}
966
967- (NSString *)getSourceName
968{
969    return nil;
970}
971
972/** A convenience method for use most often with template rewrites.
973 *  Convert a List<Token> to List<String>
974 */
975- (AMutableArray *)toStrings:(AMutableArray *)tokens
976{
977    if ( tokens == nil )
978        return nil;
979    AMutableArray *strings = [AMutableArray arrayWithCapacity:[tokens count]];
980    id object;
981    NSInteger i = 0;
982    for (object in tokens) {
983        [strings addObject:[object text]];
984        i++;
985    }
986    return strings;
987}
988
989/** Given a rule number and a start token index number, return
990 *  ANTLR_MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
991 *  start index.  If this rule has parsed input starting from the
992 *  start index before, then return where the rule stopped parsing.
993 *  It returns the index of the last token matched by the rule.
994 *
995 *  For now we use a hashtable and just the slow Object-based one.
996 *  Later, we can make a special one for ints and also one that
997 *  tosses out data after we commit past input position i.
998 */
999- (NSInteger)getRuleMemoization:(NSInteger)ruleIndex StartIndex:(NSInteger)ruleStartIndex
1000{
1001    NSNumber *stopIndexI;
1002    ANTLRHashRule *aHashRule;
1003    if ( (aHashRule = [state.ruleMemo objectAtIndex:ruleIndex]) == nil ) {
1004        aHashRule = [ANTLRHashRule newANTLRHashRuleWithLen:17];
1005        [state.ruleMemo insertObject:aHashRule atIndex:ruleIndex];
1006    }
1007    stopIndexI = [aHashRule getRuleMemoStopIndex:ruleStartIndex];
1008    if ( stopIndexI == nil ) {
1009        return ANTLR_MEMO_RULE_UNKNOWN;
1010    }
1011    return [stopIndexI integerValue];
1012}
1013
1014/** Has this rule already parsed input at the current index in the
1015 *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
1016 *  If we attempted but failed to parse properly before, return
1017 *  MEMO_RULE_FAILED.
1018 *
1019 *  This method has a side-effect: if we have seen this input for
1020 *  this rule and successfully parsed before, then seek ahead to
1021 *  1 past the stop token matched for this rule last time.
1022 */
1023- (BOOL)alreadyParsedRule:(id<ANTLRIntStream>)anInput RuleIndex:(NSInteger)ruleIndex
1024{
1025    NSInteger aStopIndex = [self getRuleMemoization:ruleIndex StartIndex:anInput.index];
1026    if ( aStopIndex == ANTLR_MEMO_RULE_UNKNOWN ) {
1027        // NSLog(@"rule %d not yet encountered\n", ruleIndex);
1028        return NO;
1029    }
1030    if ( aStopIndex == ANTLR_MEMO_RULE_FAILED ) {
1031        if (debug) NSLog(@"rule %d will never succeed\n", ruleIndex);
1032        state.failed = YES;
1033    }
1034    else {
1035        if (debug) NSLog(@"seen rule %d before; skipping ahead to %d failed = %@\n", ruleIndex, aStopIndex+1, state.failed?@"YES":@"NO");
1036        [anInput seek:(aStopIndex+1)]; // jump to one past stop token
1037    }
1038    return YES;
1039}
1040      
1041/** Record whether or not this rule parsed the input at this position
1042 *  successfully.  Use a standard java hashtable for now.
1043 */
1044- (void)memoize:(id<ANTLRIntStream>)anInput
1045      RuleIndex:(NSInteger)ruleIndex
1046     StartIndex:(NSInteger)ruleStartIndex
1047{
1048    ANTLRRuleStack *aRuleStack;
1049    NSInteger stopTokenIndex;
1050
1051    aRuleStack = state.ruleMemo;
1052    stopTokenIndex = (state.failed ? ANTLR_MEMO_RULE_FAILED : (anInput.index-1));
1053    if ( aRuleStack == nil ) {
1054        if (debug) NSLog(@"!!!!!!!!! memo array is nil for %@", [self getGrammarFileName]);
1055        return;
1056    }
1057    if ( ruleIndex >= [aRuleStack length] ) {
1058        if (debug) NSLog(@"!!!!!!!!! memo size is %d, but rule index is %d", [state.ruleMemo length], ruleIndex);
1059        return;
1060    }
1061    if ( [aRuleStack objectAtIndex:ruleIndex] != nil ) {
1062        [aRuleStack putHashRuleAtRuleIndex:ruleIndex StartIndex:ruleStartIndex StopIndex:stopTokenIndex];
1063    }
1064    return;
1065}
1066   
1067/** return how many rule/input-index pairs there are in total.
1068 *  TODO: this includes synpreds. :(
1069 */
1070- (NSInteger)getRuleMemoizationCacheSize
1071{
1072    ANTLRRuleStack *aRuleStack;
1073    ANTLRHashRule *aHashRule;
1074
1075    int aCnt = 0;
1076    aRuleStack = state.ruleMemo;
1077    for (NSUInteger i = 0; aRuleStack != nil && i < [aRuleStack length]; i++) {
1078        aHashRule = [aRuleStack objectAtIndex:i];
1079        if ( aHashRule != nil ) {
1080            aCnt += [aHashRule count]; // how many input indexes are recorded?
1081        }
1082    }
1083    return aCnt;
1084}
1085
1086#pragma warning Have to fix traceIn and traceOut.
1087- (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
1088{
1089    NSLog(@"enter %@ %@", ruleName, inputSymbol);
1090    if ( state.backtracking > 0 ) {
1091        NSLog(@" backtracking=%s", ((state.backtracking==YES)?"YES":"NO"));
1092    }
1093    NSLog(@"\n");
1094}
1095
1096- (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
1097{
1098    NSLog(@"exit %@ -- %@", ruleName, inputSymbol);
1099    if ( state.backtracking > 0 ) {
1100        NSLog(@" backtracking=%s %s", state.backtracking?"YES":"NO", state.failed ? "failed":"succeeded");
1101    }
1102    NSLog(@"\n");
1103}
1104
1105
1106// call a syntactic predicate methods using its selector. this way we can support arbitrary synpreds.
1107- (BOOL) evaluateSyntacticPredicate:(SEL)synpredFragment // stream:(id<ANTLRIntStream>)input
1108{
1109    id<ANTLRIntStream> input;
1110
1111    state.backtracking++;
1112    // input = state.token.input;
1113    input = self.input;
1114    int start = [input mark];
1115    @try {
1116        [self performSelector:synpredFragment];
1117    }
1118    @catch (ANTLRRecognitionException *re) {
1119        NSLog(@"impossible synpred: %@", re.name);
1120    }
1121    BOOL success = (state.failed == NO);
1122    [input rewind:start];
1123    state.backtracking--;
1124    state.failed = NO;
1125    return success;
1126}
1127              
1128@end
1129                               
1130