1/*
2 * [The "BSD license"]
3 *  Copyright (c) 2010 Terence Parr
4 *  All rights reserved.
5 *
6 *  Redistribution and use in source and binary forms, with or without
7 *  modification, are permitted provided that the following conditions
8 *  are met:
9 *  1. Redistributions of source code must retain the above copyright
10 *      notice, this list of conditions and the following disclaimer.
11 *  2. Redistributions in binary form must reproduce the above copyright
12 *      notice, this list of conditions and the following disclaimer in the
13 *      documentation and/or other materials provided with the distribution.
14 *  3. The name of the author may not be used to endorse or promote products
15 *      derived from this software without specific prior written permission.
16 *
17 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28package org.antlr.analysis;
29
30import org.antlr.grammar.v3.ANTLRParser;
31import org.antlr.misc.IntSet;
32import org.antlr.misc.IntervalSet;
33import org.antlr.tool.Grammar;
34import org.antlr.tool.Rule;
35
36import java.util.HashMap;
37import java.util.HashSet;
38import java.util.Map;
39import java.util.Set;
40
41/**
42 * Created by IntelliJ IDEA.
43 * User: parrt
44 * Date: Dec 31, 2007
45 * Time: 1:31:16 PM
46 * To change this template use File | Settings | File Templates.
47 */
48public class LL1Analyzer {
49	/**	0	if we hit end of rule and invoker should keep going (epsilon) */
50	public static final int DETECT_PRED_EOR = 0;
51	/**	1	if we found a nonautobacktracking pred */
52	public static final int DETECT_PRED_FOUND = 1;
53	/**	2	if we didn't find such a pred */
54	public static final int DETECT_PRED_NOT_FOUND = 2;
55
56	public Grammar grammar;
57
58	/** Used during LOOK to detect computation cycles */
59	protected Set<NFAState> lookBusy = new HashSet<NFAState>();
60
61	public Map<NFAState, LookaheadSet> FIRSTCache = new HashMap<NFAState, LookaheadSet>();
62	public Map<Rule, LookaheadSet> FOLLOWCache = new HashMap<Rule, LookaheadSet>();
63
64	public LL1Analyzer(Grammar grammar) {
65		this.grammar = grammar;
66	}
67
68	/*
69	public void computeRuleFIRSTSets() {
70		if ( getNumberOfDecisions()==0 ) {
71			createNFAs();
72		}
73		for (Iterator it = getRules().iterator(); it.hasNext();) {
74			Rule r = (Rule)it.next();
75			if ( r.isSynPred ) {
76				continue;
77			}
78			LookaheadSet s = FIRST(r);
79			System.out.println("FIRST("+r.name+")="+s);
80		}
81	}
82	*/
83
84	/*
85	public Set<String> getOverriddenRulesWithDifferentFIRST() {
86		// walk every rule in this grammar and compare FIRST set with
87		// those in imported grammars.
88		Set<String> rules = new HashSet();
89		for (Iterator it = getRules().iterator(); it.hasNext();) {
90			Rule r = (Rule)it.next();
91			//System.out.println(r.name+" FIRST="+r.FIRST);
92			for (int i = 0; i < delegates.size(); i++) {
93				Grammar g = delegates.get(i);
94				Rule importedRule = g.getRule(r.name);
95				if ( importedRule != null ) { // exists in imported grammar
96					// System.out.println(r.name+" exists in imported grammar: FIRST="+importedRule.FIRST);
97					if ( !r.FIRST.equals(importedRule.FIRST) ) {
98						rules.add(r.name);
99					}
100				}
101			}
102		}
103		return rules;
104	}
105
106	public Set<Rule> getImportedRulesSensitiveToOverriddenRulesDueToLOOK() {
107		Set<String> diffFIRSTs = getOverriddenRulesWithDifferentFIRST();
108		Set<Rule> rules = new HashSet();
109		for (Iterator it = diffFIRSTs.iterator(); it.hasNext();) {
110			String r = (String) it.next();
111			for (int i = 0; i < delegates.size(); i++) {
112				Grammar g = delegates.get(i);
113				Set<Rule> callers = g.ruleSensitivity.get(r);
114				// somebody invokes rule whose FIRST changed in subgrammar?
115				if ( callers!=null ) {
116					rules.addAll(callers);
117					//System.out.println(g.name+" rules "+callers+" sensitive to "+r+"; dup 'em");
118				}
119			}
120		}
121		return rules;
122	}
123*/
124
125	/*
126	public LookaheadSet LOOK(Rule r) {
127		if ( r.FIRST==null ) {
128			r.FIRST = FIRST(r.startState);
129		}
130		return r.FIRST;
131	}
132*/
133
134	/** From an NFA state, s, find the set of all labels reachable from s.
135	 *  Used to compute follow sets for error recovery.  Never computes
136	 *  a FOLLOW operation.  FIRST stops at end of rules, returning EOR, unless
137	 *  invoked from another rule.  I.e., routine properly handles
138	 *
139	 *     a : b A ;
140	 *
141	 *  where b is nullable.
142	 *
143	 *  We record with EOR_TOKEN_TYPE if we hit the end of a rule so we can
144	 *  know at runtime (when these sets are used) to start walking up the
145	 *  follow chain to compute the real, correct follow set (as opposed to
146	 *  the FOLLOW, which is a superset).
147	 *
148	 *  This routine will only be used on parser and tree parser grammars.
149	 */
150	public LookaheadSet FIRST(NFAState s) {
151		//System.out.println("> FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule);
152		lookBusy.clear();
153		LookaheadSet look = _FIRST(s, false);
154		//System.out.println("< FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule+"="+look.toString(this.grammar));
155		return look;
156	}
157
158	public LookaheadSet FOLLOW(Rule r) {
159        //System.out.println("> FOLLOW("+r.name+") in rule "+r.startState.enclosingRule);
160		LookaheadSet f = FOLLOWCache.get(r);
161		if ( f!=null ) {
162			return f;
163		}
164		f = _FIRST(r.stopState, true);
165		FOLLOWCache.put(r, f);
166        //System.out.println("< FOLLOW("+r+") in rule "+r.startState.enclosingRule+"="+f.toString(this.grammar));
167		return f;
168	}
169
170	public LookaheadSet LOOK(NFAState s) {
171		if ( NFAToDFAConverter.debug ) {
172			System.out.println("> LOOK("+s+")");
173		}
174		lookBusy.clear();
175		LookaheadSet look = _FIRST(s, true);
176		// FOLLOW makes no sense (at the moment!) for lexical rules.
177		if ( grammar.type!=Grammar.LEXER && look.member(Label.EOR_TOKEN_TYPE) ) {
178			// avoid altering FIRST reset as it is cached
179			LookaheadSet f = FOLLOW(s.enclosingRule);
180			f.orInPlace(look);
181			f.remove(Label.EOR_TOKEN_TYPE);
182			look = f;
183			//look.orInPlace(FOLLOW(s.enclosingRule));
184		}
185		else if ( grammar.type==Grammar.LEXER && look.member(Label.EOT) ) {
186			// if this has EOT, lookahead is all char (all char can follow rule)
187			//look = new LookaheadSet(Label.EOT);
188			look = new LookaheadSet(IntervalSet.COMPLETE_SET);
189		}
190		if ( NFAToDFAConverter.debug ) {
191			System.out.println("< LOOK("+s+")="+look.toString(grammar));
192		}
193		return look;
194	}
195
196	protected LookaheadSet _FIRST(NFAState s, boolean chaseFollowTransitions) {
197		/*
198		System.out.println("_LOOK("+s+") in rule "+s.enclosingRule);
199		if ( s.transition[0] instanceof RuleClosureTransition ) {
200			System.out.println("go to rule "+((NFAState)s.transition[0].target).enclosingRule);
201		}
202		*/
203		if ( !chaseFollowTransitions && s.isAcceptState() ) {
204			if ( grammar.type==Grammar.LEXER ) {
205				// FOLLOW makes no sense (at the moment!) for lexical rules.
206				// assume all char can follow
207				return new LookaheadSet(IntervalSet.COMPLETE_SET);
208			}
209			return new LookaheadSet(Label.EOR_TOKEN_TYPE);
210		}
211
212		if ( lookBusy.contains(s) ) {
213			// return a copy of an empty set; we may modify set inline
214			return new LookaheadSet();
215		}
216		lookBusy.add(s);
217
218		Transition transition0 = s.transition[0];
219		if ( transition0==null ) {
220			return null;
221		}
222
223		if ( transition0.label.isAtom() ) {
224			int atom = transition0.label.getAtom();
225			return new LookaheadSet(atom);
226		}
227		if ( transition0.label.isSet() ) {
228			IntSet sl = transition0.label.getSet();
229			return new LookaheadSet(sl);
230		}
231
232		// compute FIRST of transition 0
233		LookaheadSet tset = null;
234		// if transition 0 is a rule call and we don't want FOLLOW, check cache
235		if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) {
236			tset = FIRSTCache.get((NFAState)transition0.target);
237		}
238
239		// if not in cache, must compute
240		if ( tset==null ) {
241			tset = _FIRST((NFAState)transition0.target, chaseFollowTransitions);
242			// save FIRST cache for transition 0 if rule call
243			if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) {
244				FIRSTCache.put((NFAState)transition0.target, tset);
245			}
246		}
247
248        LookaheadSet tsetCached = tset; // tset is stored in cache. We can't return the same instance
249
250		// did we fall off the end?
251		if ( grammar.type!=Grammar.LEXER && tset.member(Label.EOR_TOKEN_TYPE) ) {
252			if ( transition0 instanceof RuleClosureTransition ) {
253				// we called a rule that found the end of the rule.
254				// That means the rule is nullable and we need to
255				// keep looking at what follows the rule ref.  E.g.,
256				// a : b A ; where b is nullable means that LOOK(a)
257				// should include A.
258				RuleClosureTransition ruleInvocationTrans =
259					(RuleClosureTransition)transition0;
260				// remove the EOR and get what follows
261				//tset.remove(Label.EOR_TOKEN_TYPE);
262				NFAState following = (NFAState) ruleInvocationTrans.followState;
263				LookaheadSet fset =	_FIRST(following, chaseFollowTransitions);
264				fset.orInPlace(tset); // tset cached; or into new set
265				fset.remove(Label.EOR_TOKEN_TYPE);
266				tset = fset;
267			}
268		}
269
270		Transition transition1 = s.transition[1];
271		if ( transition1!=null ) {
272			LookaheadSet tset1 =
273				_FIRST((NFAState)transition1.target, chaseFollowTransitions);
274			tset1.orInPlace(tset);
275			tset = tset1;
276		}
277
278		// never return a cached set; clone
279		return tset==tsetCached ? new LookaheadSet(tset) : tset;
280	}
281
282	/** Is there a non-syn-pred predicate visible from s that is not in
283	 *  the rule enclosing s?  This accounts for most predicate situations
284	 *  and lets ANTLR do a simple LL(1)+pred computation.
285	 *
286	 *  TODO: what about gated vs regular preds?
287	 */
288	public boolean detectConfoundingPredicates(NFAState s) {
289		lookBusy.clear();
290		Rule r = s.enclosingRule;
291		return _detectConfoundingPredicates(s, r, false) == DETECT_PRED_FOUND;
292	}
293
294	protected int _detectConfoundingPredicates(NFAState s,
295											   Rule enclosingRule,
296											   boolean chaseFollowTransitions)
297	{
298		//System.out.println("_detectNonAutobacktrackPredicates("+s+")");
299		if ( !chaseFollowTransitions && s.isAcceptState() ) {
300			if ( grammar.type==Grammar.LEXER ) {
301				// FOLLOW makes no sense (at the moment!) for lexical rules.
302				// assume all char can follow
303				return DETECT_PRED_NOT_FOUND;
304			}
305			return DETECT_PRED_EOR;
306		}
307
308		if ( lookBusy.contains(s) ) {
309			// return a copy of an empty set; we may modify set inline
310			return DETECT_PRED_NOT_FOUND;
311		}
312		lookBusy.add(s);
313
314		Transition transition0 = s.transition[0];
315		if ( transition0==null ) {
316			return DETECT_PRED_NOT_FOUND;
317		}
318
319		if ( !(transition0.label.isSemanticPredicate()||
320			   transition0.label.isEpsilon()) ) {
321			return DETECT_PRED_NOT_FOUND;
322		}
323
324		if ( transition0.label.isSemanticPredicate() ) {
325			//System.out.println("pred "+transition0.label);
326			SemanticContext ctx = transition0.label.getSemanticContext();
327			SemanticContext.Predicate p = (SemanticContext.Predicate)ctx;
328			if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED ) {
329				return DETECT_PRED_FOUND;
330			}
331		}
332
333		/*
334		if ( transition0.label.isSemanticPredicate() ) {
335			System.out.println("pred "+transition0.label);
336			SemanticContext ctx = transition0.label.getSemanticContext();
337			SemanticContext.Predicate p = (SemanticContext.Predicate)ctx;
338			// if a non-syn-pred found not in enclosingRule, say we found one
339			if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED &&
340				 !p.predicateAST.enclosingRuleName.equals(enclosingRule.name) )
341			{
342				System.out.println("found pred "+p+" not in "+enclosingRule.name);
343				return DETECT_PRED_FOUND;
344			}
345		}
346		*/
347
348		int result = _detectConfoundingPredicates((NFAState)transition0.target,
349												  enclosingRule,
350												  chaseFollowTransitions);
351		if ( result == DETECT_PRED_FOUND ) {
352			return DETECT_PRED_FOUND;
353		}
354
355		if ( result == DETECT_PRED_EOR ) {
356			if ( transition0 instanceof RuleClosureTransition ) {
357				// we called a rule that found the end of the rule.
358				// That means the rule is nullable and we need to
359				// keep looking at what follows the rule ref.  E.g.,
360				// a : b A ; where b is nullable means that LOOK(a)
361				// should include A.
362				RuleClosureTransition ruleInvocationTrans =
363					(RuleClosureTransition)transition0;
364				NFAState following = (NFAState) ruleInvocationTrans.followState;
365				int afterRuleResult =
366					_detectConfoundingPredicates(following,
367												 enclosingRule,
368												 chaseFollowTransitions);
369				if ( afterRuleResult == DETECT_PRED_FOUND ) {
370					return DETECT_PRED_FOUND;
371				}
372			}
373		}
374
375		Transition transition1 = s.transition[1];
376		if ( transition1!=null ) {
377			int t1Result =
378				_detectConfoundingPredicates((NFAState)transition1.target,
379											 enclosingRule,
380											 chaseFollowTransitions);
381			if ( t1Result == DETECT_PRED_FOUND ) {
382				return DETECT_PRED_FOUND;
383			}
384		}
385
386		return DETECT_PRED_NOT_FOUND;
387	}
388
389	/** Return predicate expression found via epsilon edges from s.  Do
390	 *  not look into other rules for now.  Do something simple.  Include
391	 *  backtracking synpreds.
392	 */
393	public SemanticContext getPredicates(NFAState altStartState) {
394		lookBusy.clear();
395		return _getPredicates(altStartState, altStartState);
396	}
397
398	protected SemanticContext _getPredicates(NFAState s, NFAState altStartState) {
399		//System.out.println("_getPredicates("+s+")");
400		if ( s.isAcceptState() ) {
401			return null;
402		}
403
404		// avoid infinite loops from (..)* etc...
405		if ( lookBusy.contains(s) ) {
406			return null;
407		}
408		lookBusy.add(s);
409
410		Transition transition0 = s.transition[0];
411		// no transitions
412		if ( transition0==null ) {
413			return null;
414		}
415
416		// not a predicate and not even an epsilon
417		if ( !(transition0.label.isSemanticPredicate()||
418			   transition0.label.isEpsilon()) ) {
419			return null;
420		}
421
422		SemanticContext p = null;
423		SemanticContext p0 = null;
424		SemanticContext p1 = null;
425		if ( transition0.label.isSemanticPredicate() ) {
426			//System.out.println("pred "+transition0.label);
427			p = transition0.label.getSemanticContext();
428			// ignore backtracking preds not on left edge for this decision
429			if ( ((SemanticContext.Predicate)p).predicateAST.getType() ==
430				  ANTLRParser.BACKTRACK_SEMPRED  &&
431				 s == altStartState.transition[0].target )
432			{
433				p = null; // don't count
434			}
435		}
436
437		// get preds from beyond this state
438		p0 = _getPredicates((NFAState)transition0.target, altStartState);
439
440		// get preds from other transition
441		Transition transition1 = s.transition[1];
442		if ( transition1!=null ) {
443			p1 = _getPredicates((NFAState)transition1.target, altStartState);
444		}
445
446		// join this&following-right|following-down
447		return SemanticContext.and(p,SemanticContext.or(p0,p1));
448	}
449}
450