1/*
2 * [The "BSD license"]
3 *  Copyright (c) 2010 Terence Parr
4 *  All rights reserved.
5 *
6 *  Redistribution and use in source and binary forms, with or without
7 *  modification, are permitted provided that the following conditions
8 *  are met:
9 *  1. Redistributions of source code must retain the above copyright
10 *      notice, this list of conditions and the following disclaimer.
11 *  2. Redistributions in binary form must reproduce the above copyright
12 *      notice, this list of conditions and the following disclaimer in the
13 *      documentation and/or other materials provided with the distribution.
14 *  3. The name of the author may not be used to endorse or promote products
15 *      derived from this software without specific prior written permission.
16 *
17 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28package org.antlr.tool;
29
30import org.antlr.analysis.NFAState;
31import org.antlr.analysis.RuleClosureTransition;
32import org.antlr.analysis.Transition;
33import org.antlr.grammar.v3.ANTLRParser;
34import org.antlr.runtime.tree.Tree;
35
36import java.util.ArrayList;
37import java.util.HashSet;
38import java.util.List;
39import java.util.Set;
40
41/** Factor out routines that check sanity of rules, alts, grammars, etc.. */
42public class GrammarSanity {
43	/** The checkForLeftRecursion method needs to track what rules it has
44	 *  visited to track infinite recursion.
45	 */
46	protected Set<Rule> visitedDuringRecursionCheck = null;
47
48	protected Grammar grammar;
49	public GrammarSanity(Grammar grammar) {
50		this.grammar = grammar;
51	}
52
53	/** Check all rules for infinite left recursion before analysis. Return list
54	 *  of troublesome rule cycles.  This method has two side-effects: it notifies
55	 *  the error manager that we have problems and it sets the list of
56	 *  recursive rules that we should ignore during analysis.
57	 */
58	public List<Set<Rule>> checkAllRulesForLeftRecursion() {
59		grammar.buildNFA(); // make sure we have NFAs
60		grammar.leftRecursiveRules = new HashSet();
61		List<Set<Rule>> listOfRecursiveCycles = new ArrayList();
62		for (int i = 0; i < grammar.composite.ruleIndexToRuleList.size(); i++) {
63			Rule r = grammar.composite.ruleIndexToRuleList.elementAt(i);
64			if ( r!=null ) {
65				visitedDuringRecursionCheck = new HashSet();
66				visitedDuringRecursionCheck.add(r);
67				Set visitedStates = new HashSet();
68				traceStatesLookingForLeftRecursion(r.startState,
69												   visitedStates,
70												   listOfRecursiveCycles);
71			}
72		}
73		if ( listOfRecursiveCycles.size()>0 ) {
74			ErrorManager.leftRecursionCycles(listOfRecursiveCycles);
75		}
76		return listOfRecursiveCycles;
77	}
78
79	/** From state s, look for any transition to a rule that is currently
80	 *  being traced.  When tracing r, visitedDuringRecursionCheck has r
81	 *  initially.  If you reach an accept state, return but notify the
82	 *  invoking rule that it is nullable, which implies that invoking
83	 *  rule must look at follow transition for that invoking state.
84	 *  The visitedStates tracks visited states within a single rule so
85	 *  we can avoid epsilon-loop-induced infinite recursion here.  Keep
86	 *  filling the cycles in listOfRecursiveCycles and also, as a
87	 *  side-effect, set leftRecursiveRules.
88	 */
89	protected boolean traceStatesLookingForLeftRecursion(NFAState s,
90														 Set visitedStates,
91														 List<Set<Rule>> listOfRecursiveCycles)
92	{
93		if ( s.isAcceptState() ) {
94			// this rule must be nullable!
95			// At least one epsilon edge reached accept state
96			return true;
97		}
98		if ( visitedStates.contains(s) ) {
99			// within same rule, we've hit same state; quit looping
100			return false;
101		}
102		visitedStates.add(s);
103		boolean stateReachesAcceptState = false;
104		Transition t0 = s.transition[0];
105		if ( t0 instanceof RuleClosureTransition ) {
106			RuleClosureTransition refTrans = (RuleClosureTransition)t0;
107			Rule refRuleDef = refTrans.rule;
108			//String targetRuleName = ((NFAState)t0.target).getEnclosingRule();
109			if ( visitedDuringRecursionCheck.contains(refRuleDef) ) {
110				// record left-recursive rule, but don't go back in
111				grammar.leftRecursiveRules.add(refRuleDef);
112				/*
113				System.out.println("already visited "+refRuleDef+", calling from "+
114								   s.enclosingRule);
115								   */
116				addRulesToCycle(refRuleDef,
117								s.enclosingRule,
118								listOfRecursiveCycles);
119			}
120			else {
121				// must visit if not already visited; send new visitedStates set
122				visitedDuringRecursionCheck.add(refRuleDef);
123				boolean callReachedAcceptState =
124					traceStatesLookingForLeftRecursion((NFAState)t0.target,
125													   new HashSet(),
126													   listOfRecursiveCycles);
127				// we're back from visiting that rule
128				visitedDuringRecursionCheck.remove(refRuleDef);
129				// must keep going in this rule then
130				if ( callReachedAcceptState ) {
131					NFAState followingState =
132						((RuleClosureTransition) t0).followState;
133					stateReachesAcceptState |=
134						traceStatesLookingForLeftRecursion(followingState,
135														   visitedStates,
136														   listOfRecursiveCycles);
137				}
138			}
139		}
140		else if ( t0.label.isEpsilon() || t0.label.isSemanticPredicate() ) {
141			stateReachesAcceptState |=
142				traceStatesLookingForLeftRecursion((NFAState)t0.target, visitedStates, listOfRecursiveCycles);
143		}
144		// else it has a labeled edge
145
146		// now do the other transition if it exists
147		Transition t1 = s.transition[1];
148		if ( t1!=null ) {
149			stateReachesAcceptState |=
150				traceStatesLookingForLeftRecursion((NFAState)t1.target,
151												   visitedStates,
152												   listOfRecursiveCycles);
153		}
154		return stateReachesAcceptState;
155	}
156
157	/** enclosingRuleName calls targetRuleName, find the cycle containing
158	 *  the target and add the caller.  Find the cycle containing the caller
159	 *  and add the target.  If no cycles contain either, then create a new
160	 *  cycle.  listOfRecursiveCycles is List<Set<String>> that holds a list
161	 *  of cycles (sets of rule names).
162	 */
163	protected void addRulesToCycle(Rule targetRule,
164								   Rule enclosingRule,
165								   List<Set<Rule>> listOfRecursiveCycles)
166	{
167		boolean foundCycle = false;
168		for (int i = 0; i < listOfRecursiveCycles.size(); i++) {
169			Set<Rule> rulesInCycle = listOfRecursiveCycles.get(i);
170			// ensure both rules are in same cycle
171			if ( rulesInCycle.contains(targetRule) ) {
172				rulesInCycle.add(enclosingRule);
173				foundCycle = true;
174			}
175			if ( rulesInCycle.contains(enclosingRule) ) {
176				rulesInCycle.add(targetRule);
177				foundCycle = true;
178			}
179		}
180		if ( !foundCycle ) {
181			Set cycle = new HashSet();
182			cycle.add(targetRule);
183			cycle.add(enclosingRule);
184			listOfRecursiveCycles.add(cycle);
185		}
186	}
187
188	public void checkRuleReference(GrammarAST scopeAST,
189								   GrammarAST refAST,
190								   GrammarAST argsAST,
191								   String currentRuleName)
192	{
193		Rule r = grammar.getRule(refAST.getText());
194		if ( refAST.getType()==ANTLRParser.RULE_REF ) {
195			if ( argsAST!=null ) {
196				// rule[args]; ref has args
197                if ( r!=null && r.argActionAST==null ) {
198					// but rule def has no args
199					ErrorManager.grammarError(
200						ErrorManager.MSG_RULE_HAS_NO_ARGS,
201						grammar,
202						argsAST.getToken(),
203						r.name);
204				}
205			}
206			else {
207				// rule ref has no args
208				if ( r!=null && r.argActionAST!=null ) {
209					// but rule def has args
210					ErrorManager.grammarError(
211						ErrorManager.MSG_MISSING_RULE_ARGS,
212						grammar,
213						refAST.getToken(),
214						r.name);
215				}
216			}
217		}
218		else if ( refAST.getType()==ANTLRParser.TOKEN_REF ) {
219			if ( grammar.type!=Grammar.LEXER ) {
220				if ( argsAST!=null ) {
221					// args on a token ref not in a lexer rule
222					ErrorManager.grammarError(
223						ErrorManager.MSG_ARGS_ON_TOKEN_REF,
224						grammar,
225						refAST.getToken(),
226						refAST.getText());
227				}
228				return; // ignore token refs in nonlexers
229			}
230			if ( argsAST!=null ) {
231				// tokenRef[args]; ref has args
232				if ( r!=null && r.argActionAST==null ) {
233					// but token rule def has no args
234					ErrorManager.grammarError(
235						ErrorManager.MSG_RULE_HAS_NO_ARGS,
236						grammar,
237						argsAST.getToken(),
238						r.name);
239				}
240			}
241			else {
242				// token ref has no args
243				if ( r!=null && r.argActionAST!=null ) {
244					// but token rule def has args
245					ErrorManager.grammarError(
246						ErrorManager.MSG_MISSING_RULE_ARGS,
247						grammar,
248						refAST.getToken(),
249						r.name);
250				}
251			}
252		}
253	}
254
255	/** Rules in tree grammar that use -> rewrites and are spitting out
256	 *  templates via output=template and then use rewrite=true must only
257	 *  use -> on alts that are simple nodes or trees or single rule refs
258	 *  that match either nodes or trees.  The altAST is the ALT node
259	 *  for an ALT.  Verify that its first child is simple.  Must be either
260	 *  ( ALT ^( A B ) <end-of-alt> ) or ( ALT A <end-of-alt> ) or
261	 *  other element.
262	 *
263	 *  Ignore predicates in front and labels.
264	 */
265	public void ensureAltIsSimpleNodeOrTree(GrammarAST altAST,
266											GrammarAST elementAST,
267											int outerAltNum)
268	{
269		if ( isValidSimpleElementNode(elementAST) ) {
270			GrammarAST next = (GrammarAST)elementAST.getNextSibling();
271			if ( !isNextNonActionElementEOA(next)) {
272				ErrorManager.grammarWarning(ErrorManager.MSG_REWRITE_FOR_MULTI_ELEMENT_ALT,
273											grammar,
274											next.token,
275											new Integer(outerAltNum));
276			}
277			return;
278		}
279		switch ( elementAST.getType() ) {
280			case ANTLRParser.ASSIGN :		// labels ok on non-rule refs
281			case ANTLRParser.PLUS_ASSIGN :
282				if ( isValidSimpleElementNode(elementAST.getChild(1)) ) {
283					return;
284				}
285				break;
286			case ANTLRParser.ACTION :		// skip past actions
287			case ANTLRParser.SEMPRED :
288			case ANTLRParser.SYN_SEMPRED :
289			case ANTLRParser.BACKTRACK_SEMPRED :
290			case ANTLRParser.GATED_SEMPRED :
291				ensureAltIsSimpleNodeOrTree(altAST,
292											(GrammarAST)elementAST.getNextSibling(),
293											outerAltNum);
294				return;
295		}
296		ErrorManager.grammarWarning(ErrorManager.MSG_REWRITE_FOR_MULTI_ELEMENT_ALT,
297									grammar,
298									elementAST.token,
299									new Integer(outerAltNum));
300	}
301
302	protected boolean isValidSimpleElementNode(Tree t) {
303		switch ( t.getType() ) {
304			case ANTLRParser.TREE_BEGIN :
305			case ANTLRParser.TOKEN_REF :
306			case ANTLRParser.CHAR_LITERAL :
307			case ANTLRParser.STRING_LITERAL :
308			case ANTLRParser.WILDCARD :
309				return true;
310			default :
311				return false;
312		}
313	}
314
315	protected boolean isNextNonActionElementEOA(GrammarAST t) {
316		while ( t.getType()==ANTLRParser.ACTION ||
317				t.getType()==ANTLRParser.SEMPRED )
318		{
319			t = (GrammarAST)t.getNextSibling();
320		}
321		if ( t.getType()==ANTLRParser.EOA ) {
322			return true;
323		}
324		return false;
325	}
326}
327