1/*
2 * [The "BSD license"]
3 *  Copyright (c) 2010 Terence Parr
4 *  All rights reserved.
5 *
6 *  Redistribution and use in source and binary forms, with or without
7 *  modification, are permitted provided that the following conditions
8 *  are met:
9 *  1. Redistributions of source code must retain the above copyright
10 *      notice, this list of conditions and the following disclaimer.
11 *  2. Redistributions in binary form must reproduce the above copyright
12 *      notice, this list of conditions and the following disclaimer in the
13 *      documentation and/or other materials provided with the distribution.
14 *  3. The name of the author may not be used to endorse or promote products
15 *      derived from this software without specific prior written permission.
16 *
17 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */package org.antlr.tool;
28
29import org.antlr.analysis.Label;
30import org.antlr.analysis.NFAState;
31import org.antlr.grammar.v3.ANTLRParser;
32import org.antlr.grammar.v3.AssignTokenTypesWalker;
33import org.antlr.misc.Utils;
34import org.antlr.runtime.RecognitionException;
35import org.antlr.runtime.tree.CommonTreeNodeStream;
36
37import java.util.*;
38
39/** A tree of component (delegate) grammars.
40 *
41 *  Rules defined in delegates are "inherited" like multi-inheritance
42 *  so you can override them.  All token types must be consistent across
43 *  rules from all delegate grammars, so they must be stored here in one
44 *  central place.
45 *
46 *  We have to start out assuming a composite grammar situation as we can't
47 *  look into the grammar files a priori to see if there is a delegate
48 *  statement.  Because of this, and to avoid duplicating token type tracking
49 *  in each grammar, even single noncomposite grammars use one of these objects
50 *  to track token types.
51 */
52public class CompositeGrammar {
53	public static final int MIN_RULE_INDEX = 1;
54
55	public CompositeGrammarTree delegateGrammarTreeRoot;
56
57	/** Used during getRuleReferenceClosure to detect computation cycles */
58	protected Set<NFAState> refClosureBusy = new HashSet<NFAState>();
59
60	/** Used to assign state numbers; all grammars in composite share common
61	 *  NFA space.  This NFA tracks state numbers number to state mapping.
62	 */
63	public int stateCounter = 0;
64
65	/** The NFA states in the NFA built from rules across grammars in composite.
66	 *  Maps state number to NFAState object.
67	 *  This is a Vector instead of a List because I need to be able to grow
68	 *  this properly.  After talking to Josh Bloch, Collections guy at Sun,
69	 *  I decided this was easiest solution.
70	 */
71	protected Vector<NFAState> numberToStateList = new Vector<NFAState>(1000);
72
73	/** Token names and literal tokens like "void" are uniquely indexed.
74	 *  with -1 implying EOF.  Characters are different; they go from
75	 *  -1 (EOF) to \uFFFE.  For example, 0 could be a binary byte you
76	 *  want to lexer.  Labels of DFA/NFA transitions can be both tokens
77	 *  and characters.  I use negative numbers for bookkeeping labels
78	 *  like EPSILON. Char/String literals and token types overlap in the same
79	 *  space, however.
80	 */
81	protected int maxTokenType = Label.MIN_TOKEN_TYPE-1;
82
83	/** Map token like ID (but not literals like "while") to its token type */
84	public Map tokenIDToTypeMap = new LinkedHashMap();
85
86	/** Map token literals like "while" to its token type.  It may be that
87	 *  WHILE="while"=35, in which case both tokenIDToTypeMap and this
88	 *  field will have entries both mapped to 35.
89	 */
90	public Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>();
91	/** Reverse index for stringLiteralToTypeMap */
92	public Vector<String> typeToStringLiteralList = new Vector<String>();
93
94	/** Map a token type to its token name.
95	 *  Must subtract MIN_TOKEN_TYPE from index.
96	 */
97	public Vector<String> typeToTokenList = new Vector<String>();
98
99	/** If combined or lexer grammar, track the rules.
100	 * 	Track lexer rules so we can warn about undefined tokens.
101	 *  This is combined set of lexer rules from all lexer grammars
102	 *  seen in all imports.
103	 */
104	protected Set<String> lexerRules = new HashSet<String>();
105
106	/** Rules are uniquely labeled from 1..n among all grammars */
107	protected int ruleIndex = MIN_RULE_INDEX;
108
109	/** Map a rule index to its name; use a Vector on purpose as new
110	 *  collections stuff won't let me setSize and make it grow.  :(
111	 *  I need a specific guaranteed index, which the Collections stuff
112	 *  won't let me have.
113	 */
114	protected Vector<Rule> ruleIndexToRuleList = new Vector<Rule>();
115
116	public boolean watchNFAConversion = false;
117
118	protected void initTokenSymbolTables() {
119		// the faux token types take first NUM_FAUX_LABELS positions
120		// then we must have room for the predefined runtime token types
121		// like DOWN/UP used for tree parsing.
122		typeToTokenList.setSize(Label.NUM_FAUX_LABELS+Label.MIN_TOKEN_TYPE-1);
123		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.INVALID, "<INVALID>");
124		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOT, "<EOT>");
125		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SEMPRED, "<SEMPRED>");
126		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SET, "<SET>");
127		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EPSILON, Label.EPSILON_STR);
128		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOF, "EOF");
129		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOR_TOKEN_TYPE-1, "<EOR>");
130		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.DOWN-1, "DOWN");
131		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.UP-1, "UP");
132		tokenIDToTypeMap.put("<INVALID>", Utils.integer(Label.INVALID));
133		tokenIDToTypeMap.put("<EOT>", Utils.integer(Label.EOT));
134		tokenIDToTypeMap.put("<SEMPRED>", Utils.integer(Label.SEMPRED));
135		tokenIDToTypeMap.put("<SET>", Utils.integer(Label.SET));
136		tokenIDToTypeMap.put("<EPSILON>", Utils.integer(Label.EPSILON));
137		tokenIDToTypeMap.put("EOF", Utils.integer(Label.EOF));
138		tokenIDToTypeMap.put("<EOR>", Utils.integer(Label.EOR_TOKEN_TYPE));
139		tokenIDToTypeMap.put("DOWN", Utils.integer(Label.DOWN));
140		tokenIDToTypeMap.put("UP", Utils.integer(Label.UP));
141	}
142
143	public CompositeGrammar() {
144		initTokenSymbolTables();
145	}
146
147	public CompositeGrammar(Grammar g) {
148		this();
149		setDelegationRoot(g);
150	}
151
152	public void setDelegationRoot(Grammar root) {
153		delegateGrammarTreeRoot = new CompositeGrammarTree(root);
154		root.compositeTreeNode = delegateGrammarTreeRoot;
155	}
156
157	public Rule getRule(String ruleName) {
158		return delegateGrammarTreeRoot.getRule(ruleName);
159	}
160
161	public Object getOption(String key) {
162		return delegateGrammarTreeRoot.getOption(key);
163	}
164
165	/** Add delegate grammar as child of delegator */
166	public void addGrammar(Grammar delegator, Grammar delegate) {
167		if ( delegator.compositeTreeNode==null ) {
168			delegator.compositeTreeNode = new CompositeGrammarTree(delegator);
169		}
170		delegator.compositeTreeNode.addChild(new CompositeGrammarTree(delegate));
171
172		/*// find delegator in tree so we can add a child to it
173		CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(delegator);
174		t.addChild();
175		*/
176		// make sure new grammar shares this composite
177		delegate.composite = this;
178	}
179
180	/** Get parent of this grammar */
181	public Grammar getDelegator(Grammar g) {
182		CompositeGrammarTree me = delegateGrammarTreeRoot.findNode(g);
183		if ( me==null ) {
184			return null; // not found
185		}
186		if ( me.parent!=null ) {
187			return me.parent.grammar;
188		}
189		return null;
190	}
191
192	/** Get list of all delegates from all grammars in the delegate subtree of g.
193	 *  The grammars are in delegation tree preorder.  Don't include g itself
194	 *  in list as it is not a delegate of itself.
195	 */
196	public List<Grammar> getDelegates(Grammar g) {
197		CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g);
198		if ( t==null ) {
199			return null; // no delegates
200		}
201		List<Grammar> grammars = t.getPostOrderedGrammarList();
202		grammars.remove(grammars.size()-1); // remove g (last one)
203		return grammars;
204	}
205
206	public List<Grammar> getDirectDelegates(Grammar g) {
207		CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g);
208		List<CompositeGrammarTree> children = t.children;
209		if ( children==null ) {
210			return null;
211		}
212		List<Grammar> grammars = new ArrayList();
213		for (int i = 0; children!=null && i < children.size(); i++) {
214			CompositeGrammarTree child = (CompositeGrammarTree) children.get(i);
215			grammars.add(child.grammar);
216		}
217		return grammars;
218	}
219
220	/** Get delegates below direct delegates of g */
221	public List<Grammar> getIndirectDelegates(Grammar g) {
222		List<Grammar> direct = getDirectDelegates(g);
223		List<Grammar> delegates = getDelegates(g);
224		delegates.removeAll(direct);
225		return delegates;
226	}
227
228	/** Return list of delegate grammars from root down to g.
229	 *  Order is root, ..., g.parent.  (g not included).
230	 */
231	public List<Grammar> getDelegators(Grammar g) {
232		if ( g==delegateGrammarTreeRoot.grammar ) {
233			return null;
234		}
235		List<Grammar> grammars = new ArrayList();
236		CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g);
237		// walk backwards to root, collecting grammars
238		CompositeGrammarTree p = t.parent;
239		while ( p!=null ) {
240			grammars.add(0, p.grammar); // add to head so in order later
241			p = p.parent;
242		}
243		return grammars;
244	}
245
246	/** Get set of rules for grammar g that need to have manual delegation
247	 *  methods.  This is the list of rules collected from all direct/indirect
248	 *  delegates minus rules overridden in grammar g.
249	 *
250	 *  This returns null except for the delegate root because it is the only
251	 *  one that has to have a complete grammar rule interface.  The delegates
252	 *  should not be instantiated directly for use as parsers (you can create
253	 *  them to pass to the root parser's ctor as arguments).
254	 */
255	public Set<Rule> getDelegatedRules(Grammar g) {
256		if ( g!=delegateGrammarTreeRoot.grammar ) {
257			return null;
258		}
259		Set<Rule> rules = getAllImportedRules(g);
260		for (Iterator it = rules.iterator(); it.hasNext();) {
261			Rule r = (Rule) it.next();
262			Rule localRule = g.getLocallyDefinedRule(r.name);
263			// if locally defined or it's not local but synpred, don't make
264			// a delegation method
265			if ( localRule!=null || r.isSynPred ) {
266				it.remove(); // kill overridden rules
267			}
268		}
269		return rules;
270	}
271
272	/** Get all rule definitions from all direct/indirect delegate grammars
273	 *  of g.
274	 */
275	public Set<Rule> getAllImportedRules(Grammar g) {
276		Set<String> ruleNames = new HashSet();
277		Set<Rule> rules = new HashSet();
278		CompositeGrammarTree subtreeRoot = delegateGrammarTreeRoot.findNode(g);
279
280		List<Grammar> grammars = subtreeRoot.getPreOrderedGrammarList();
281		// walk all grammars preorder, priority given to grammar listed first.
282		for (int i = 0; i < grammars.size(); i++) {
283			Grammar delegate = (org.antlr.tool.Grammar) grammars.get(i);
284			// for each rule in delegate, add to rules if no rule with that
285			// name as been seen.  (can't use removeAll; wrong hashcode/equals on Rule)
286			for (Iterator it = delegate.getRules().iterator(); it.hasNext();) {
287				Rule r = (Rule)it.next();
288				if ( !ruleNames.contains(r.name) ) {
289					ruleNames.add(r.name); // track that we've seen this
290					rules.add(r);
291				}
292			}
293		}
294		return rules;
295	}
296
297	public Grammar getRootGrammar() {
298		if ( delegateGrammarTreeRoot==null ) {
299			return null;
300		}
301		return delegateGrammarTreeRoot.grammar;
302	}
303
304	public Grammar getGrammar(String grammarName) {
305		CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(grammarName);
306		if ( t!=null ) {
307			return t.grammar;
308		}
309		return null;
310	}
311
312	// NFA spans multiple grammars, must handle here
313
314	public int getNewNFAStateNumber() {
315		return stateCounter++;
316	}
317
318	public void addState(NFAState state) {
319		numberToStateList.setSize(state.stateNumber+1); // make sure we have room
320		numberToStateList.set(state.stateNumber, state);
321	}
322
323	public NFAState getState(int s) {
324		return (NFAState)numberToStateList.get(s);
325	}
326
327	public void assignTokenTypes() throws RecognitionException {
328		// ASSIGN TOKEN TYPES for all delegates (same walker)
329		//System.out.println("### assign types");
330		AssignTokenTypesWalker ttypesWalker = new AssignTokenTypesBehavior();
331		List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList();
332		for (int i = 0; grammars!=null && i < grammars.size(); i++) {
333			Grammar g = (Grammar)grammars.get(i);
334			ttypesWalker.setTreeNodeStream(new CommonTreeNodeStream(g.getGrammarTree()));
335			try {
336				//System.out.println("    walking "+g.name);
337				ttypesWalker.grammar_(g);
338			}
339			catch (RecognitionException re) {
340				ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE,
341								   re);
342			}
343		}
344		// the walker has filled literals, tokens, and alias tables.
345		// now tell it to define them in the root grammar
346		ttypesWalker.defineTokens(delegateGrammarTreeRoot.grammar);
347	}
348
349	public void translateLeftRecursiveRules() {
350		List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList();
351		for (int i = 0; grammars!=null && i < grammars.size(); i++) {
352			Grammar g = grammars.get(i);
353			if ( !(g.type==Grammar.PARSER || g.type==Grammar.COMBINED) ) continue;
354			for (GrammarAST r : g.grammarTree.findAllType(ANTLRParser.RULE)) {
355				if ( !Character.isUpperCase(r.getChild(0).getText().charAt(0)) ) {
356					if ( LeftRecursiveRuleAnalyzer.hasImmediateRecursiveRuleRefs(r, r.enclosingRuleName) ) {
357						g.translateLeftRecursiveRule(r);
358					}
359				}
360			}
361		}
362	}
363
364	public void defineGrammarSymbols() {
365		delegateGrammarTreeRoot.trimLexerImportsIntoCombined();
366		List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList();
367		for (int i = 0; grammars!=null && i < grammars.size(); i++) {
368			Grammar g = grammars.get(i);
369			g.defineGrammarSymbols();
370		}
371		for (int i = 0; grammars!=null && i < grammars.size(); i++) {
372			Grammar g = grammars.get(i);
373			g.checkNameSpaceAndActions();
374		}
375		minimizeRuleSet();
376	}
377
378	public void createNFAs() {
379		if ( ErrorManager.doNotAttemptAnalysis() ) {
380			return;
381		}
382		List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList();
383		//System.out.println("### createNFAs for composite; grammars: "+names);
384		for (int i = 0; grammars!=null && i < grammars.size(); i++) {
385			Grammar g = (Grammar)grammars.get(i);
386			g.createRuleStartAndStopNFAStates();
387		}
388		for (int i = 0; grammars!=null && i < grammars.size(); i++) {
389			Grammar g = (Grammar)grammars.get(i);
390			g.buildNFA();
391		}
392	}
393
394	public void minimizeRuleSet() {
395		Set<String> ruleDefs = new HashSet<String>();
396		_minimizeRuleSet(ruleDefs, delegateGrammarTreeRoot);
397	}
398
399	public void _minimizeRuleSet(Set<String> ruleDefs,
400								 CompositeGrammarTree p) {
401		Set<String> localRuleDefs = new HashSet<String>();
402		Set<String> overrides = new HashSet<String>();
403		// compute set of non-overridden rules for this delegate
404		for (Rule r : p.grammar.getRules()) {
405			if ( !ruleDefs.contains(r.name) ) {
406				localRuleDefs.add(r.name);
407			}
408			else if ( !r.name.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) {
409				// record any overridden rule 'cept tokens rule
410				overrides.add(r.name);
411			}
412		}
413		//System.out.println("rule defs for "+p.grammar.name+": "+localRuleDefs);
414		//System.out.println("overridden rule for "+p.grammar.name+": "+overrides);
415		p.grammar.overriddenRules = overrides;
416
417		// make set of all rules defined thus far walking delegation tree.
418		// the same rule in two delegates resolves in favor of first found
419		// in tree therefore second must not be included
420		ruleDefs.addAll(localRuleDefs);
421
422		// pass larger set of defined rules to delegates
423		if ( p.children!=null ) {
424			for (CompositeGrammarTree delegate : p.children) {
425				_minimizeRuleSet(ruleDefs, delegate);
426			}
427		}
428	}
429
430	/*
431	public void minimizeRuleSet() {
432		Set<Rule> refs = _minimizeRuleSet(delegateGrammarTreeRoot);
433		System.out.println("all rule refs: "+refs);
434	}
435
436	public Set<Rule> _minimizeRuleSet(CompositeGrammarTree p) {
437		Set<Rule> refs = new HashSet<Rule>();
438		for (GrammarAST refAST : p.grammar.ruleRefs) {
439			System.out.println("ref "+refAST.getText()+": "+refAST.NFAStartState+
440							   " enclosing rule: "+refAST.NFAStartState.enclosingRule+
441							   " invoking rule: "+((NFAState)refAST.NFAStartState.transition[0].target).enclosingRule);
442			refs.add(((NFAState)refAST.NFAStartState.transition[0].target).enclosingRule);
443		}
444
445		if ( p.children!=null ) {
446			for (CompositeGrammarTree delegate : p.children) {
447				Set<Rule> delegateRuleRefs = _minimizeRuleSet(delegate);
448				refs.addAll(delegateRuleRefs);
449			}
450		}
451
452		return refs;
453	}
454	*/
455
456	/*
457	public void oldminimizeRuleSet() {
458		// first walk to remove all overridden rules
459		Set<String> ruleDefs = new HashSet<String>();
460		Set<String> ruleRefs = new HashSet<String>();
461		for (GrammarAST refAST : delegateGrammarTreeRoot.grammar.ruleRefs) {
462			String rname = refAST.getText();
463			ruleRefs.add(rname);
464		}
465		_minimizeRuleSet(ruleDefs,
466						 ruleRefs,
467						 delegateGrammarTreeRoot);
468		System.out.println("overall rule defs: "+ruleDefs);
469	}
470
471	public void _minimizeRuleSet(Set<String> ruleDefs,
472								 Set<String> ruleRefs,
473								 CompositeGrammarTree p) {
474		Set<String> localRuleDefs = new HashSet<String>();
475		for (Rule r : p.grammar.getRules()) {
476			if ( !ruleDefs.contains(r.name) ) {
477				localRuleDefs.add(r.name);
478				ruleDefs.add(r.name);
479			}
480		}
481		System.out.println("rule defs for "+p.grammar.name+": "+localRuleDefs);
482
483		// remove locally-defined rules not in ref set
484		// find intersection of local rules and references from delegator
485		// that is set of rules needed by delegator
486		Set<String> localRuleDefsSatisfyingRefsFromBelow = new HashSet<String>();
487		for (String r : ruleRefs) {
488			if ( localRuleDefs.contains(r) ) {
489				localRuleDefsSatisfyingRefsFromBelow.add(r);
490			}
491		}
492
493		// now get list of refs from localRuleDefsSatisfyingRefsFromBelow.
494		// Those rules are also allowed in this delegate
495		for (GrammarAST refAST : p.grammar.ruleRefs) {
496			if ( localRuleDefsSatisfyingRefsFromBelow.contains(refAST.enclosingRuleName) ) {
497				// found rule ref within needed rule
498			}
499		}
500
501		// remove rule refs not in the new rule def set
502
503		// walk all children, adding rules not already defined
504		if ( p.children!=null ) {
505			for (CompositeGrammarTree delegate : p.children) {
506				_minimizeRuleSet(ruleDefs, ruleRefs, delegate);
507			}
508		}
509	}
510	*/
511
512	/*
513	public void trackNFAStatesThatHaveLabeledEdge(Label label,
514												  NFAState stateWithLabeledEdge)
515	{
516		Set<NFAState> states = typeToNFAStatesWithEdgeOfTypeMap.get(label);
517		if ( states==null ) {
518			states = new HashSet<NFAState>();
519			typeToNFAStatesWithEdgeOfTypeMap.put(label, states);
520		}
521		states.add(stateWithLabeledEdge);
522	}
523
524	public Map<Label, Set<NFAState>> getTypeToNFAStatesWithEdgeOfTypeMap() {
525		return typeToNFAStatesWithEdgeOfTypeMap;
526	}
527
528	public Set<NFAState> getStatesWithEdge(Label label) {
529		return typeToNFAStatesWithEdgeOfTypeMap.get(label);
530	}
531*/
532}
533