antlr/tool/CompositeGrammar.java

/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */package org.antlr.tool;

import org.antlr.analysis.Label;
import org.antlr.analysis.NFAState;
import org.antlr.grammar.v3.ANTLRParser;
import org.antlr.grammar.v3.AssignTokenTypesWalker;
import org.antlr.misc.Utils;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTreeNodeStream;

import java.util.*;

/** A tree of component (delegate) grammars.
 *
 *  Rules defined in delegates are "inherited" like multi-inheritance
 *  so you can override them.  All token types must be consistent across
 *  rules from all delegate grammars, so they must be stored here in one
 *  central place.
 *
 *  We have to start out assuming a composite grammar situation as we can't
 *  look into the grammar files a priori to see if there is a delegate
 *  statement.  Because of this, and to avoid duplicating token type tracking
 *  in each grammar, even single noncomposite grammars use one of these objects
 *  to track token types.
 */
public class CompositeGrammar {
	public static final int MIN_RULE_INDEX = 1;

	public CompositeGrammarTree delegateGrammarTreeRoot;

	/** Used during getRuleReferenceClosure to detect computation cycles */
	protected Set<NFAState> refClosureBusy = new HashSet<NFAState>();

	/** Used to assign state numbers; all grammars in composite share common
	 *  NFA space.  The composite tracks the state-number-to-state mapping.
	 */
	public int stateCounter = 0;

	/** The NFA states in the NFA built from rules across grammars in composite.
	 *  Maps state number to NFAState object.
	 *  This is a Vector instead of a List because I need to be able to grow
	 *  this properly.  After talking to Josh Bloch, Collections guy at Sun,
	 *  I decided this was easiest solution.
	 */
	protected Vector<NFAState> numberToStateList = new Vector<NFAState>(1000);

	/** Token names and literal tokens like "void" are uniquely indexed,
	 *  with -1 implying EOF.  Characters are different; they go from
	 *  -1 (EOF) to \uFFFE.  For example, 0 could be a binary byte you
	 *  want to lex.  Labels of DFA/NFA transitions can be both tokens
	 *  and characters.  I use negative numbers for bookkeeping labels
	 *  like EPSILON.  Char/String literals and token types overlap in the
	 *  same space, however.
	 */
	protected int maxTokenType = Label.MIN_TOKEN_TYPE-1;

	/** Map token like ID (but not literals like "while") to its token type */
	public Map tokenIDToTypeMap = new LinkedHashMap();

	/** Map token literals like "while" to its token type.  It may be that
	 *  WHILE="while"=35, in which case both tokenIDToTypeMap and this
	 *  field will have entries both mapped to 35.
	 */
	public Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>();
	/** Reverse index for stringLiteralToTypeMap */
	public Vector<String> typeToStringLiteralList = new Vector<String>();

	/** Map a token type to its token name.
	 *  Must subtract MIN_TOKEN_TYPE from index.
	 */
	public Vector<String> typeToTokenList = new Vector<String>();

	/** If combined or lexer grammar, track the rules.
	 * 	Track lexer rules so we can warn about undefined tokens.
	 *  This is combined set of lexer rules from all lexer grammars
	 *  seen in all imports.
	 */
	protected Set<String> lexerRules = new HashSet<String>();

	/** Rules are uniquely labeled from 1..n among all grammars */
	protected int ruleIndex = MIN_RULE_INDEX;

	/** Map a rule index to its name; use a Vector on purpose as new
	 *  collections stuff won't let me setSize and make it grow.  :(
	 *  I need a specific guaranteed index, which the Collections stuff
	 *  won't let me have.
	 */
	protected Vector<Rule> ruleIndexToRuleList = new Vector<Rule>();

	public boolean watchNFAConversion = false;

	/** Seed the token tables with the predefined labels.
	 *
	 *  typeToTokenList is sized to hold the faux labels (which occupy the
	 *  first NUM_FAUX_LABELS slots) followed by the predefined runtime token
	 *  types such as DOWN/UP used for tree parsing.  Each predefined name is
	 *  then also registered in tokenIDToTypeMap under its token type.
	 */
	protected void initTokenSymbolTables() {
		final int fauxOffset = Label.NUM_FAUX_LABELS;
		typeToTokenList.setSize(fauxOffset+Label.MIN_TOKEN_TYPE-1);

		// faux (bookkeeping) labels: list slot is offset by NUM_FAUX_LABELS
		typeToTokenList.set(fauxOffset+Label.INVALID, "<INVALID>");
		tokenIDToTypeMap.put("<INVALID>", Utils.integer(Label.INVALID));

		typeToTokenList.set(fauxOffset+Label.EOT, "<EOT>");
		tokenIDToTypeMap.put("<EOT>", Utils.integer(Label.EOT));

		typeToTokenList.set(fauxOffset+Label.SEMPRED, "<SEMPRED>");
		tokenIDToTypeMap.put("<SEMPRED>", Utils.integer(Label.SEMPRED));

		typeToTokenList.set(fauxOffset+Label.SET, "<SET>");
		tokenIDToTypeMap.put("<SET>", Utils.integer(Label.SET));

		typeToTokenList.set(fauxOffset+Label.EPSILON, Label.EPSILON_STR);
		tokenIDToTypeMap.put("<EPSILON>", Utils.integer(Label.EPSILON));

		typeToTokenList.set(fauxOffset+Label.EOF, "EOF");
		tokenIDToTypeMap.put("EOF", Utils.integer(Label.EOF));

		// predefined runtime token types: list slot is additionally shifted by -1
		typeToTokenList.set(fauxOffset+Label.EOR_TOKEN_TYPE-1, "<EOR>");
		tokenIDToTypeMap.put("<EOR>", Utils.integer(Label.EOR_TOKEN_TYPE));

		typeToTokenList.set(fauxOffset+Label.DOWN-1, "DOWN");
		tokenIDToTypeMap.put("DOWN", Utils.integer(Label.DOWN));

		typeToTokenList.set(fauxOffset+Label.UP-1, "UP");
		tokenIDToTypeMap.put("UP", Utils.integer(Label.UP));
	}

	/** Create a composite with the predefined token tables initialized and
	 *  no delegation root set yet.
	 */
	public CompositeGrammar() {
		this.initTokenSymbolTables();
	}

	/** Create a composite whose delegation tree is rooted at grammar g. */
	public CompositeGrammar(Grammar g) {
		this();
		this.setDelegationRoot(g);
	}

	/** Make root the top of the delegation tree: a fresh tree node is created
	 *  for it, stored as this composite's tree root, and also recorded on the
	 *  grammar itself.
	 */
	public void setDelegationRoot(Grammar root) {
		CompositeGrammarTree rootNode = new CompositeGrammarTree(root);
		this.delegateGrammarTreeRoot = rootNode;
		root.compositeTreeNode = rootNode;
	}

	/** Look up ruleName by asking the root node of the delegation tree. */
	public Rule getRule(String ruleName) {
		final CompositeGrammarTree rootNode = this.delegateGrammarTreeRoot;
		return rootNode.getRule(ruleName);
	}

	/** Look up option key by asking the root node of the delegation tree. */
	public Object getOption(String key) {
		final CompositeGrammarTree rootNode = this.delegateGrammarTreeRoot;
		return rootNode.getOption(key);
	}

	/** Attach delegate as a child of delegator in the delegation tree.
	 *
	 *  The delegator's tree node is created on demand if it does not have one
	 *  yet.  The delegate is wrapped in a new tree node, and is pointed at
	 *  this composite so that it shares it with the delegator.
	 */
	public void addGrammar(Grammar delegator, Grammar delegate) {
		CompositeGrammarTree parentNode = delegator.compositeTreeNode;
		if ( parentNode==null ) {
			parentNode = new CompositeGrammarTree(delegator);
			delegator.compositeTreeNode = parentNode;
		}
		// (an earlier version located the delegator via
		// delegateGrammarTreeRoot.findNode(delegator) instead)
		parentNode.addChild(new CompositeGrammarTree(delegate));

		// make sure new grammar shares this composite
		delegate.composite = this;
	}

	/** Return the grammar that directly delegates to g, i.e. the grammar of
	 *  g's parent node in the delegation tree.
	 *
	 *  @return the parent grammar, or null if g is not in the tree or its
	 *          node has no parent
	 */
	public Grammar getDelegator(Grammar g) {
		CompositeGrammarTree node = delegateGrammarTreeRoot.findNode(g);
		boolean hasParent = node!=null && node.parent!=null;
		return hasParent ? node.parent.grammar : null;
	}

	/** Get list of all delegates from all grammars in the delegate subtree of g.
	 *  The list is obtained from getPostOrderedGrammarList() on g's node, so
	 *  the grammars are in delegation tree post-order.  g itself, which is the
	 *  last entry of that list, is dropped because it is not a delegate of
	 *  itself.
	 *
	 *  @return the delegates of g, or null if g is not found in the tree
	 */
	public List<Grammar> getDelegates(Grammar g) {
		CompositeGrammarTree node = delegateGrammarTreeRoot.findNode(g);
		if ( node!=null ) {
			List<Grammar> subtree = node.getPostOrderedGrammarList();
			int indexOfG = subtree.size()-1; // g comes last
			subtree.remove(indexOfG);
			return subtree;
		}
		return null; // g is not in the tree
	}

	/** Get the grammars that g delegates to directly, i.e. the grammars of the
	 *  children of g's node in the delegation tree, in child order.
	 *
	 *  @return the direct delegates, or null if g is not found in the tree or
	 *          its node has no children
	 */
	public List<Grammar> getDirectDelegates(Grammar g) {
		CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g);
		// findNode may fail to locate g; treat that like "no delegates"
		// instead of dereferencing null
		if ( t==null || t.children==null ) {
			return null;
		}
		List<CompositeGrammarTree> children = t.children;
		List<Grammar> grammars = new ArrayList<Grammar>(children.size());
		for (CompositeGrammarTree child : children) {
			grammars.add(child.grammar);
		}
		return grammars;
	}

	/** Get delegates below direct delegates of g: all delegates in g's
	 *  subtree minus the ones g delegates to directly.
	 *
	 *  @return the indirect delegates, or null if getDelegates(g) returns
	 *          null (g not found in the tree)
	 */
	public List<Grammar> getIndirectDelegates(Grammar g) {
		List<Grammar> delegates = getDelegates(g);
		if ( delegates==null ) {
			return null; // g is not in the tree
		}
		List<Grammar> direct = getDirectDelegates(g);
		// getDirectDelegates returns null when g has no children;
		// removeAll(null) would throw NullPointerException
		if ( direct!=null ) {
			delegates.removeAll(direct);
		}
		return delegates;
	}

	/** Return list of delegator grammars from root down to g.
	 *  Order is root, ..., g.parent.  (g not included).
	 *
	 *  @return the chain of delegators, or null if g is the root grammar or
	 *          is not found in the delegation tree
	 */
	public List<Grammar> getDelegators(Grammar g) {
		if ( g==delegateGrammarTreeRoot.grammar ) {
			return null;
		}
		CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g);
		if ( t==null ) {
			return null; // not found; avoid dereferencing a missing node
		}
		List<Grammar> grammars = new ArrayList<Grammar>();
		// walk backwards to root, collecting grammars
		for (CompositeGrammarTree p = t.parent; p!=null; p = p.parent) {
			grammars.add(0, p.grammar); // add to head so in order later
		}
		return grammars;
	}

	/** Get set of rules for grammar g that need to have manual delegation
	 *  methods.  This is the set returned by getAllImportedRules(g) with
	 *  every rule removed that g defines locally (an override) or that is
	 *  a syntactic predicate.
	 *
	 *  This returns null except for the delegate root because it is the only
	 *  one that has to have a complete grammar rule interface.  The delegates
	 *  should not be instantiated directly for use as parsers (you can create
	 *  them to pass to the root parser's ctor as arguments).
	 */
	public Set<Rule> getDelegatedRules(Grammar g) {
		boolean isRoot = g==delegateGrammarTreeRoot.grammar;
		if ( !isRoot ) {
			return null;
		}
		Set<Rule> candidates = getAllImportedRules(g);
		Iterator<Rule> cursor = candidates.iterator();
		while ( cursor.hasNext() ) {
			Rule imported = cursor.next();
			boolean definedLocally = g.getLocallyDefinedRule(imported.name)!=null;
			// no delegation method for overridden rules or for synpreds
			if ( definedLocally || imported.isSynPred ) {
				cursor.remove();
			}
		}
		return candidates;
	}

	/** Collect rule definitions from the grammars in the delegation subtree
	 *  rooted at g's node, visiting grammars in the order given by
	 *  getPreOrderedGrammarList().  When several grammars define a rule with
	 *  the same name, only the first one encountered is kept.
	 *
	 *  NOTE(review): whether the visited list includes g itself depends on
	 *  getPreOrderedGrammarList(); confirm against CompositeGrammarTree.
	 */
	public Set<Rule> getAllImportedRules(Grammar g) {
		Set<String> seenNames = new HashSet<String>();
		Set<Rule> collected = new HashSet<Rule>();
		CompositeGrammarTree subtreeRoot = delegateGrammarTreeRoot.findNode(g);

		// priority goes to the grammar listed first; names are tracked
		// separately because Rule's hashCode/equals are not suitable for
		// name-based de-duplication
		for (Grammar delegate : subtreeRoot.getPreOrderedGrammarList()) {
			for (Rule r : delegate.getRules()) {
				boolean firstTimeSeen = seenNames.add(r.name);
				if ( firstTimeSeen ) {
					collected.add(r);
				}
			}
		}
		return collected;
	}

	/** Return the grammar at the root of the delegation tree, or null if no
	 *  delegation root has been set.
	 */
	public Grammar getRootGrammar() {
		CompositeGrammarTree rootNode = delegateGrammarTreeRoot;
		return rootNode==null ? null : rootNode.grammar;
	}

	/** Find the grammar named grammarName in the delegation tree.
	 *
	 *  @return the matching grammar, or null if no node is found for the name
	 */
	public Grammar getGrammar(String grammarName) {
		CompositeGrammarTree match = delegateGrammarTreeRoot.findNode(grammarName);
		return match==null ? null : match.grammar;
	}

	// NFA spans multiple grammars, must handle here

	/** Hand out the next unused NFA state number and advance the shared
	 *  counter by one.
	 */
	public int getNewNFAStateNumber() {
		int assigned = stateCounter;
		stateCounter = assigned + 1;
		return assigned;
	}

	/** Record state in the number-to-state table at index state.stateNumber.
	 *
	 *  The table is grown when the index lies beyond its current size.  It is
	 *  never shrunk: Vector.setSize discards elements when given a smaller
	 *  size, so calling it unconditionally would drop previously registered
	 *  higher-numbered states if a lower-numbered state were added later.
	 */
	public void addState(NFAState state) {
		int requiredSize = state.stateNumber+1;
		if ( requiredSize > numberToStateList.size() ) {
			numberToStateList.setSize(requiredSize); // make sure we have room
		}
		numberToStateList.set(state.stateNumber, state);
	}

	/** Return the NFA state stored at index s of the number-to-state table. */
	public NFAState getState(int s) {
		NFAState found = numberToStateList.get(s);
		return found;
	}

	/** Assign token types for every grammar in the composite.
	 *
	 *  A single walker is run over the grammar tree of each grammar in the
	 *  post-ordered grammar list; a RecognitionException raised while walking
	 *  a grammar is reported through ErrorManager and the remaining grammars
	 *  are still processed.  Afterwards the walker is asked to define the
	 *  collected tokens in the root grammar.
	 */
	public void assignTokenTypes() throws RecognitionException {
		// same walker instance is shared by all delegates
		AssignTokenTypesWalker walker = new AssignTokenTypesBehavior();
		List<Grammar> postOrder = delegateGrammarTreeRoot.getPostOrderedGrammarList();
		if ( postOrder!=null ) {
			for (Grammar g : postOrder) {
				walker.setTreeNodeStream(new CommonTreeNodeStream(g.getGrammarTree()));
				try {
					walker.grammar_(g);
				}
				catch (RecognitionException re) {
					ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE,
									   re);
				}
			}
		}
		// the walker has filled literals, tokens, and alias tables.
		// now tell it to define them in the root grammar
		walker.defineTokens(delegateGrammarTreeRoot.grammar);
	}

	/** For every PARSER or COMBINED grammar in the composite, look at each
	 *  RULE subtree whose name does not start with an uppercase letter and
	 *  ask the grammar to translate it when LeftRecursiveRuleAnalyzer reports
	 *  immediate recursive rule references.
	 */
	public void translateLeftRecursiveRules() {
		List<Grammar> postOrder = delegateGrammarTreeRoot.getPostOrderedGrammarList();
		if ( postOrder==null ) {
			return;
		}
		for (Grammar g : postOrder) {
			boolean parserLike = g.type==Grammar.PARSER || g.type==Grammar.COMBINED;
			if ( !parserLike ) {
				continue;
			}
			for (GrammarAST ruleAST : g.grammarTree.findAllType(ANTLRParser.RULE)) {
				String ruleName = ruleAST.getChild(0).getText();
				if ( Character.isUpperCase(ruleName.charAt(0)) ) {
					continue; // uppercase-initial rules are not candidates
				}
				if ( LeftRecursiveRuleAnalyzer.hasImmediateRecursiveRuleRefs(ruleAST, ruleAST.enclosingRuleName) ) {
					g.translateLeftRecursiveRule(ruleAST);
				}
			}
		}
	}

	/** Define symbols for all grammars in the composite.
	 *
	 *  After trimming lexer imports into combined grammars, the first pass
	 *  has each grammar (post-order) define its symbols; only once all
	 *  grammars are done does the second pass check name spaces and actions.
	 *  Finally the rule set is minimized.
	 */
	public void defineGrammarSymbols() {
		delegateGrammarTreeRoot.trimLexerImportsIntoCombined();
		List<Grammar> postOrder = delegateGrammarTreeRoot.getPostOrderedGrammarList();
		if ( postOrder!=null ) {
			for (Grammar g : postOrder) {
				g.defineGrammarSymbols();
			}
			for (Grammar g : postOrder) {
				g.checkNameSpaceAndActions();
			}
		}
		minimizeRuleSet();
	}

	/** Build the NFAs for all grammars in the composite, unless ErrorManager
	 *  says analysis should not be attempted.
	 *
	 *  Rule start/stop states are created for every grammar first; the NFAs
	 *  themselves are built in a second pass over the same post-ordered list.
	 */
	public void createNFAs() {
		if ( ErrorManager.doNotAttemptAnalysis() ) {
			return;
		}
		List<Grammar> postOrder = delegateGrammarTreeRoot.getPostOrderedGrammarList();
		if ( postOrder==null ) {
			return;
		}
		for (Grammar g : postOrder) {
			g.createRuleStartAndStopNFAStates();
		}
		for (Grammar g : postOrder) {
			g.buildNFA();
		}
	}

	/** Compute the overridden-rule sets for every grammar in the delegation
	 *  tree, starting at the root with an empty set of already-defined rule
	 *  names.
	 */
	public void minimizeRuleSet() {
		_minimizeRuleSet(new HashSet<String>(), delegateGrammarTreeRoot);
	}

	/** Recursive worker for minimizeRuleSet().
	 *
	 *  For the grammar at node p, every rule whose name is already in
	 *  ruleDefs (defined earlier in the tree walk) is recorded as overridden,
	 *  except the artificial tokens rule; the resulting set is stored in the
	 *  grammar's overriddenRules.  The names of p's remaining rules are then
	 *  added to ruleDefs and the walk continues into p's children.
	 *
	 *  @param ruleDefs names of rules defined so far in the walk; updated in place
	 *  @param p        node of the delegation tree to process
	 */
	public void _minimizeRuleSet(Set<String> ruleDefs,
								 CompositeGrammarTree p) {
		Set<String> newlyDefined = new HashSet<String>();
		Set<String> overridden = new HashSet<String>();
		for (Rule r : p.grammar.getRules()) {
			String name = r.name;
			if ( ruleDefs.contains(name) ) {
				// record any overridden rule 'cept tokens rule
				if ( !name.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) {
					overridden.add(name);
				}
			}
			else {
				newlyDefined.add(name);
			}
		}
		p.grammar.overriddenRules = overridden;

		// the same rule in two delegates resolves in favor of the first one
		// found in the tree, so later definitions must not be included
		ruleDefs.addAll(newlyDefined);

		// pass larger set of defined rules to delegates
		if ( p.children==null ) {
			return;
		}
		for (CompositeGrammarTree delegate : p.children) {
			_minimizeRuleSet(ruleDefs, delegate);
		}
	}

	/*
	public void minimizeRuleSet() {
		Set<Rule> refs = _minimizeRuleSet(delegateGrammarTreeRoot);
		System.out.println("all rule refs: "+refs);
	}

	public Set<Rule> _minimizeRuleSet(CompositeGrammarTree p) {
		Set<Rule> refs = new HashSet<Rule>();
		for (GrammarAST refAST : p.grammar.ruleRefs) {
			System.out.println("ref "+refAST.getText()+": "+refAST.NFAStartState+
							   " enclosing rule: "+refAST.NFAStartState.enclosingRule+
							   " invoking rule: "+((NFAState)refAST.NFAStartState.transition[0].target).enclosingRule);
			refs.add(((NFAState)refAST.NFAStartState.transition[0].target).enclosingRule);
		}

		if ( p.children!=null ) {
			for (CompositeGrammarTree delegate : p.children) {
				Set<Rule> delegateRuleRefs = _minimizeRuleSet(delegate);
				refs.addAll(delegateRuleRefs);
			}
		}

		return refs;
	}
	*/

	/*
	public void oldminimizeRuleSet() {
		// first walk to remove all overridden rules
		Set<String> ruleDefs = new HashSet<String>();
		Set<String> ruleRefs = new HashSet<String>();
		for (GrammarAST refAST : delegateGrammarTreeRoot.grammar.ruleRefs) {
			String rname = refAST.getText();
			ruleRefs.add(rname);
		}
		_minimizeRuleSet(ruleDefs,
						 ruleRefs,
						 delegateGrammarTreeRoot);
		System.out.println("overall rule defs: "+ruleDefs);
	}

	public void _minimizeRuleSet(Set<String> ruleDefs,
								 Set<String> ruleRefs,
								 CompositeGrammarTree p) {
		Set<String> localRuleDefs = new HashSet<String>();
		for (Rule r : p.grammar.getRules()) {
			if ( !ruleDefs.contains(r.name) ) {
				localRuleDefs.add(r.name);
				ruleDefs.add(r.name);
			}
		}
		System.out.println("rule defs for "+p.grammar.name+": "+localRuleDefs);

		// remove locally-defined rules not in ref set
		// find intersection of local rules and references from delegator
		// that is set of rules needed by delegator
		Set<String> localRuleDefsSatisfyingRefsFromBelow = new HashSet<String>();
		for (String r : ruleRefs) {
			if ( localRuleDefs.contains(r) ) {
				localRuleDefsSatisfyingRefsFromBelow.add(r);
			}
		}

		// now get list of refs from localRuleDefsSatisfyingRefsFromBelow.
		// Those rules are also allowed in this delegate
		for (GrammarAST refAST : p.grammar.ruleRefs) {
			if ( localRuleDefsSatisfyingRefsFromBelow.contains(refAST.enclosingRuleName) ) {
				// found rule ref within needed rule
			}
		}

		// remove rule refs not in the new rule def set

		// walk all children, adding rules not already defined
		if ( p.children!=null ) {
			for (CompositeGrammarTree delegate : p.children) {
				_minimizeRuleSet(ruleDefs, ruleRefs, delegate);
			}
		}
	}
	*/

	/*
	public void trackNFAStatesThatHaveLabeledEdge(Label label,
												  NFAState stateWithLabeledEdge)
	{
		Set<NFAState> states = typeToNFAStatesWithEdgeOfTypeMap.get(label);
		if ( states==null ) {
			states = new HashSet<NFAState>();
			typeToNFAStatesWithEdgeOfTypeMap.put(label, states);
		}
		states.add(stateWithLabeledEdge);
	}

	public Map<Label, Set<NFAState>> getTypeToNFAStatesWithEdgeOfTypeMap() {
		return typeToNFAStatesWithEdgeOfTypeMap;
	}

	public Set<NFAState> getStatesWithEdge(Label label) {
		return typeToNFAStatesWithEdgeOfTypeMap.get(label);
	}
*/
}