antlr/tool/NFAFactory.java

/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.tool;

import org.antlr.analysis.*;
import org.antlr.misc.IntSet;
import org.antlr.misc.IntervalSet;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;

/** Routines to construct StateClusters from EBNF grammar constructs.
 *  No optimization is done to remove unnecessary epsilon edges.
 *
 *  TODO: add an optimization that reduces number of states and transitions
 *  will help with speed of conversion and make it easier to view NFA.  For
 *  example, o-A->o-->o-B->o should be o-A->o-B->o
 */
public class NFAFactory {
	/** This factory is attached to a specifc NFA that it is building.
     *  The NFA will be filled up with states and transitions.
     */
	NFA nfa = null;

    public Rule getCurrentRule() {
        return currentRule;
    }

    public void setCurrentRule(Rule currentRule) {
        this.currentRule = currentRule;
    }

	Rule currentRule = null;

	public NFAFactory(NFA nfa) {
        nfa.setFactory(this);
		this.nfa = nfa;
	}

    public NFAState newState() {
        NFAState n = new NFAState(nfa);
        int state = nfa.getNewNFAStateNumber();
        n.stateNumber = state;
        nfa.addState(n);
		n.enclosingRule = currentRule;
		return n;
    }

	/** Optimize an alternative (list of grammar elements).
	 *
	 *  Walk the chain of elements (which can be complicated loop blocks...)
	 *  and throw away any epsilon transitions used to link up simple elements.
	 *
	 *  This only removes 195 states from the java.g's NFA, but every little
	 *  bit helps.  Perhaps I can improve in the future.
	 */
	public void optimizeAlternative(StateCluster alt) {
		NFAState s = alt.left;
		while ( s!=alt.right ) {
			// if it's a block element, jump over it and continue
			if ( s.endOfBlockStateNumber!=State.INVALID_STATE_NUMBER ) {
				s = nfa.getState(s.endOfBlockStateNumber);
				continue;
			}
			Transition t = s.transition[0];
			if ( t instanceof RuleClosureTransition ) {
				s = ((RuleClosureTransition) t).followState;
				continue;
			}
			if ( t.label.isEpsilon() && !t.label.isAction() && s.getNumberOfTransitions()==1 ) {
				// bypass epsilon transition and point to what the epsilon's
				// target points to unless that epsilon transition points to
				// a block or loop etc..  Also don't collapse epsilons that
				// point at the last node of the alt. Don't collapse action edges
				NFAState epsilonTarget = (NFAState)t.target;
				if ( epsilonTarget.endOfBlockStateNumber==State.INVALID_STATE_NUMBER &&
					 epsilonTarget.transition[0] !=null )
				{
					s.setTransition0(epsilonTarget.transition[0]);
					/*
					System.out.println("### opt "+s.stateNumber+"->"+
									   epsilonTarget.transition(0).target.stateNumber);
					*/
				}
			}
			s = (NFAState)t.target;
		}
	}

	/** From label A build Graph o-A->o */
	public StateCluster build_Atom(int label, GrammarAST associatedAST) {
		NFAState left = newState();
		NFAState right = newState();
		left.associatedASTNode = associatedAST;
		right.associatedASTNode = associatedAST;
		transitionBetweenStates(left, right, label);
		StateCluster g = new StateCluster(left, right);
		return g;
	}

	public StateCluster build_Atom(GrammarAST atomAST) {
		int tokenType = nfa.grammar.getTokenType(atomAST.getText());
		return build_Atom(tokenType, atomAST);
	}

	/** From set build single edge graph o->o-set->o.  To conform to
     *  what an alt block looks like, must have extra state on left.
     */
	public StateCluster build_Set(IntSet set, GrammarAST associatedAST) {
        NFAState left = newState();
        NFAState right = newState();
		left.associatedASTNode = associatedAST;
		right.associatedASTNode = associatedAST;
		Label label = new Label(set);
		Transition e = new Transition(label,right);
        left.addTransition(e);
		StateCluster g = new StateCluster(left, right);
        return g;
	}

    /** Can only complement block of simple alts; can complement build_Set()
     *  result, that is.  Get set and complement, replace old with complement.
    public StateCluster build_AlternativeBlockComplement(StateCluster blk) {
        State s0 = blk.left;
        IntSet set = getCollapsedBlockAsSet(s0);
        if ( set!=null ) {
            // if set is available, then structure known and blk is a set
            set = nfa.grammar.complement(set);
            Label label = s0.transition(0).target.transition(0).label;
            label.setSet(set);
        }
        return blk;
    }
	 */

    public StateCluster build_Range(int a, int b) {
        NFAState left = newState();
        NFAState right = newState();
		Label label = new Label(IntervalSet.of(a, b));
		Transition e = new Transition(label,right);
        left.addTransition(e);
        StateCluster g = new StateCluster(left, right);
        return g;
    }

	/** From char 'c' build StateCluster o-intValue(c)->o
	 */
	public StateCluster build_CharLiteralAtom(GrammarAST charLiteralAST) {
        int c = Grammar.getCharValueFromGrammarCharLiteral(charLiteralAST.getText());
		return build_Atom(c, charLiteralAST);
	}

	/** From char 'c' build StateCluster o-intValue(c)->o
	 *  can include unicode spec likes '\u0024' later.  Accepts
	 *  actual unicode 16-bit now, of course, by default.
     *  TODO not supplemental char clean!
	 */
	public StateCluster build_CharRange(String a, String b) {
		int from = Grammar.getCharValueFromGrammarCharLiteral(a);
		int to = Grammar.getCharValueFromGrammarCharLiteral(b);
		return build_Range(from, to);
	}

    /** For a non-lexer, just build a simple token reference atom.
     *  For a lexer, a string is a sequence of char to match.  That is,
     *  "fog" is treated as 'f' 'o' 'g' not as a single transition in
     *  the DFA.  Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states
     *  for n characters.
     */
    public StateCluster build_StringLiteralAtom(GrammarAST stringLiteralAST) {
        if ( nfa.grammar.type==Grammar.LEXER ) {
			StringBuffer chars =
				Grammar.getUnescapedStringFromGrammarStringLiteral(stringLiteralAST.getText());
            NFAState first = newState();
            NFAState last = null;
            NFAState prev = first;
            for (int i=0; i<chars.length(); i++) {
                int c = chars.charAt(i);
                NFAState next = newState();
                transitionBetweenStates(prev, next, c);
                prev = last = next;
            }
            return  new StateCluster(first, last);
        }

        // a simple token reference in non-Lexers
        int tokenType = nfa.grammar.getTokenType(stringLiteralAST.getText());
		return build_Atom(tokenType, stringLiteralAST);
    }

    /** For reference to rule r, build
     *
     *  o-e->(r)  o
     *
     *  where (r) is the start of rule r and the trailing o is not linked
     *  to from rule ref state directly (it's done thru the transition(0)
     *  RuleClosureTransition.
     *
     *  If the rule r is just a list of tokens, it's block will be just
     *  a set on an edge o->o->o-set->o->o->o, could inline it rather than doing
     *  the rule reference, but i'm not doing this yet as I'm not sure
     *  it would help much in the NFA->DFA construction.
     *
     *  TODO add to codegen: collapse alt blks that are sets into single matchSet
     */
    public StateCluster build_RuleRef(Rule refDef, NFAState ruleStart) {
        //System.out.println("building ref to rule "+nfa.grammar.name+"."+refDef.name);
        NFAState left = newState();
        // left.setDescription("ref to "+ruleStart.getDescription());
        NFAState right = newState();
        // right.setDescription("NFAState following ref to "+ruleStart.getDescription());
        Transition e = new RuleClosureTransition(refDef,ruleStart,right);
        left.addTransition(e);
        StateCluster g = new StateCluster(left, right);
        return g;
    }

    /** From an empty alternative build StateCluster o-e->o */
    public StateCluster build_Epsilon() {
        NFAState left = newState();
        NFAState right = newState();
        transitionBetweenStates(left, right, Label.EPSILON);
        StateCluster g = new StateCluster(left, right);
        return g;
    }

	/** Build what amounts to an epsilon transition with a semantic
	 *  predicate action.  The pred is a pointer into the AST of
	 *  the SEMPRED token.
	 */
	public StateCluster build_SemanticPredicate(GrammarAST pred) {
		// don't count syn preds
		if ( !pred.getText().toUpperCase()
				.startsWith(Grammar.SYNPRED_RULE_PREFIX.toUpperCase()) )
		{
			nfa.grammar.numberOfSemanticPredicates++;
		}
		NFAState left = newState();
		NFAState right = newState();
		Transition e = new Transition(new PredicateLabel(pred), right);
		left.addTransition(e);
		StateCluster g = new StateCluster(left, right);
		return g;
	}

	/** Build what amounts to an epsilon transition with an action.
	 *  The action goes into NFA though it is ignored during analysis.
	 *  It slows things down a bit, but I must ignore predicates after
	 *  having seen an action (5-5-2008).
	 */
	public StateCluster build_Action(GrammarAST action) {
		NFAState left = newState();
		NFAState right = newState();
		Transition e = new Transition(new ActionLabel(action), right);
		left.addTransition(e);
		return new StateCluster(left, right);
	}

	/** add an EOF transition to any rule end NFAState that points to nothing
     *  (i.e., for all those rules not invoked by another rule).  These
     *  are start symbols then.
	 *
	 *  Return the number of grammar entry points; i.e., how many rules are
	 *  not invoked by another rule (they can only be invoked from outside).
	 *  These are the start rules.
     */
    public int build_EOFStates(Collection rules) {
		int numberUnInvokedRules = 0;
        for (Iterator iterator = rules.iterator(); iterator.hasNext();) {
			Rule r = (Rule) iterator.next();
			NFAState endNFAState = r.stopState;
            // Is this rule a start symbol?  (no follow links)
			if ( endNFAState.transition[0] ==null ) {
				// if so, then don't let algorithm fall off the end of
				// the rule, make it hit EOF/EOT.
				build_EOFState(endNFAState);
				// track how many rules have been invoked by another rule
				numberUnInvokedRules++;
			}
        }
		return numberUnInvokedRules;
    }

    /** set up an NFA NFAState that will yield eof tokens or,
     *  in the case of a lexer grammar, an EOT token when the conversion
     *  hits the end of a rule.
     */
    private void build_EOFState(NFAState endNFAState) {
		NFAState end = newState();
        int label = Label.EOF;
        if ( nfa.grammar.type==Grammar.LEXER ) {
            label = Label.EOT;
			end.setEOTTargetState(true);
        }
		/*
		System.out.println("build "+nfa.grammar.getTokenDisplayName(label)+
						   " loop on end of state "+endNFAState.getDescription()+
						   " to state "+end.stateNumber);
		*/
		Transition toEnd = new Transition(label, end);
		endNFAState.addTransition(toEnd);
	}

    /** From A B build A-e->B (that is, build an epsilon arc from right
     *  of A to left of B).
     *
     *  As a convenience, return B if A is null or return A if B is null.
     */
    public StateCluster build_AB(StateCluster A, StateCluster B) {
        if ( A==null ) {
            return B;
        }
        if ( B==null ) {
            return A;
        }
		transitionBetweenStates(A.right, B.left, Label.EPSILON);
		StateCluster g = new StateCluster(A.left, B.right);
        return g;
    }

	/** From a set ('a'|'b') build
     *
     *  o->o-'a'..'b'->o->o (last NFAState is blockEndNFAState pointed to by all alts)
	 */
	public StateCluster build_AlternativeBlockFromSet(StateCluster set) {
		if ( set==null ) {
			return null;
		}

		// single alt, no decision, just return only alt state cluster
		NFAState startOfAlt = newState(); // must have this no matter what
		transitionBetweenStates(startOfAlt, set.left, Label.EPSILON);

		return new StateCluster(startOfAlt,set.right);
	}

	/** From A|B|..|Z alternative block build
     *
     *  o->o-A->o->o (last NFAState is blockEndNFAState pointed to by all alts)
     *  |          ^
     *  o->o-B->o--|
     *  |          |
     *  ...        |
     *  |          |
     *  o->o-Z->o--|
     *
     *  So every alternative gets begin NFAState connected by epsilon
     *  and every alt right side points at a block end NFAState.  There is a
     *  new NFAState in the NFAState in the StateCluster for each alt plus one for the
     *  end NFAState.
     *
     *  Special case: only one alternative: don't make a block with alt
     *  begin/end.
     *
     *  Special case: if just a list of tokens/chars/sets, then collapse
     *  to a single edge'd o-set->o graph.
     *
     *  Set alt number (1..n) in the left-Transition NFAState.
     */
    public StateCluster build_AlternativeBlock(List alternativeStateClusters)
    {
        StateCluster result = null;
        if ( alternativeStateClusters==null || alternativeStateClusters.size()==0 ) {
            return null;
        }

		// single alt case
		if ( alternativeStateClusters.size()==1 ) {
			// single alt, no decision, just return only alt state cluster
			StateCluster g = (StateCluster)alternativeStateClusters.get(0);
			NFAState startOfAlt = newState(); // must have this no matter what
			transitionBetweenStates(startOfAlt, g.left, Label.EPSILON);

			//System.out.println("### opt saved start/stop end in (...)");
			return new StateCluster(startOfAlt,g.right);
		}

		// even if we can collapse for lookahead purposes, we will still
        // need to predict the alts of this subrule in case there are actions
        // etc...  This is the decision that is pointed to from the AST node
        // (always)
        NFAState prevAlternative = null; // tracks prev so we can link to next alt
        NFAState firstAlt = null;
        NFAState blockEndNFAState = newState();
        blockEndNFAState.setDescription("end block");
        int altNum = 1;
        for (Iterator iter = alternativeStateClusters.iterator(); iter.hasNext();) {
            StateCluster g = (StateCluster) iter.next();
            // add begin NFAState for this alt connected by epsilon
            NFAState left = newState();
            left.setDescription("alt "+altNum+" of ()");
			transitionBetweenStates(left, g.left, Label.EPSILON);
			transitionBetweenStates(g.right, blockEndNFAState, Label.EPSILON);
			// Are we the first alternative?
			if ( firstAlt==null ) {
				firstAlt = left; // track extreme left node of StateCluster
			}
			else {
				// if not first alternative, must link to this alt from previous
				transitionBetweenStates(prevAlternative, left, Label.EPSILON);
			}
			prevAlternative = left;
			altNum++;
		}

		// return StateCluster pointing representing entire block
		// Points to first alt NFAState on left, block end on right
		result = new StateCluster(firstAlt, blockEndNFAState);

		firstAlt.decisionStateType = NFAState.BLOCK_START;

		// set EOB markers for Jean
		firstAlt.endOfBlockStateNumber = blockEndNFAState.stateNumber;

		return result;
    }

    /** From (A)? build either:
     *
	 *  o--A->o
	 *  |     ^
	 *  o---->|
     *
     *  or, if A is a block, just add an empty alt to the end of the block
     */
    public StateCluster build_Aoptional(StateCluster A) {
        StateCluster g = null;
        int n = nfa.grammar.getNumberOfAltsForDecisionNFA(A.left);
        if ( n==1 ) {
            // no decision, just wrap in an optional path
			//NFAState decisionState = newState();
			NFAState decisionState = A.left; // resuse left edge
			decisionState.setDescription("only alt of ()? block");
			NFAState emptyAlt = newState();
            emptyAlt.setDescription("epsilon path of ()? block");
            NFAState blockEndNFAState = null;
			blockEndNFAState = newState();
			transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON);
			blockEndNFAState.setDescription("end ()? block");
            //transitionBetweenStates(decisionState, A.left, Label.EPSILON);
            transitionBetweenStates(decisionState, emptyAlt, Label.EPSILON);
            transitionBetweenStates(emptyAlt, blockEndNFAState, Label.EPSILON);

			// set EOB markers for Jean
			decisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber;
			blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;

            g = new StateCluster(decisionState, blockEndNFAState);
        }
        else {
            // a decision block, add an empty alt
            NFAState lastRealAlt =
                    nfa.grammar.getNFAStateForAltOfDecision(A.left, n);
            NFAState emptyAlt = newState();
            emptyAlt.setDescription("epsilon path of ()? block");
            transitionBetweenStates(lastRealAlt, emptyAlt, Label.EPSILON);
            transitionBetweenStates(emptyAlt, A.right, Label.EPSILON);

			// set EOB markers for Jean (I think this is redundant here)
			A.left.endOfBlockStateNumber = A.right.stateNumber;
			A.right.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;

            g = A; // return same block, but now with optional last path
        }
		g.left.decisionStateType = NFAState.OPTIONAL_BLOCK_START;

        return g;
    }

    /** From (A)+ build
	 *
     *     |---|    (Transition 2 from A.right points at alt 1)
	 *     v   |    (follow of loop is Transition 1)
     *  o->o-A-o->o
     *
     *  Meaning that the last NFAState in A points back to A's left Transition NFAState
     *  and we add a new begin/end NFAState.  A can be single alternative or
     *  multiple.
	 *
	 *  During analysis we'll call the follow link (transition 1) alt n+1 for
	 *  an n-alt A block.
     */
    public StateCluster build_Aplus(StateCluster A) {
        NFAState left = newState();
        NFAState blockEndNFAState = newState();
		blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;

		// don't reuse A.right as loopback if it's right edge of another block
		if ( A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK ) {
			// nested A* so make another tail node to be the loop back
			// instead of the usual A.right which is the EOB for inner loop
			NFAState extraRightEdge = newState();
			transitionBetweenStates(A.right, extraRightEdge, Label.EPSILON);
			A.right = extraRightEdge;
		}

        transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON); // follow is Transition 1
		// turn A's block end into a loopback (acts like alt 2)
		transitionBetweenStates(A.right, A.left, Label.EPSILON); // loop back Transition 2
		transitionBetweenStates(left, A.left, Label.EPSILON);

		A.right.decisionStateType = NFAState.LOOPBACK;
		A.left.decisionStateType = NFAState.BLOCK_START;

		// set EOB markers for Jean
		A.left.endOfBlockStateNumber = A.right.stateNumber;

        StateCluster g = new StateCluster(left, blockEndNFAState);
        return g;
    }

    /** From (A)* build
     *
	 *     |---|
	 *     v   |
	 *  o->o-A-o--o (Transition 2 from block end points at alt 1; follow is Transition 1)
     *  |         ^
     *  o---------| (optional branch is 2nd alt of optional block containing A+)
     *
     *  Meaning that the last (end) NFAState in A points back to A's
     *  left side NFAState and we add 3 new NFAStates (the
     *  optional branch is built just like an optional subrule).
     *  See the Aplus() method for more on the loop back Transition.
	 *  The new node on right edge is set to RIGHT_EDGE_OF_CLOSURE so we
	 *  can detect nested (A*)* loops and insert an extra node.  Previously,
	 *  two blocks shared same EOB node.
     *
     *  There are 2 or 3 decision points in a A*.  If A is not a block (i.e.,
     *  it only has one alt), then there are two decisions: the optional bypass
     *  and then loopback.  If A is a block of alts, then there are three
     *  decisions: bypass, loopback, and A's decision point.
     *
     *  Note that the optional bypass must be outside the loop as (A|B)* is
     *  not the same thing as (A|B|)+.
     *
     *  This is an accurate NFA representation of the meaning of (A)*, but
     *  for generating code, I don't need a DFA for the optional branch by
     *  virtue of how I generate code.  The exit-loopback-branch decision
     *  is sufficient to let me make an appropriate enter, exit, loop
     *  determination.  See codegen.g
     */
    public StateCluster build_Astar(StateCluster A) {
		NFAState bypassDecisionState = newState();
		bypassDecisionState.setDescription("enter loop path of ()* block");
        NFAState optionalAlt = newState();
        optionalAlt.setDescription("epsilon path of ()* block");
        NFAState blockEndNFAState = newState();
		blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;

		// don't reuse A.right as loopback if it's right edge of another block
		if ( A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK ) {
			// nested A* so make another tail node to be the loop back
			// instead of the usual A.right which is the EOB for inner loop
			NFAState extraRightEdge = newState();
			transitionBetweenStates(A.right, extraRightEdge, Label.EPSILON);
			A.right = extraRightEdge;
		}

		// convert A's end block to loopback
		A.right.setDescription("()* loopback");
		// Transition 1 to actual block of stuff
        transitionBetweenStates(bypassDecisionState, A.left, Label.EPSILON);
        // Transition 2 optional to bypass
        transitionBetweenStates(bypassDecisionState, optionalAlt, Label.EPSILON);
		transitionBetweenStates(optionalAlt, blockEndNFAState, Label.EPSILON);
        // Transition 1 of end block exits
        transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON);
        // Transition 2 of end block loops
        transitionBetweenStates(A.right, A.left, Label.EPSILON);

		bypassDecisionState.decisionStateType = NFAState.BYPASS;
		A.left.decisionStateType = NFAState.BLOCK_START;
		A.right.decisionStateType = NFAState.LOOPBACK;

		// set EOB markers for Jean
		A.left.endOfBlockStateNumber = A.right.stateNumber;
		bypassDecisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber;

        StateCluster g = new StateCluster(bypassDecisionState, blockEndNFAState);
        return g;
    }

    /** Build an NFA predictor for special rule called Tokens manually that
     *  predicts which token will succeed.  The refs to the rules are not
     *  RuleRefTransitions as I want DFA conversion to stop at the EOT
     *  transition on the end of each token, rather than return to Tokens rule.
     *  If I used normal build_alternativeBlock for this, the RuleRefTransitions
     *  would save return address when jumping away from Tokens rule.
     *
     *  All I do here is build n new states for n rules with an epsilon
     *  edge to the rule start states and then to the next state in the
     *  list:
     *
     *   o->(A)  (a state links to start of A and to next in list)
     *   |
     *   o->(B)
     *   |
     *   ...
     *   |
     *   o->(Z)
	 *
	 *  This is the NFA created for the artificial rule created in
	 *  Grammar.addArtificialMatchTokensRule().
	 *
	 *  11/28/2005: removed so we can use normal rule construction for Tokens.
    public NFAState build_ArtificialMatchTokensRuleNFA() {
        int altNum = 1;
        NFAState firstAlt = null; // the start state for the "rule"
        NFAState prevAlternative = null;
        Iterator iter = nfa.grammar.getRules().iterator();
		// TODO: add a single decision node/state for good description
        while (iter.hasNext()) {
			Rule r = (Rule) iter.next();
            String ruleName = r.name;
			String modifier = nfa.grammar.getRuleModifier(ruleName);
            if ( ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ||
				 (modifier!=null &&
				  modifier.equals(Grammar.FRAGMENT_RULE_MODIFIER)) )
			{
                continue; // don't loop to yourself or do nontoken rules
            }
            NFAState ruleStartState = nfa.grammar.getRuleStartState(ruleName);
            NFAState left = newState();
            left.setDescription("alt "+altNum+" of artificial rule "+Grammar.ARTIFICIAL_TOKENS_RULENAME);
            transitionBetweenStates(left, ruleStartState, Label.EPSILON);
            // Are we the first alternative?
            if ( firstAlt==null ) {
                firstAlt = left; // track extreme top left node as rule start
            }
            else {
                // if not first alternative, must link to this alt from previous
                transitionBetweenStates(prevAlternative, left, Label.EPSILON);
            }
            prevAlternative = left;
            altNum++;
        }
		firstAlt.decisionStateType = NFAState.BLOCK_START;

        return firstAlt;
    }
	 */

    /** Build an atom with all possible values in its label */
    public StateCluster build_Wildcard(GrammarAST associatedAST) {
        NFAState left = newState();
        NFAState right = newState();
        left.associatedASTNode = associatedAST;
        right.associatedASTNode = associatedAST;
        Label label = new Label(nfa.grammar.getTokenTypes()); // char or tokens
        Transition e = new Transition(label,right);
        left.addTransition(e);
        StateCluster g = new StateCluster(left, right);
        return g;
    }

    /** Build a subrule matching ^(. .*) (any tree or node). Let's use
     *  (^(. .+) | .) to be safe.
     */
    public StateCluster build_WildcardTree(GrammarAST associatedAST) {
        StateCluster wildRoot = build_Wildcard(associatedAST);

        StateCluster down = build_Atom(Label.DOWN, associatedAST);
        wildRoot = build_AB(wildRoot,down); // hook in; . DOWN

        // make .+
        StateCluster wildChildren = build_Wildcard(associatedAST);
        wildChildren = build_Aplus(wildChildren);
        wildRoot = build_AB(wildRoot,wildChildren); // hook in; . DOWN .+

        StateCluster up = build_Atom(Label.UP, associatedAST);
        wildRoot = build_AB(wildRoot,up); // hook in; . DOWN .+ UP

        // make optional . alt
        StateCluster optionalNodeAlt = build_Wildcard(associatedAST);

        List alts = new ArrayList();
        alts.add(wildRoot);
        alts.add(optionalNodeAlt);
        StateCluster blk = build_AlternativeBlock(alts);

        return blk;
    }

    /** Given a collapsed block of alts (a set of atoms), pull out
     *  the set and return it.
     */
    protected IntSet getCollapsedBlockAsSet(State blk) {
        State s0 = blk;
        if ( s0!=null && s0.transition(0)!=null ) {
            State s1 = s0.transition(0).target;
            if ( s1!=null && s1.transition(0)!=null ) {
                Label label = s1.transition(0).label;
                if ( label.isSet() ) {
                    return label.getSet();
                }
            }
        }
        return null;
    }

	private void transitionBetweenStates(NFAState a, NFAState b, int label) {
		Transition e = new Transition(label,b);
		a.addTransition(e);
	}
}