1324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/*
2324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * [The "BSD license"]
3324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  Copyright (c) 2010 Terence Parr
4324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  All rights reserved.
5324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
6324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  Redistribution and use in source and binary forms, with or without
7324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  modification, are permitted provided that the following conditions
8324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  are met:
9324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  1. Redistributions of source code must retain the above copyright
10324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *      notice, this list of conditions and the following disclaimer.
11324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  2. Redistributions in binary form must reproduce the above copyright
12324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *      notice, this list of conditions and the following disclaimer in the
13324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *      documentation and/or other materials provided with the distribution.
14324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  3. The name of the author may not be used to endorse or promote products
15324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *      derived from this software without specific prior written permission.
16324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
17324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
28324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverpackage org.antlr.tool;
29324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
30324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport org.antlr.analysis.*;
31324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport org.antlr.misc.IntSet;
32324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport org.antlr.misc.IntervalSet;
33324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
34324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport java.util.ArrayList;
35324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport java.util.Collection;
36324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport java.util.Iterator;
37324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport java.util.List;
38324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
39324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Routines to construct StateClusters from EBNF grammar constructs.
40324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  No optimization is done to remove unnecessary epsilon edges.
41324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
42324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  TODO: add an optimization that reduces number of states and transitions
43324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  will help with speed of conversion and make it easier to view NFA.  For
44324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  example, o-A->o-->o-B->o should be o-A->o-B->o
45324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
46324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverpublic class NFAFactory {
47324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/** This factory is attached to a specifc NFA that it is building.
48324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  The NFA will be filled up with states and transitions.
49324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
50324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	NFA nfa = null;
51324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
52324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public Rule getCurrentRule() {
53324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return currentRule;
54324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
55324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
56324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public void setCurrentRule(Rule currentRule) {
57324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        this.currentRule = currentRule;
58324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
59324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
60324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	Rule currentRule = null;
61324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
62324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	public NFAFactory(NFA nfa) {
63324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        nfa.setFactory(this);
64324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		this.nfa = nfa;
65324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
66324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
67324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public NFAState newState() {
68324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState n = new NFAState(nfa);
69324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        int state = nfa.getNewNFAStateNumber();
70324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        n.stateNumber = state;
71324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        nfa.addState(n);
72324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		n.enclosingRule = currentRule;
73324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return n;
74324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
75324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
76324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/** Optimize an alternative (list of grammar elements).
77324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *
78324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  Walk the chain of elements (which can be complicated loop blocks...)
79324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  and throw away any epsilon transitions used to link up simple elements.
80324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *
81324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  This only removes 195 states from the java.g's NFA, but every little
82324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  bit helps.  Perhaps I can improve in the future.
83324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 */
84324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	public void optimizeAlternative(StateCluster alt) {
85324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		NFAState s = alt.left;
86324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		while ( s!=alt.right ) {
87324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			// if it's a block element, jump over it and continue
88324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			if ( s.endOfBlockStateNumber!=State.INVALID_STATE_NUMBER ) {
89324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				s = nfa.getState(s.endOfBlockStateNumber);
90324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				continue;
91324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			}
92324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			Transition t = s.transition[0];
93324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			if ( t instanceof RuleClosureTransition ) {
94324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				s = ((RuleClosureTransition) t).followState;
95324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				continue;
96324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			}
97324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			if ( t.label.isEpsilon() && !t.label.isAction() && s.getNumberOfTransitions()==1 ) {
98324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				// bypass epsilon transition and point to what the epsilon's
99324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				// target points to unless that epsilon transition points to
100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				// a block or loop etc..  Also don't collapse epsilons that
101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				// point at the last node of the alt. Don't collapse action edges
102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				NFAState epsilonTarget = (NFAState)t.target;
103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				if ( epsilonTarget.endOfBlockStateNumber==State.INVALID_STATE_NUMBER &&
104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver					 epsilonTarget.transition[0] !=null )
105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				{
106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver					s.setTransition0(epsilonTarget.transition[0]);
107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver					/*
108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver					System.out.println("### opt "+s.stateNumber+"->"+
109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver									   epsilonTarget.transition(0).target.stateNumber);
110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver					*/
111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				}
112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			}
113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			s = (NFAState)t.target;
114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/** From label A build Graph o-A->o */
118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	public StateCluster build_Atom(int label, GrammarAST associatedAST) {
119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		NFAState left = newState();
120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		NFAState right = newState();
121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		left.associatedASTNode = associatedAST;
122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		right.associatedASTNode = associatedAST;
123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		transitionBetweenStates(left, right, label);
124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		StateCluster g = new StateCluster(left, right);
125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return g;
126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	public StateCluster build_Atom(GrammarAST atomAST) {
129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		int tokenType = nfa.grammar.getTokenType(atomAST.getText());
130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return build_Atom(tokenType, atomAST);
131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/** From set build single edge graph o->o-set->o.  To conform to
134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  what an alt block looks like, must have extra state on left.
135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	public StateCluster build_Set(IntSet set, GrammarAST associatedAST) {
137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState left = newState();
138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState right = newState();
139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		left.associatedASTNode = associatedAST;
140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		right.associatedASTNode = associatedAST;
141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		Label label = new Label(set);
142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		Transition e = new Transition(label,right);
143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        left.addTransition(e);
144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		StateCluster g = new StateCluster(left, right);
145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return g;
146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Can only complement block of simple alts; can complement build_Set()
149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  result, that is.  Get set and complement, replace old with complement.
150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public StateCluster build_AlternativeBlockComplement(StateCluster blk) {
151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        State s0 = blk.left;
152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        IntSet set = getCollapsedBlockAsSet(s0);
153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if ( set!=null ) {
154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // if set is available, then structure known and blk is a set
155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            set = nfa.grammar.complement(set);
156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            Label label = s0.transition(0).target.transition(0).label;
157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            label.setSet(set);
158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return blk;
160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 */
162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public StateCluster build_Range(int a, int b) {
164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState left = newState();
165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState right = newState();
166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		Label label = new Label(IntervalSet.of(a, b));
167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		Transition e = new Transition(label,right);
168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        left.addTransition(e);
169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        StateCluster g = new StateCluster(left, right);
170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return g;
171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/** From char 'c' build StateCluster o-intValue(c)->o
174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 */
175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	public StateCluster build_CharLiteralAtom(GrammarAST charLiteralAST) {
176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        int c = Grammar.getCharValueFromGrammarCharLiteral(charLiteralAST.getText());
177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return build_Atom(c, charLiteralAST);
178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/** From char 'c' build StateCluster o-intValue(c)->o
181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  can include unicode spec likes '\u0024' later.  Accepts
182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  actual unicode 16-bit now, of course, by default.
183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  TODO not supplemental char clean!
184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 */
185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	public StateCluster build_CharRange(String a, String b) {
186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		int from = Grammar.getCharValueFromGrammarCharLiteral(a);
187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		int to = Grammar.getCharValueFromGrammarCharLiteral(b);
188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return build_Range(from, to);
189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** For a non-lexer, just build a simple token reference atom.
192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  For a lexer, a string is a sequence of char to match.  That is,
193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  "fog" is treated as 'f' 'o' 'g' not as a single transition in
194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  the DFA.  Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states
195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  for n characters.
196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public StateCluster build_StringLiteralAtom(GrammarAST stringLiteralAST) {
198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if ( nfa.grammar.type==Grammar.LEXER ) {
199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			StringBuffer chars =
200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				Grammar.getUnescapedStringFromGrammarStringLiteral(stringLiteralAST.getText());
201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            NFAState first = newState();
202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            NFAState last = null;
203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            NFAState prev = first;
204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            for (int i=0; i<chars.length(); i++) {
205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                int c = chars.charAt(i);
206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                NFAState next = newState();
207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                transitionBetweenStates(prev, next, c);
208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                prev = last = next;
209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return  new StateCluster(first, last);
211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // a simple token reference in non-Lexers
214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        int tokenType = nfa.grammar.getTokenType(stringLiteralAST.getText());
215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return build_Atom(tokenType, stringLiteralAST);
216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** For reference to rule r, build
219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  o-e->(r)  o
221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  where (r) is the start of rule r and the trailing o is not linked
223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  to from rule ref state directly (it's done thru the transition(0)
224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  RuleClosureTransition.
225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  If the rule r is just a list of tokens, it's block will be just
227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  a set on an edge o->o->o-set->o->o->o, could inline it rather than doing
228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  the rule reference, but i'm not doing this yet as I'm not sure
229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  it would help much in the NFA->DFA construction.
230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  TODO add to codegen: collapse alt blks that are sets into single matchSet
232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public StateCluster build_RuleRef(Rule refDef, NFAState ruleStart) {
234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //System.out.println("building ref to rule "+nfa.grammar.name+"."+refDef.name);
235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState left = newState();
236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // left.setDescription("ref to "+ruleStart.getDescription());
237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState right = newState();
238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // right.setDescription("NFAState following ref to "+ruleStart.getDescription());
239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Transition e = new RuleClosureTransition(refDef,ruleStart,right);
240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        left.addTransition(e);
241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        StateCluster g = new StateCluster(left, right);
242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return g;
243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** From an empty alternative build StateCluster o-e->o */
246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public StateCluster build_Epsilon() {
247324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState left = newState();
248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState right = newState();
249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        transitionBetweenStates(left, right, Label.EPSILON);
250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        StateCluster g = new StateCluster(left, right);
251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return g;
252324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/** Build what amounts to an epsilon transition with a semantic
255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  predicate action.  The pred is a pointer into the AST of
256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  the SEMPRED token.
257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 */
258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	public StateCluster build_SemanticPredicate(GrammarAST pred) {
259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// don't count syn preds
260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		if ( !pred.getText().toUpperCase()
261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				.startsWith(Grammar.SYNPRED_RULE_PREFIX.toUpperCase()) )
262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		{
263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			nfa.grammar.numberOfSemanticPredicates++;
264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		NFAState left = newState();
266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		NFAState right = newState();
267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		Transition e = new Transition(new PredicateLabel(pred), right);
268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		left.addTransition(e);
269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		StateCluster g = new StateCluster(left, right);
270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return g;
271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/** Build what amounts to an epsilon transition with an action.
274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  The action goes into NFA though it is ignored during analysis.
275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  It slows things down a bit, but I must ignore predicates after
276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  having seen an action (5-5-2008).
277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 */
278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	public StateCluster build_Action(GrammarAST action) {
279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		NFAState left = newState();
280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		NFAState right = newState();
281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		Transition e = new Transition(new ActionLabel(action), right);
282324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		left.addTransition(e);
283324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return new StateCluster(left, right);
284324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
285324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
286324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/** add an EOF transition to any rule end NFAState that points to nothing
287324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  (i.e., for all those rules not invoked by another rule).  These
288324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  are start symbols then.
289324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *
290324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  Return the number of grammar entry points; i.e., how many rules are
291324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  not invoked by another rule (they can only be invoked from outside).
292324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  These are the start rules.
293324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
294324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public int build_EOFStates(Collection rules) {
295324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		int numberUnInvokedRules = 0;
296324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        for (Iterator iterator = rules.iterator(); iterator.hasNext();) {
297324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			Rule r = (Rule) iterator.next();
298324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			NFAState endNFAState = r.stopState;
299324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Is this rule a start symbol?  (no follow links)
300324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			if ( endNFAState.transition[0] ==null ) {
301324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				// if so, then don't let algorithm fall off the end of
302324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				// the rule, make it hit EOF/EOT.
303324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				build_EOFState(endNFAState);
304324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				// track how many rules have been invoked by another rule
305324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				numberUnInvokedRules++;
306324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			}
307324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
308324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return numberUnInvokedRules;
309324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
310324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
311324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** set up an NFA NFAState that will yield eof tokens or,
312324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  in the case of a lexer grammar, an EOT token when the conversion
313324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  hits the end of a rule.
314324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
315324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    private void build_EOFState(NFAState endNFAState) {
316324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		NFAState end = newState();
317324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        int label = Label.EOF;
318324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if ( nfa.grammar.type==Grammar.LEXER ) {
319324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            label = Label.EOT;
320324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			end.setEOTTargetState(true);
321324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
322324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		/*
323324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		System.out.println("build "+nfa.grammar.getTokenDisplayName(label)+
324324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver						   " loop on end of state "+endNFAState.getDescription()+
325324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver						   " to state "+end.stateNumber);
326324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		*/
327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		Transition toEnd = new Transition(label, end);
328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		endNFAState.addTransition(toEnd);
329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
330324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** From A B build A-e->B (that is, build an epsilon arc from right
332324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  of A to left of B).
333324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
334324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  As a convenience, return B if A is null or return A if B is null.
335324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
336324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public StateCluster build_AB(StateCluster A, StateCluster B) {
337324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if ( A==null ) {
338324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return B;
339324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
340324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if ( B==null ) {
341324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return A;
342324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		transitionBetweenStates(A.right, B.left, Label.EPSILON);
344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		StateCluster g = new StateCluster(A.left, B.right);
345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return g;
346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
347324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
348324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/** From a set ('a'|'b') build
349324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
350324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  o->o-'a'..'b'->o->o (last NFAState is blockEndNFAState pointed to by all alts)
351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 */
352324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	public StateCluster build_AlternativeBlockFromSet(StateCluster set) {
353324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		if ( set==null ) {
354324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			return null;
355324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
356324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
357324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// single alt, no decision, just return only alt state cluster
358324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		NFAState startOfAlt = newState(); // must have this no matter what
359324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		transitionBetweenStates(startOfAlt, set.left, Label.EPSILON);
360324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
361324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return new StateCluster(startOfAlt,set.right);
362324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
363324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
364324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/** From A|B|..|Z alternative block build
365324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
366324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  o->o-A->o->o (last NFAState is blockEndNFAState pointed to by all alts)
367324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  |          ^
368324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  o->o-B->o--|
369324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  |          |
370324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  ...        |
371324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  |          |
372324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  o->o-Z->o--|
373324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
374324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  So every alternative gets begin NFAState connected by epsilon
375324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  and every alt right side points at a block end NFAState.  There is a
376324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  new NFAState in the NFAState in the StateCluster for each alt plus one for the
377324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  end NFAState.
378324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
379324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  Special case: only one alternative: don't make a block with alt
380324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  begin/end.
381324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
382324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  Special case: if just a list of tokens/chars/sets, then collapse
383324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  to a single edge'd o-set->o graph.
384324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
385324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  Set alt number (1..n) in the left-Transition NFAState.
386324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
387324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public StateCluster build_AlternativeBlock(List alternativeStateClusters)
388324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
389324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        StateCluster result = null;
390324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if ( alternativeStateClusters==null || alternativeStateClusters.size()==0 ) {
391324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return null;
392324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
393324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
394324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// single alt case
395324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		if ( alternativeStateClusters.size()==1 ) {
396324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			// single alt, no decision, just return only alt state cluster
397324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			StateCluster g = (StateCluster)alternativeStateClusters.get(0);
398324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			NFAState startOfAlt = newState(); // must have this no matter what
399324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			transitionBetweenStates(startOfAlt, g.left, Label.EPSILON);
400324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
401324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			//System.out.println("### opt saved start/stop end in (...)");
402324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			return new StateCluster(startOfAlt,g.right);
403324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
404324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
405324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// even if we can collapse for lookahead purposes, we will still
406324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // need to predict the alts of this subrule in case there are actions
407324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // etc...  This is the decision that is pointed to from the AST node
408324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // (always)
409324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState prevAlternative = null; // tracks prev so we can link to next alt
410324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState firstAlt = null;
411324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState blockEndNFAState = newState();
412324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        blockEndNFAState.setDescription("end block");
413324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        int altNum = 1;
414324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        for (Iterator iter = alternativeStateClusters.iterator(); iter.hasNext();) {
415324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            StateCluster g = (StateCluster) iter.next();
416324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // add begin NFAState for this alt connected by epsilon
417324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            NFAState left = newState();
418324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            left.setDescription("alt "+altNum+" of ()");
419324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			transitionBetweenStates(left, g.left, Label.EPSILON);
420324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			transitionBetweenStates(g.right, blockEndNFAState, Label.EPSILON);
421324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			// Are we the first alternative?
422324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			if ( firstAlt==null ) {
423324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				firstAlt = left; // track extreme left node of StateCluster
424324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			}
425324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			else {
426324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				// if not first alternative, must link to this alt from previous
427324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				transitionBetweenStates(prevAlternative, left, Label.EPSILON);
428324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			}
429324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			prevAlternative = left;
430324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			altNum++;
431324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
432324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
433324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// return StateCluster pointing representing entire block
434324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// Points to first alt NFAState on left, block end on right
435324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		result = new StateCluster(firstAlt, blockEndNFAState);
436324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
437324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		firstAlt.decisionStateType = NFAState.BLOCK_START;
438324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
439324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// set EOB markers for Jean
440324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		firstAlt.endOfBlockStateNumber = blockEndNFAState.stateNumber;
441324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
442324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return result;
443324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
444324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
445324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** From (A)? build either:
446324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
447324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  o--A->o
448324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  |     ^
449324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  o---->|
450324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
451324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  or, if A is a block, just add an empty alt to the end of the block
452324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
453324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public StateCluster build_Aoptional(StateCluster A) {
454324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        StateCluster g = null;
455324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        int n = nfa.grammar.getNumberOfAltsForDecisionNFA(A.left);
456324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if ( n==1 ) {
457324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // no decision, just wrap in an optional path
458324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			//NFAState decisionState = newState();
459324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			NFAState decisionState = A.left; // resuse left edge
460324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			decisionState.setDescription("only alt of ()? block");
461324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			NFAState emptyAlt = newState();
462324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            emptyAlt.setDescription("epsilon path of ()? block");
463324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            NFAState blockEndNFAState = null;
464324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			blockEndNFAState = newState();
465324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON);
466324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			blockEndNFAState.setDescription("end ()? block");
467324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //transitionBetweenStates(decisionState, A.left, Label.EPSILON);
468324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            transitionBetweenStates(decisionState, emptyAlt, Label.EPSILON);
469324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            transitionBetweenStates(emptyAlt, blockEndNFAState, Label.EPSILON);
470324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
471324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			// set EOB markers for Jean
472324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			decisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber;
473324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
474324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
475324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            g = new StateCluster(decisionState, blockEndNFAState);
476324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
477324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        else {
478324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // a decision block, add an empty alt
479324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            NFAState lastRealAlt =
480324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    nfa.grammar.getNFAStateForAltOfDecision(A.left, n);
481324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            NFAState emptyAlt = newState();
482324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            emptyAlt.setDescription("epsilon path of ()? block");
483324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            transitionBetweenStates(lastRealAlt, emptyAlt, Label.EPSILON);
484324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            transitionBetweenStates(emptyAlt, A.right, Label.EPSILON);
485324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
486324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			// set EOB markers for Jean (I think this is redundant here)
487324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			A.left.endOfBlockStateNumber = A.right.stateNumber;
488324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			A.right.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
489324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
490324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            g = A; // return same block, but now with optional last path
491324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
492324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		g.left.decisionStateType = NFAState.OPTIONAL_BLOCK_START;
493324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
494324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return g;
495324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
496324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
497324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** From (A)+ build
498324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *
499324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *     |---|    (Transition 2 from A.right points at alt 1)
500324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *     v   |    (follow of loop is Transition 1)
501324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  o->o-A-o->o
502324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
503324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  Meaning that the last NFAState in A points back to A's left Transition NFAState
504324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  and we add a new begin/end NFAState.  A can be single alternative or
505324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  multiple.
506324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *
507324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  During analysis we'll call the follow link (transition 1) alt n+1 for
508324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  an n-alt A block.
509324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
510324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public StateCluster build_Aplus(StateCluster A) {
511324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState left = newState();
512324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState blockEndNFAState = newState();
513324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
514324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
515324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// don't reuse A.right as loopback if it's right edge of another block
516324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		if ( A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK ) {
517324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			// nested A* so make another tail node to be the loop back
518324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			// instead of the usual A.right which is the EOB for inner loop
519324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			NFAState extraRightEdge = newState();
520324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			transitionBetweenStates(A.right, extraRightEdge, Label.EPSILON);
521324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			A.right = extraRightEdge;
522324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
523324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
524324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON); // follow is Transition 1
525324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// turn A's block end into a loopback (acts like alt 2)
526324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		transitionBetweenStates(A.right, A.left, Label.EPSILON); // loop back Transition 2
527324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		transitionBetweenStates(left, A.left, Label.EPSILON);
528324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
529324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		A.right.decisionStateType = NFAState.LOOPBACK;
530324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		A.left.decisionStateType = NFAState.BLOCK_START;
531324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
532324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// set EOB markers for Jean
533324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		A.left.endOfBlockStateNumber = A.right.stateNumber;
534324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
535324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        StateCluster g = new StateCluster(left, blockEndNFAState);
536324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return g;
537324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
538324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
539324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** From (A)* build
540324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
541324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *     |---|
542324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *     v   |
543324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  o->o-A-o--o (Transition 2 from block end points at alt 1; follow is Transition 1)
544324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  |         ^
545324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  o---------| (optional branch is 2nd alt of optional block containing A+)
546324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
547324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  Meaning that the last (end) NFAState in A points back to A's
548324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  left side NFAState and we add 3 new NFAStates (the
549324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  optional branch is built just like an optional subrule).
550324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  See the Aplus() method for more on the loop back Transition.
551324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  The new node on right edge is set to RIGHT_EDGE_OF_CLOSURE so we
552324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  can detect nested (A*)* loops and insert an extra node.  Previously,
553324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  two blocks shared same EOB node.
554324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
555324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  There are 2 or 3 decision points in a A*.  If A is not a block (i.e.,
556324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  it only has one alt), then there are two decisions: the optional bypass
557324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  and then loopback.  If A is a block of alts, then there are three
558324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  decisions: bypass, loopback, and A's decision point.
559324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
560324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  Note that the optional bypass must be outside the loop as (A|B)* is
561324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  not the same thing as (A|B|)+.
562324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
563324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  This is an accurate NFA representation of the meaning of (A)*, but
564324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  for generating code, I don't need a DFA for the optional branch by
565324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  virtue of how I generate code.  The exit-loopback-branch decision
566324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  is sufficient to let me make an appropriate enter, exit, loop
567324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  determination.  See codegen.g
568324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
569324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public StateCluster build_Astar(StateCluster A) {
570324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		NFAState bypassDecisionState = newState();
571324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		bypassDecisionState.setDescription("enter loop path of ()* block");
572324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState optionalAlt = newState();
573324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        optionalAlt.setDescription("epsilon path of ()* block");
574324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState blockEndNFAState = newState();
575324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
576324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
577324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// don't reuse A.right as loopback if it's right edge of another block
578324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		if ( A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK ) {
579324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			// nested A* so make another tail node to be the loop back
580324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			// instead of the usual A.right which is the EOB for inner loop
581324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			NFAState extraRightEdge = newState();
582324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			transitionBetweenStates(A.right, extraRightEdge, Label.EPSILON);
583324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			A.right = extraRightEdge;
584324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
585324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
586324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// convert A's end block to loopback
587324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		A.right.setDescription("()* loopback");
588324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// Transition 1 to actual block of stuff
589324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        transitionBetweenStates(bypassDecisionState, A.left, Label.EPSILON);
590324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Transition 2 optional to bypass
591324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        transitionBetweenStates(bypassDecisionState, optionalAlt, Label.EPSILON);
592324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		transitionBetweenStates(optionalAlt, blockEndNFAState, Label.EPSILON);
593324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Transition 1 of end block exits
594324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON);
595324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Transition 2 of end block loops
596324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        transitionBetweenStates(A.right, A.left, Label.EPSILON);
597324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
598324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		bypassDecisionState.decisionStateType = NFAState.BYPASS;
599324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		A.left.decisionStateType = NFAState.BLOCK_START;
600324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		A.right.decisionStateType = NFAState.LOOPBACK;
601324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
602324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// set EOB markers for Jean
603324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		A.left.endOfBlockStateNumber = A.right.stateNumber;
604324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		bypassDecisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber;
605324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
606324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        StateCluster g = new StateCluster(bypassDecisionState, blockEndNFAState);
607324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return g;
608324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
609324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
610324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Build an NFA predictor for special rule called Tokens manually that
611324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  predicts which token will succeed.  The refs to the rules are not
612324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  RuleRefTransitions as I want DFA conversion to stop at the EOT
613324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  transition on the end of each token, rather than return to Tokens rule.
614324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  If I used normal build_alternativeBlock for this, the RuleRefTransitions
615324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  would save return address when jumping away from Tokens rule.
616324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
617324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  All I do here is build n new states for n rules with an epsilon
618324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  edge to the rule start states and then to the next state in the
619324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  list:
620324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *
621324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *   o->(A)  (a state links to start of A and to next in list)
622324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *   |
623324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *   o->(B)
624324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *   |
625324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *   ...
626324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *   |
627324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *   o->(Z)
628324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *
629324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  This is the NFA created for the artificial rule created in
630324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  Grammar.addArtificialMatchTokensRule().
631324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *
632324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 *  11/28/2005: removed so we can use normal rule construction for Tokens.
633324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public NFAState build_ArtificialMatchTokensRuleNFA() {
634324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        int altNum = 1;
635324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState firstAlt = null; // the start state for the "rule"
636324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState prevAlternative = null;
637324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Iterator iter = nfa.grammar.getRules().iterator();
638324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// TODO: add a single decision node/state for good description
639324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        while (iter.hasNext()) {
640324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			Rule r = (Rule) iter.next();
641324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            String ruleName = r.name;
642324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			String modifier = nfa.grammar.getRuleModifier(ruleName);
643324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if ( ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ||
644324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				 (modifier!=null &&
645324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				  modifier.equals(Grammar.FRAGMENT_RULE_MODIFIER)) )
646324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			{
647324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                continue; // don't loop to yourself or do nontoken rules
648324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
649324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            NFAState ruleStartState = nfa.grammar.getRuleStartState(ruleName);
650324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            NFAState left = newState();
651324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            left.setDescription("alt "+altNum+" of artificial rule "+Grammar.ARTIFICIAL_TOKENS_RULENAME);
652324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            transitionBetweenStates(left, ruleStartState, Label.EPSILON);
653324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Are we the first alternative?
654324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if ( firstAlt==null ) {
655324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                firstAlt = left; // track extreme top left node as rule start
656324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
657324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            else {
658324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // if not first alternative, must link to this alt from previous
659324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                transitionBetweenStates(prevAlternative, left, Label.EPSILON);
660324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
661324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            prevAlternative = left;
662324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            altNum++;
663324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
664324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		firstAlt.decisionStateType = NFAState.BLOCK_START;
665324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
666324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return firstAlt;
667324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
668324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 */
669324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
670324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Build an atom with all possible values in its label */
671324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public StateCluster build_Wildcard(GrammarAST associatedAST) {
672324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState left = newState();
673324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        NFAState right = newState();
674324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        left.associatedASTNode = associatedAST;
675324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        right.associatedASTNode = associatedAST;
676324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Label label = new Label(nfa.grammar.getTokenTypes()); // char or tokens
677324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Transition e = new Transition(label,right);
678324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        left.addTransition(e);
679324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        StateCluster g = new StateCluster(left, right);
680324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return g;
681324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
682324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
683324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Build a subrule matching ^(. .*) (any tree or node). Let's use
684324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  (^(. .+) | .) to be safe.
685324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
686324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public StateCluster build_WildcardTree(GrammarAST associatedAST) {
687324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        StateCluster wildRoot = build_Wildcard(associatedAST);
688324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
689324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        StateCluster down = build_Atom(Label.DOWN, associatedAST);
690324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        wildRoot = build_AB(wildRoot,down); // hook in; . DOWN
691324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
692324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // make .+
693324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        StateCluster wildChildren = build_Wildcard(associatedAST);
694324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        wildChildren = build_Aplus(wildChildren);
695324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        wildRoot = build_AB(wildRoot,wildChildren); // hook in; . DOWN .+
696324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
697324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        StateCluster up = build_Atom(Label.UP, associatedAST);
698324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        wildRoot = build_AB(wildRoot,up); // hook in; . DOWN .+ UP
699324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
700324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // make optional . alt
701324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        StateCluster optionalNodeAlt = build_Wildcard(associatedAST);
702324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
703324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        List alts = new ArrayList();
704324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        alts.add(wildRoot);
705324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        alts.add(optionalNodeAlt);
706324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        StateCluster blk = build_AlternativeBlock(alts);
707324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
708324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return blk;
709324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
710324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
711324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Given a collapsed block of alts (a set of atoms), pull out
712324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  the set and return it.
713324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
714324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    protected IntSet getCollapsedBlockAsSet(State blk) {
715324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        State s0 = blk;
716324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if ( s0!=null && s0.transition(0)!=null ) {
717324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            State s1 = s0.transition(0).target;
718324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if ( s1!=null && s1.transition(0)!=null ) {
719324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                Label label = s1.transition(0).label;
720324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if ( label.isSet() ) {
721324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    return label.getSet();
722324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                }
723324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
724324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
725324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return null;
726324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
727324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
728324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	private void transitionBetweenStates(NFAState a, NFAState b, int label) {
729324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		Transition e = new Transition(label,b);
730324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		a.addTransition(e);
731324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
732324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
733