1/*
2 * [The "BSD license"]
3 *  Copyright (c) 2010 Terence Parr
4 *  All rights reserved.
5 *
6 *  Redistribution and use in source and binary forms, with or without
7 *  modification, are permitted provided that the following conditions
8 *  are met:
9 *  1. Redistributions of source code must retain the above copyright
10 *      notice, this list of conditions and the following disclaimer.
11 *  2. Redistributions in binary form must reproduce the above copyright
12 *      notice, this list of conditions and the following disclaimer in the
13 *      documentation and/or other materials provided with the distribution.
14 *  3. The name of the author may not be used to endorse or promote products
15 *      derived from this software without specific prior written permission.
16 *
17 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28package org.antlr.tool;
29
30import org.antlr.analysis.*;
31import org.antlr.misc.IntSet;
32import org.antlr.misc.IntervalSet;
33
34import java.util.ArrayList;
35import java.util.Collection;
36import java.util.Iterator;
37import java.util.List;
38
39/** Routines to construct StateClusters from EBNF grammar constructs.
40 *  No optimization is done to remove unnecessary epsilon edges.
41 *
42 *  TODO: add an optimization that reduces number of states and transitions
43 *  will help with speed of conversion and make it easier to view NFA.  For
44 *  example, o-A->o-->o-B->o should be o-A->o-B->o
45 */
46public class NFAFactory {
47	/** This factory is attached to a specifc NFA that it is building.
48     *  The NFA will be filled up with states and transitions.
49     */
50	NFA nfa = null;
51
52    public Rule getCurrentRule() {
53        return currentRule;
54    }
55
56    public void setCurrentRule(Rule currentRule) {
57        this.currentRule = currentRule;
58    }
59
60	Rule currentRule = null;
61
62	public NFAFactory(NFA nfa) {
63        nfa.setFactory(this);
64		this.nfa = nfa;
65	}
66
67    public NFAState newState() {
68        NFAState n = new NFAState(nfa);
69        int state = nfa.getNewNFAStateNumber();
70        n.stateNumber = state;
71        nfa.addState(n);
72		n.enclosingRule = currentRule;
73		return n;
74    }
75
76	/** Optimize an alternative (list of grammar elements).
77	 *
78	 *  Walk the chain of elements (which can be complicated loop blocks...)
79	 *  and throw away any epsilon transitions used to link up simple elements.
80	 *
81	 *  This only removes 195 states from the java.g's NFA, but every little
82	 *  bit helps.  Perhaps I can improve in the future.
83	 */
84	public void optimizeAlternative(StateCluster alt) {
85		NFAState s = alt.left;
86		while ( s!=alt.right ) {
87			// if it's a block element, jump over it and continue
88			if ( s.endOfBlockStateNumber!=State.INVALID_STATE_NUMBER ) {
89				s = nfa.getState(s.endOfBlockStateNumber);
90				continue;
91			}
92			Transition t = s.transition[0];
93			if ( t instanceof RuleClosureTransition ) {
94				s = ((RuleClosureTransition) t).followState;
95				continue;
96			}
97			if ( t.label.isEpsilon() && !t.label.isAction() && s.getNumberOfTransitions()==1 ) {
98				// bypass epsilon transition and point to what the epsilon's
99				// target points to unless that epsilon transition points to
100				// a block or loop etc..  Also don't collapse epsilons that
101				// point at the last node of the alt. Don't collapse action edges
102				NFAState epsilonTarget = (NFAState)t.target;
103				if ( epsilonTarget.endOfBlockStateNumber==State.INVALID_STATE_NUMBER &&
104					 epsilonTarget.transition[0] !=null )
105				{
106					s.setTransition0(epsilonTarget.transition[0]);
107					/*
108					System.out.println("### opt "+s.stateNumber+"->"+
109									   epsilonTarget.transition(0).target.stateNumber);
110					*/
111				}
112			}
113			s = (NFAState)t.target;
114		}
115	}
116
117	/** From label A build Graph o-A->o */
118	public StateCluster build_Atom(int label, GrammarAST associatedAST) {
119		NFAState left = newState();
120		NFAState right = newState();
121		left.associatedASTNode = associatedAST;
122		right.associatedASTNode = associatedAST;
123		transitionBetweenStates(left, right, label);
124		StateCluster g = new StateCluster(left, right);
125		return g;
126	}
127
128	public StateCluster build_Atom(GrammarAST atomAST) {
129		int tokenType = nfa.grammar.getTokenType(atomAST.getText());
130		return build_Atom(tokenType, atomAST);
131	}
132
133	/** From set build single edge graph o->o-set->o.  To conform to
134     *  what an alt block looks like, must have extra state on left.
135     */
136	public StateCluster build_Set(IntSet set, GrammarAST associatedAST) {
137        NFAState left = newState();
138        NFAState right = newState();
139		left.associatedASTNode = associatedAST;
140		right.associatedASTNode = associatedAST;
141		Label label = new Label(set);
142		Transition e = new Transition(label,right);
143        left.addTransition(e);
144		StateCluster g = new StateCluster(left, right);
145        return g;
146	}
147
148    /** Can only complement block of simple alts; can complement build_Set()
149     *  result, that is.  Get set and complement, replace old with complement.
150    public StateCluster build_AlternativeBlockComplement(StateCluster blk) {
151        State s0 = blk.left;
152        IntSet set = getCollapsedBlockAsSet(s0);
153        if ( set!=null ) {
154            // if set is available, then structure known and blk is a set
155            set = nfa.grammar.complement(set);
156            Label label = s0.transition(0).target.transition(0).label;
157            label.setSet(set);
158        }
159        return blk;
160    }
161	 */
162
163    public StateCluster build_Range(int a, int b) {
164        NFAState left = newState();
165        NFAState right = newState();
166		Label label = new Label(IntervalSet.of(a, b));
167		Transition e = new Transition(label,right);
168        left.addTransition(e);
169        StateCluster g = new StateCluster(left, right);
170        return g;
171    }
172
173	/** From char 'c' build StateCluster o-intValue(c)->o
174	 */
175	public StateCluster build_CharLiteralAtom(GrammarAST charLiteralAST) {
176        int c = Grammar.getCharValueFromGrammarCharLiteral(charLiteralAST.getText());
177		return build_Atom(c, charLiteralAST);
178	}
179
180	/** From char 'c' build StateCluster o-intValue(c)->o
181	 *  can include unicode spec likes '\u0024' later.  Accepts
182	 *  actual unicode 16-bit now, of course, by default.
183     *  TODO not supplemental char clean!
184	 */
185	public StateCluster build_CharRange(String a, String b) {
186		int from = Grammar.getCharValueFromGrammarCharLiteral(a);
187		int to = Grammar.getCharValueFromGrammarCharLiteral(b);
188		return build_Range(from, to);
189	}
190
191    /** For a non-lexer, just build a simple token reference atom.
192     *  For a lexer, a string is a sequence of char to match.  That is,
193     *  "fog" is treated as 'f' 'o' 'g' not as a single transition in
194     *  the DFA.  Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states
195     *  for n characters.
196     */
197    public StateCluster build_StringLiteralAtom(GrammarAST stringLiteralAST) {
198        if ( nfa.grammar.type==Grammar.LEXER ) {
199			StringBuffer chars =
200				Grammar.getUnescapedStringFromGrammarStringLiteral(stringLiteralAST.getText());
201            NFAState first = newState();
202            NFAState last = null;
203            NFAState prev = first;
204            for (int i=0; i<chars.length(); i++) {
205                int c = chars.charAt(i);
206                NFAState next = newState();
207                transitionBetweenStates(prev, next, c);
208                prev = last = next;
209            }
210            return  new StateCluster(first, last);
211        }
212
213        // a simple token reference in non-Lexers
214        int tokenType = nfa.grammar.getTokenType(stringLiteralAST.getText());
215		return build_Atom(tokenType, stringLiteralAST);
216    }
217
218    /** For reference to rule r, build
219     *
220     *  o-e->(r)  o
221     *
222     *  where (r) is the start of rule r and the trailing o is not linked
223     *  to from rule ref state directly (it's done thru the transition(0)
224     *  RuleClosureTransition.
225     *
226     *  If the rule r is just a list of tokens, it's block will be just
227     *  a set on an edge o->o->o-set->o->o->o, could inline it rather than doing
228     *  the rule reference, but i'm not doing this yet as I'm not sure
229     *  it would help much in the NFA->DFA construction.
230     *
231     *  TODO add to codegen: collapse alt blks that are sets into single matchSet
232     */
233    public StateCluster build_RuleRef(Rule refDef, NFAState ruleStart) {
234        //System.out.println("building ref to rule "+nfa.grammar.name+"."+refDef.name);
235        NFAState left = newState();
236        // left.setDescription("ref to "+ruleStart.getDescription());
237        NFAState right = newState();
238        // right.setDescription("NFAState following ref to "+ruleStart.getDescription());
239        Transition e = new RuleClosureTransition(refDef,ruleStart,right);
240        left.addTransition(e);
241        StateCluster g = new StateCluster(left, right);
242        return g;
243    }
244
245    /** From an empty alternative build StateCluster o-e->o */
246    public StateCluster build_Epsilon() {
247        NFAState left = newState();
248        NFAState right = newState();
249        transitionBetweenStates(left, right, Label.EPSILON);
250        StateCluster g = new StateCluster(left, right);
251        return g;
252    }
253
254	/** Build what amounts to an epsilon transition with a semantic
255	 *  predicate action.  The pred is a pointer into the AST of
256	 *  the SEMPRED token.
257	 */
258	public StateCluster build_SemanticPredicate(GrammarAST pred) {
259		// don't count syn preds
260		if ( !pred.getText().toUpperCase()
261				.startsWith(Grammar.SYNPRED_RULE_PREFIX.toUpperCase()) )
262		{
263			nfa.grammar.numberOfSemanticPredicates++;
264		}
265		NFAState left = newState();
266		NFAState right = newState();
267		Transition e = new Transition(new PredicateLabel(pred), right);
268		left.addTransition(e);
269		StateCluster g = new StateCluster(left, right);
270		return g;
271	}
272
273	/** Build what amounts to an epsilon transition with an action.
274	 *  The action goes into NFA though it is ignored during analysis.
275	 *  It slows things down a bit, but I must ignore predicates after
276	 *  having seen an action (5-5-2008).
277	 */
278	public StateCluster build_Action(GrammarAST action) {
279		NFAState left = newState();
280		NFAState right = newState();
281		Transition e = new Transition(new ActionLabel(action), right);
282		left.addTransition(e);
283		return new StateCluster(left, right);
284	}
285
286	/** add an EOF transition to any rule end NFAState that points to nothing
287     *  (i.e., for all those rules not invoked by another rule).  These
288     *  are start symbols then.
289	 *
290	 *  Return the number of grammar entry points; i.e., how many rules are
291	 *  not invoked by another rule (they can only be invoked from outside).
292	 *  These are the start rules.
293     */
294    public int build_EOFStates(Collection rules) {
295		int numberUnInvokedRules = 0;
296        for (Iterator iterator = rules.iterator(); iterator.hasNext();) {
297			Rule r = (Rule) iterator.next();
298			NFAState endNFAState = r.stopState;
299            // Is this rule a start symbol?  (no follow links)
300			if ( endNFAState.transition[0] ==null ) {
301				// if so, then don't let algorithm fall off the end of
302				// the rule, make it hit EOF/EOT.
303				build_EOFState(endNFAState);
304				// track how many rules have been invoked by another rule
305				numberUnInvokedRules++;
306			}
307        }
308		return numberUnInvokedRules;
309    }
310
311    /** set up an NFA NFAState that will yield eof tokens or,
312     *  in the case of a lexer grammar, an EOT token when the conversion
313     *  hits the end of a rule.
314     */
315    private void build_EOFState(NFAState endNFAState) {
316		NFAState end = newState();
317        int label = Label.EOF;
318        if ( nfa.grammar.type==Grammar.LEXER ) {
319            label = Label.EOT;
320			end.setEOTTargetState(true);
321        }
322		/*
323		System.out.println("build "+nfa.grammar.getTokenDisplayName(label)+
324						   " loop on end of state "+endNFAState.getDescription()+
325						   " to state "+end.stateNumber);
326		*/
327		Transition toEnd = new Transition(label, end);
328		endNFAState.addTransition(toEnd);
329	}
330
331    /** From A B build A-e->B (that is, build an epsilon arc from right
332     *  of A to left of B).
333     *
334     *  As a convenience, return B if A is null or return A if B is null.
335     */
336    public StateCluster build_AB(StateCluster A, StateCluster B) {
337        if ( A==null ) {
338            return B;
339        }
340        if ( B==null ) {
341            return A;
342        }
343		transitionBetweenStates(A.right, B.left, Label.EPSILON);
344		StateCluster g = new StateCluster(A.left, B.right);
345        return g;
346    }
347
348	/** From a set ('a'|'b') build
349     *
350     *  o->o-'a'..'b'->o->o (last NFAState is blockEndNFAState pointed to by all alts)
351	 */
352	public StateCluster build_AlternativeBlockFromSet(StateCluster set) {
353		if ( set==null ) {
354			return null;
355		}
356
357		// single alt, no decision, just return only alt state cluster
358		NFAState startOfAlt = newState(); // must have this no matter what
359		transitionBetweenStates(startOfAlt, set.left, Label.EPSILON);
360
361		return new StateCluster(startOfAlt,set.right);
362	}
363
364	/** From A|B|..|Z alternative block build
365     *
366     *  o->o-A->o->o (last NFAState is blockEndNFAState pointed to by all alts)
367     *  |          ^
368     *  o->o-B->o--|
369     *  |          |
370     *  ...        |
371     *  |          |
372     *  o->o-Z->o--|
373     *
374     *  So every alternative gets begin NFAState connected by epsilon
375     *  and every alt right side points at a block end NFAState.  There is a
376     *  new NFAState in the NFAState in the StateCluster for each alt plus one for the
377     *  end NFAState.
378     *
379     *  Special case: only one alternative: don't make a block with alt
380     *  begin/end.
381     *
382     *  Special case: if just a list of tokens/chars/sets, then collapse
383     *  to a single edge'd o-set->o graph.
384     *
385     *  Set alt number (1..n) in the left-Transition NFAState.
386     */
387    public StateCluster build_AlternativeBlock(List alternativeStateClusters)
388    {
389        StateCluster result = null;
390        if ( alternativeStateClusters==null || alternativeStateClusters.size()==0 ) {
391            return null;
392        }
393
394		// single alt case
395		if ( alternativeStateClusters.size()==1 ) {
396			// single alt, no decision, just return only alt state cluster
397			StateCluster g = (StateCluster)alternativeStateClusters.get(0);
398			NFAState startOfAlt = newState(); // must have this no matter what
399			transitionBetweenStates(startOfAlt, g.left, Label.EPSILON);
400
401			//System.out.println("### opt saved start/stop end in (...)");
402			return new StateCluster(startOfAlt,g.right);
403		}
404
405		// even if we can collapse for lookahead purposes, we will still
406        // need to predict the alts of this subrule in case there are actions
407        // etc...  This is the decision that is pointed to from the AST node
408        // (always)
409        NFAState prevAlternative = null; // tracks prev so we can link to next alt
410        NFAState firstAlt = null;
411        NFAState blockEndNFAState = newState();
412        blockEndNFAState.setDescription("end block");
413        int altNum = 1;
414        for (Iterator iter = alternativeStateClusters.iterator(); iter.hasNext();) {
415            StateCluster g = (StateCluster) iter.next();
416            // add begin NFAState for this alt connected by epsilon
417            NFAState left = newState();
418            left.setDescription("alt "+altNum+" of ()");
419			transitionBetweenStates(left, g.left, Label.EPSILON);
420			transitionBetweenStates(g.right, blockEndNFAState, Label.EPSILON);
421			// Are we the first alternative?
422			if ( firstAlt==null ) {
423				firstAlt = left; // track extreme left node of StateCluster
424			}
425			else {
426				// if not first alternative, must link to this alt from previous
427				transitionBetweenStates(prevAlternative, left, Label.EPSILON);
428			}
429			prevAlternative = left;
430			altNum++;
431		}
432
433		// return StateCluster pointing representing entire block
434		// Points to first alt NFAState on left, block end on right
435		result = new StateCluster(firstAlt, blockEndNFAState);
436
437		firstAlt.decisionStateType = NFAState.BLOCK_START;
438
439		// set EOB markers for Jean
440		firstAlt.endOfBlockStateNumber = blockEndNFAState.stateNumber;
441
442		return result;
443    }
444
445    /** From (A)? build either:
446     *
447	 *  o--A->o
448	 *  |     ^
449	 *  o---->|
450     *
451     *  or, if A is a block, just add an empty alt to the end of the block
452     */
453    public StateCluster build_Aoptional(StateCluster A) {
454        StateCluster g = null;
455        int n = nfa.grammar.getNumberOfAltsForDecisionNFA(A.left);
456        if ( n==1 ) {
457            // no decision, just wrap in an optional path
458			//NFAState decisionState = newState();
459			NFAState decisionState = A.left; // resuse left edge
460			decisionState.setDescription("only alt of ()? block");
461			NFAState emptyAlt = newState();
462            emptyAlt.setDescription("epsilon path of ()? block");
463            NFAState blockEndNFAState = null;
464			blockEndNFAState = newState();
465			transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON);
466			blockEndNFAState.setDescription("end ()? block");
467            //transitionBetweenStates(decisionState, A.left, Label.EPSILON);
468            transitionBetweenStates(decisionState, emptyAlt, Label.EPSILON);
469            transitionBetweenStates(emptyAlt, blockEndNFAState, Label.EPSILON);
470
471			// set EOB markers for Jean
472			decisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber;
473			blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
474
475            g = new StateCluster(decisionState, blockEndNFAState);
476        }
477        else {
478            // a decision block, add an empty alt
479            NFAState lastRealAlt =
480                    nfa.grammar.getNFAStateForAltOfDecision(A.left, n);
481            NFAState emptyAlt = newState();
482            emptyAlt.setDescription("epsilon path of ()? block");
483            transitionBetweenStates(lastRealAlt, emptyAlt, Label.EPSILON);
484            transitionBetweenStates(emptyAlt, A.right, Label.EPSILON);
485
486			// set EOB markers for Jean (I think this is redundant here)
487			A.left.endOfBlockStateNumber = A.right.stateNumber;
488			A.right.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
489
490            g = A; // return same block, but now with optional last path
491        }
492		g.left.decisionStateType = NFAState.OPTIONAL_BLOCK_START;
493
494        return g;
495    }
496
497    /** From (A)+ build
498	 *
499     *     |---|    (Transition 2 from A.right points at alt 1)
500	 *     v   |    (follow of loop is Transition 1)
501     *  o->o-A-o->o
502     *
503     *  Meaning that the last NFAState in A points back to A's left Transition NFAState
504     *  and we add a new begin/end NFAState.  A can be single alternative or
505     *  multiple.
506	 *
507	 *  During analysis we'll call the follow link (transition 1) alt n+1 for
508	 *  an n-alt A block.
509     */
510    public StateCluster build_Aplus(StateCluster A) {
511        NFAState left = newState();
512        NFAState blockEndNFAState = newState();
513		blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
514
515		// don't reuse A.right as loopback if it's right edge of another block
516		if ( A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK ) {
517			// nested A* so make another tail node to be the loop back
518			// instead of the usual A.right which is the EOB for inner loop
519			NFAState extraRightEdge = newState();
520			transitionBetweenStates(A.right, extraRightEdge, Label.EPSILON);
521			A.right = extraRightEdge;
522		}
523
524        transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON); // follow is Transition 1
525		// turn A's block end into a loopback (acts like alt 2)
526		transitionBetweenStates(A.right, A.left, Label.EPSILON); // loop back Transition 2
527		transitionBetweenStates(left, A.left, Label.EPSILON);
528
529		A.right.decisionStateType = NFAState.LOOPBACK;
530		A.left.decisionStateType = NFAState.BLOCK_START;
531
532		// set EOB markers for Jean
533		A.left.endOfBlockStateNumber = A.right.stateNumber;
534
535        StateCluster g = new StateCluster(left, blockEndNFAState);
536        return g;
537    }
538
539    /** From (A)* build
540     *
541	 *     |---|
542	 *     v   |
543	 *  o->o-A-o--o (Transition 2 from block end points at alt 1; follow is Transition 1)
544     *  |         ^
545     *  o---------| (optional branch is 2nd alt of optional block containing A+)
546     *
547     *  Meaning that the last (end) NFAState in A points back to A's
548     *  left side NFAState and we add 3 new NFAStates (the
549     *  optional branch is built just like an optional subrule).
550     *  See the Aplus() method for more on the loop back Transition.
551	 *  The new node on right edge is set to RIGHT_EDGE_OF_CLOSURE so we
552	 *  can detect nested (A*)* loops and insert an extra node.  Previously,
553	 *  two blocks shared same EOB node.
554     *
555     *  There are 2 or 3 decision points in a A*.  If A is not a block (i.e.,
556     *  it only has one alt), then there are two decisions: the optional bypass
557     *  and then loopback.  If A is a block of alts, then there are three
558     *  decisions: bypass, loopback, and A's decision point.
559     *
560     *  Note that the optional bypass must be outside the loop as (A|B)* is
561     *  not the same thing as (A|B|)+.
562     *
563     *  This is an accurate NFA representation of the meaning of (A)*, but
564     *  for generating code, I don't need a DFA for the optional branch by
565     *  virtue of how I generate code.  The exit-loopback-branch decision
566     *  is sufficient to let me make an appropriate enter, exit, loop
567     *  determination.  See codegen.g
568     */
569    public StateCluster build_Astar(StateCluster A) {
570		NFAState bypassDecisionState = newState();
571		bypassDecisionState.setDescription("enter loop path of ()* block");
572        NFAState optionalAlt = newState();
573        optionalAlt.setDescription("epsilon path of ()* block");
574        NFAState blockEndNFAState = newState();
575		blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
576
577		// don't reuse A.right as loopback if it's right edge of another block
578		if ( A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK ) {
579			// nested A* so make another tail node to be the loop back
580			// instead of the usual A.right which is the EOB for inner loop
581			NFAState extraRightEdge = newState();
582			transitionBetweenStates(A.right, extraRightEdge, Label.EPSILON);
583			A.right = extraRightEdge;
584		}
585
586		// convert A's end block to loopback
587		A.right.setDescription("()* loopback");
588		// Transition 1 to actual block of stuff
589        transitionBetweenStates(bypassDecisionState, A.left, Label.EPSILON);
590        // Transition 2 optional to bypass
591        transitionBetweenStates(bypassDecisionState, optionalAlt, Label.EPSILON);
592		transitionBetweenStates(optionalAlt, blockEndNFAState, Label.EPSILON);
593        // Transition 1 of end block exits
594        transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON);
595        // Transition 2 of end block loops
596        transitionBetweenStates(A.right, A.left, Label.EPSILON);
597
598		bypassDecisionState.decisionStateType = NFAState.BYPASS;
599		A.left.decisionStateType = NFAState.BLOCK_START;
600		A.right.decisionStateType = NFAState.LOOPBACK;
601
602		// set EOB markers for Jean
603		A.left.endOfBlockStateNumber = A.right.stateNumber;
604		bypassDecisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber;
605
606        StateCluster g = new StateCluster(bypassDecisionState, blockEndNFAState);
607        return g;
608    }
609
610    /** Build an NFA predictor for special rule called Tokens manually that
611     *  predicts which token will succeed.  The refs to the rules are not
612     *  RuleRefTransitions as I want DFA conversion to stop at the EOT
613     *  transition on the end of each token, rather than return to Tokens rule.
614     *  If I used normal build_alternativeBlock for this, the RuleRefTransitions
615     *  would save return address when jumping away from Tokens rule.
616     *
617     *  All I do here is build n new states for n rules with an epsilon
618     *  edge to the rule start states and then to the next state in the
619     *  list:
620     *
621     *   o->(A)  (a state links to start of A and to next in list)
622     *   |
623     *   o->(B)
624     *   |
625     *   ...
626     *   |
627     *   o->(Z)
628	 *
629	 *  This is the NFA created for the artificial rule created in
630	 *  Grammar.addArtificialMatchTokensRule().
631	 *
632	 *  11/28/2005: removed so we can use normal rule construction for Tokens.
633    public NFAState build_ArtificialMatchTokensRuleNFA() {
634        int altNum = 1;
635        NFAState firstAlt = null; // the start state for the "rule"
636        NFAState prevAlternative = null;
637        Iterator iter = nfa.grammar.getRules().iterator();
638		// TODO: add a single decision node/state for good description
639        while (iter.hasNext()) {
640			Rule r = (Rule) iter.next();
641            String ruleName = r.name;
642			String modifier = nfa.grammar.getRuleModifier(ruleName);
643            if ( ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ||
644				 (modifier!=null &&
645				  modifier.equals(Grammar.FRAGMENT_RULE_MODIFIER)) )
646			{
647                continue; // don't loop to yourself or do nontoken rules
648            }
649            NFAState ruleStartState = nfa.grammar.getRuleStartState(ruleName);
650            NFAState left = newState();
651            left.setDescription("alt "+altNum+" of artificial rule "+Grammar.ARTIFICIAL_TOKENS_RULENAME);
652            transitionBetweenStates(left, ruleStartState, Label.EPSILON);
653            // Are we the first alternative?
654            if ( firstAlt==null ) {
655                firstAlt = left; // track extreme top left node as rule start
656            }
657            else {
658                // if not first alternative, must link to this alt from previous
659                transitionBetweenStates(prevAlternative, left, Label.EPSILON);
660            }
661            prevAlternative = left;
662            altNum++;
663        }
664		firstAlt.decisionStateType = NFAState.BLOCK_START;
665
666        return firstAlt;
667    }
668	 */
669
670    /** Build an atom with all possible values in its label */
671    public StateCluster build_Wildcard(GrammarAST associatedAST) {
672        NFAState left = newState();
673        NFAState right = newState();
674        left.associatedASTNode = associatedAST;
675        right.associatedASTNode = associatedAST;
676        Label label = new Label(nfa.grammar.getTokenTypes()); // char or tokens
677        Transition e = new Transition(label,right);
678        left.addTransition(e);
679        StateCluster g = new StateCluster(left, right);
680        return g;
681    }
682
683    /** Build a subrule matching ^(. .*) (any tree or node). Let's use
684     *  (^(. .+) | .) to be safe.
685     */
686    public StateCluster build_WildcardTree(GrammarAST associatedAST) {
687        StateCluster wildRoot = build_Wildcard(associatedAST);
688
689        StateCluster down = build_Atom(Label.DOWN, associatedAST);
690        wildRoot = build_AB(wildRoot,down); // hook in; . DOWN
691
692        // make .+
693        StateCluster wildChildren = build_Wildcard(associatedAST);
694        wildChildren = build_Aplus(wildChildren);
695        wildRoot = build_AB(wildRoot,wildChildren); // hook in; . DOWN .+
696
697        StateCluster up = build_Atom(Label.UP, associatedAST);
698        wildRoot = build_AB(wildRoot,up); // hook in; . DOWN .+ UP
699
700        // make optional . alt
701        StateCluster optionalNodeAlt = build_Wildcard(associatedAST);
702
703        List alts = new ArrayList();
704        alts.add(wildRoot);
705        alts.add(optionalNodeAlt);
706        StateCluster blk = build_AlternativeBlock(alts);
707
708        return blk;
709    }
710
711    /** Given a collapsed block of alts (a set of atoms), pull out
712     *  the set and return it.
713     */
714    protected IntSet getCollapsedBlockAsSet(State blk) {
715        State s0 = blk;
716        if ( s0!=null && s0.transition(0)!=null ) {
717            State s1 = s0.transition(0).target;
718            if ( s1!=null && s1.transition(0)!=null ) {
719                Label label = s1.transition(0).label;
720                if ( label.isSet() ) {
721                    return label.getSet();
722                }
723            }
724        }
725        return null;
726    }
727
728	private void transitionBetweenStates(NFAState a, NFAState b, int label) {
729		Transition e = new Transition(label,b);
730		a.addTransition(e);
731	}
732}
733