DFA.java revision 324c4644fee44b9898524c09511bd33c3f12e2df
1/*
2 * [The "BSD license"]
3 *  Copyright (c) 2010 Terence Parr
4 *  All rights reserved.
5 *
6 *  Redistribution and use in source and binary forms, with or without
7 *  modification, are permitted provided that the following conditions
8 *  are met:
9 *  1. Redistributions of source code must retain the above copyright
10 *      notice, this list of conditions and the following disclaimer.
11 *  2. Redistributions in binary form must reproduce the above copyright
12 *      notice, this list of conditions and the following disclaimer in the
13 *      documentation and/or other materials provided with the distribution.
14 *  3. The name of the author may not be used to endorse or promote products
15 *      derived from this software without specific prior written permission.
16 *
17 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28package org.antlr.analysis;
29
30import org.antlr.codegen.CodeGenerator;
31import org.antlr.misc.IntSet;
32import org.antlr.misc.IntervalSet;
33import org.antlr.misc.Utils;
34import org.antlr.runtime.IntStream;
35import org.stringtemplate.v4.ST;
36import org.antlr.tool.*;
37
38import java.util.*;
39
40/** A DFA (converted from a grammar's NFA).
41 *  DFAs are used as prediction machine for alternative blocks in all kinds
42 *  of recognizers (lexers, parsers, tree walkers).
43 */
44public class DFA {
45	public static final int REACHABLE_UNKNOWN = -2;
46	public static final int REACHABLE_BUSY = -1; // in process of computing
47	public static final int REACHABLE_NO = 0;
48	public static final int REACHABLE_YES = 1;
49
50	public static final int CYCLIC_UNKNOWN = -2;
51	public static final int CYCLIC_BUSY = -1; // in process of computing
52	public static final int CYCLIC_DONE = 0;
53
54	/** Prevent explosion of DFA states during conversion. The max number
55	 *  of states per alt in a single decision's DFA.
56	public static final int MAX_STATES_PER_ALT_IN_DFA = 450;
57	 */
58
59	/** Set to 0 to not terminate early (time in ms) */
60	public static int MAX_TIME_PER_DFA_CREATION = 1*1000;
61
62	/** How many edges can each DFA state have before a "special" state
63	 *  is created that uses IF expressions instead of a table?
64	 */
65	public static int MAX_STATE_TRANSITIONS_FOR_TABLE = 65534;
66
67	/** What's the start state for this DFA? */
68    public DFAState startState;
69
70	/** This DFA is being built for which decision? */
71	public int decisionNumber = 0;
72
73    /** From what NFAState did we create the DFA? */
74    public NFAState decisionNFAStartState;
75
76	/** The printable grammar fragment associated with this DFA */
77	public String description;
78
79	/** A set of all uniquely-numbered DFA states.  Maps hash of DFAState
80     *  to the actual DFAState object.  We use this to detect
81     *  existing DFA states.  Map<DFAState,DFAState>.  Use Map so
82	 *  we can get old state back (Set only allows you to see if it's there).
83	 *  Not used during fixed k lookahead as it's a waste to fill it with
84	 *  a dup of states array.
85     */
86    protected Map<DFAState, DFAState> uniqueStates = new HashMap<DFAState, DFAState>();
87
88	/** Maps the state number to the actual DFAState.  Use a Vector as it
89	 *  grows automatically when I set the ith element.  This contains all
90	 *  states, but the states are not unique.  s3 might be same as s1 so
91	 *  s3 -> s1 in this table.  This is how cycles occur.  If fixed k,
92	 *  then these states will all be unique as states[i] always points
93	 *  at state i when no cycles exist.
94	 *
95	 *  This is managed in parallel with uniqueStates and simply provides
96	 *  a way to go from state number to DFAState rather than via a
97	 *  hash lookup.
98	 */
99	protected Vector<DFAState> states = new Vector<DFAState>();
100
101	/** Unique state numbers per DFA */
102	protected int stateCounter = 0;
103
104	/** count only new states not states that were rejected as already present */
105	protected int numberOfStates = 0;
106
107	/** User specified max fixed lookahead.  If 0, nothing specified.  -1
108	 *  implies we have not looked at the options table yet to set k.
109	 */
110	protected int user_k = -1;
111
112	/** While building the DFA, track max lookahead depth if not cyclic */
113	protected int max_k = -1;
114
115    /** Is this DFA reduced?  I.e., can all states lead to an accept state? */
116    protected boolean reduced = true;
117
118    /** Are there any loops in this DFA?
119	 *  Computed by doesStateReachAcceptState()
120	 */
121    protected boolean cyclic = false;
122
123	/** Track whether this DFA has at least one sem/syn pred encountered
124	 *  during a closure operation.  This is useful for deciding whether
125	 *  to retry a non-LL(*) with k=1.  If no pred, it will not work w/o
126	 *  a pred so don't bother.  It would just give another error message.
127	 */
128	public boolean predicateVisible = false;
129
130	public boolean hasPredicateBlockedByAction = false;
131
132	/** Each alt in an NFA derived from a grammar must have a DFA state that
133     *  predicts it lest the parser not know what to do.  Nondeterminisms can
134     *  lead to this situation (assuming no semantic predicates can resolve
135     *  the problem) and when for some reason, I cannot compute the lookahead
136     *  (which might arise from an error in the algorithm or from
137     *  left-recursion etc...).  This list starts out with all alts contained
138     *  and then in method doesStateReachAcceptState() I remove the alts I
139     *  know to be uniquely predicted.
140     */
141    protected List<Integer> unreachableAlts;
142
143	protected int nAlts = 0;
144
145	/** We only want one accept state per predicted alt; track here */
146	protected DFAState[] altToAcceptState;
147
148	/** Track whether an alt discovers recursion for each alt during
149	 *  NFA to DFA conversion; >1 alt with recursion implies nonregular.
150	 */
151	public IntSet recursiveAltSet = new IntervalSet();
152
153	/** Which NFA are we converting (well, which piece of the NFA)? */
154    public NFA nfa;
155
156	protected NFAToDFAConverter nfaConverter;
157
158	/** This probe tells you a lot about a decision and is useful even
159	 *  when there is no error such as when a syntactic nondeterminism
160	 *  is solved via semantic predicates.  Perhaps a GUI would want
161	 *  the ability to show that.
162	 */
163	public DecisionProbe probe = new DecisionProbe(this);
164
165	/** Track absolute time of the conversion so we can have a failsafe:
166	 *  if it takes too long, then terminate.  Assume bugs are in the
167	 *  analysis engine.
168	 */
169	//protected long conversionStartTime;
170
171	/** Map an edge transition table to a unique set number; ordered so
172	 *  we can push into the output template as an ordered list of sets
173	 *  and then ref them from within the transition[][] table.  Like this
174	 *  for C# target:
175	 *     public static readonly DFA30_transition0 =
176	 *     	new short[] { 46, 46, -1, 46, 46, -1, -1, -1, -1, -1, -1, -1,...};
177	 *         public static readonly DFA30_transition1 =
178	 *     	new short[] { 21 };
179	 *      public static readonly short[][] DFA30_transition = {
180	 *     	  DFA30_transition0,
181	 *     	  DFA30_transition0,
182	 *     	  DFA30_transition1,
183	 *     	  ...
184	 *      };
185	 */
186	public Map edgeTransitionClassMap = new LinkedHashMap();
187
188	/** The unique edge transition class number; every time we see a new
189	 *  set of edges emanating from a state, we number it so we can reuse
190	 *  if it's every seen again for another state.  For Java grammar,
191	 *  some of the big edge transition tables are seen about 57 times.
192	 */
193	protected int edgeTransitionClass =0;
194
195	/* This DFA can be converted to a transition[state][char] table and
196	 * the following tables are filled by createStateTables upon request.
197	 * These are injected into the templates for code generation.
198	 * See March 25, 2006 entry for description:
199	 *   http://www.antlr.org/blog/antlr3/codegen.tml
200	 * Often using Vector as can't set ith position in a List and have
201	 * it extend list size; bizarre.
202	 */
203
204	/** List of special DFAState objects */
205	public List specialStates;
206	/** List of ST for special states. */
207	public List specialStateSTs;
208	public Vector accept;
209	public Vector eot;
210	public Vector eof;
211	public Vector min;
212	public Vector max;
213	public Vector special;
214	public Vector transition;
215	/** just the Vector<Integer> indicating which unique edge table is at
216	 *  position i.
217	 */
218	public Vector transitionEdgeTables; // not used by java yet
219	protected int uniqueCompressedSpecialStateNum = 0;
220
221	/** Which generator to use if we're building state tables */
222	protected CodeGenerator generator = null;
223
224	protected DFA() {;}
225
226	public DFA(int decisionNumber, NFAState decisionStartState) {
227		this.decisionNumber = decisionNumber;
228        this.decisionNFAStartState = decisionStartState;
229        nfa = decisionStartState.nfa;
230        nAlts = nfa.grammar.getNumberOfAltsForDecisionNFA(decisionStartState);
231        //setOptions( nfa.grammar.getDecisionOptions(getDecisionNumber()) );
232        initAltRelatedInfo();
233
234		//long start = System.currentTimeMillis();
235        nfaConverter = new NFAToDFAConverter(this);
236		try {
237			nfaConverter.convert();
238
239			// figure out if there are problems with decision
240			verify();
241
242			if ( !probe.isDeterministic() || probe.analysisOverflowed() ) {
243				probe.issueWarnings();
244			}
245
246			// must be after verify as it computes cyclic, needed by this routine
247			// should be after warnings because early termination or something
248			// will not allow the reset to operate properly in some cases.
249			resetStateNumbersToBeContiguous();
250
251			//long stop = System.currentTimeMillis();
252			//System.out.println("verify cost: "+(int)(stop-start)+" ms");
253		}
254//		catch (AnalysisTimeoutException at) {
255//			probe.reportAnalysisTimeout();
256//			if ( !okToRetryDFAWithK1() ) {
257//				probe.issueWarnings();
258//			}
259//		}
260		catch (NonLLStarDecisionException nonLL) {
261			probe.reportNonLLStarDecision(this);
262			// >1 alt recurses, k=* and no auto backtrack nor manual sem/syn
263			if ( !okToRetryDFAWithK1() ) {
264				probe.issueWarnings();
265			}
266		}
267    }
268
269	/** Walk all states and reset their numbers to be a contiguous sequence
270	 *  of integers starting from 0.  Only cyclic DFA can have unused positions
271	 *  in states list.  State i might be identical to a previous state j and
272	 *  will result in states[i] == states[j].  We don't want to waste a state
273	 *  number on this.  Useful mostly for code generation in tables.
274	 *
275	 *  At the start of this routine, states[i].stateNumber <= i by definition.
276	 *  If states[50].stateNumber is 50 then a cycle during conversion may
277	 *  try to add state 103, but we find that an identical DFA state, named
278	 *  50, already exists, hence, states[103]==states[50] and both have
279	 *  stateNumber 50 as they point at same object.  Afterwards, the set
280	 *  of state numbers from all states should represent a contiguous range
281	 *  from 0..n-1 where n is the number of unique states.
282	 */
283	public void resetStateNumbersToBeContiguous() {
284		if ( getUserMaxLookahead()>0 ) {
285			// all numbers are unique already; no states are thrown out.
286			return;
287		}
288
289        // walk list of DFAState objects by state number,
290		// setting state numbers to 0..n-1
291		int snum=0;
292		for (int i = 0; i <= getMaxStateNumber(); i++) {
293			DFAState s = getState(i);
294            // some states are unused after creation most commonly due to cycles
295            // or conflict resolution.
296            if ( s==null ) {
297                continue;
298            }
299			// state i is mapped to DFAState with state number set to i originally
300			// so if it's less than i, then we renumbered it already; that
301			// happens when states have been merged or cycles occurred I think.
302			// states[50] will point to DFAState with s50 in it but
303			// states[103] might also point at this same DFAState.  Since
304			// 50 < 103 then it's already been renumbered as it points downwards.
305			boolean alreadyRenumbered = s.stateNumber<i;
306			if ( !alreadyRenumbered ) {
307				// state i is a valid state, reset it's state number
308				s.stateNumber = snum; // rewrite state numbers to be 0..n-1
309				snum++;
310			}
311		}
312        if ( snum!=getNumberOfStates() ) {
313			ErrorManager.internalError("DFA "+decisionNumber+": "+
314				decisionNFAStartState.getDescription()+" num unique states "+getNumberOfStates()+
315				"!= num renumbered states "+snum);
316		}
317	}
318
319	// JAVA-SPECIFIC Accessors!!!!!  It is so impossible to get arrays
320	// or even consistently formatted strings acceptable to java that
321	// I am forced to build the individual char elements here
322
323	public List getJavaCompressedAccept() { return getRunLengthEncoding(accept); }
324	public List getJavaCompressedEOT() { return getRunLengthEncoding(eot); }
325	public List getJavaCompressedEOF() { return getRunLengthEncoding(eof); }
326	public List getJavaCompressedMin() { return getRunLengthEncoding(min); }
327	public List getJavaCompressedMax() { return getRunLengthEncoding(max); }
328	public List getJavaCompressedSpecial() { return getRunLengthEncoding(special); }
329	public List getJavaCompressedTransition() {
330		if ( transition==null || transition.size()==0 ) {
331			return null;
332		}
333		List encoded = new ArrayList(transition.size());
334		// walk Vector<Vector<FormattedInteger>> which is the transition[][] table
335		for (int i = 0; i < transition.size(); i++) {
336			Vector transitionsForState = (Vector) transition.elementAt(i);
337			encoded.add(getRunLengthEncoding(transitionsForState));
338		}
339		return encoded;
340	}
341
342	/** Compress the incoming data list so that runs of same number are
343	 *  encoded as number,value pair sequences.  3 -1 -1 -1 28 is encoded
344	 *  as 1 3 3 -1 1 28.  I am pretty sure this is the lossless compression
345	 *  that GIF files use.  Transition tables are heavily compressed by
346	 *  this technique.  I got the idea from JFlex http://jflex.de/
347	 *
348	 *  Return List<String> where each string is either \xyz for 8bit char
349	 *  and \uFFFF for 16bit.  Hideous and specific to Java, but it is the
350	 *  only target bad enough to need it.
351	 */
352	public List getRunLengthEncoding(List data) {
353		if ( data==null || data.size()==0 ) {
354			// for states with no transitions we want an empty string ""
355			// to hold its place in the transitions array.
356			List empty = new ArrayList();
357			empty.add("");
358			return empty;
359		}
360		int size = Math.max(2,data.size()/2);
361		List encoded = new ArrayList(size); // guess at size
362		// scan values looking for runs
363		int i = 0;
364		Integer emptyValue = Utils.integer(-1);
365		while ( i < data.size() ) {
366			Integer I = (Integer)data.get(i);
367			if ( I==null ) {
368				I = emptyValue;
369			}
370			// count how many v there are?
371			int n = 0;
372			for (int j = i; j < data.size(); j++) {
373				Integer v = (Integer)data.get(j);
374				if ( v==null ) {
375					v = emptyValue;
376				}
377				if ( I.equals(v) ) {
378					n++;
379				}
380				else {
381					break;
382				}
383			}
384			encoded.add(generator.target.encodeIntAsCharEscape((char)n));
385			encoded.add(generator.target.encodeIntAsCharEscape((char)I.intValue()));
386			i+=n;
387		}
388		return encoded;
389	}
390
391	public void createStateTables(CodeGenerator generator) {
392		//System.out.println("createTables:\n"+this);
393		this.generator = generator;
394		description = getNFADecisionStartState().getDescription();
395		description =
396			generator.target.getTargetStringLiteralFromString(description);
397
398		// create all the tables
399		special = new Vector(this.getNumberOfStates()); // Vector<short>
400		special.setSize(this.getNumberOfStates());
401		specialStates = new ArrayList();				// List<DFAState>
402		specialStateSTs = new ArrayList();				// List<ST>
403		accept = new Vector(this.getNumberOfStates()); // Vector<int>
404		accept.setSize(this.getNumberOfStates());
405		eot = new Vector(this.getNumberOfStates()); // Vector<int>
406		eot.setSize(this.getNumberOfStates());
407		eof = new Vector(this.getNumberOfStates()); // Vector<int>
408		eof.setSize(this.getNumberOfStates());
409		min = new Vector(this.getNumberOfStates()); // Vector<int>
410		min.setSize(this.getNumberOfStates());
411		max = new Vector(this.getNumberOfStates()); // Vector<int>
412		max.setSize(this.getNumberOfStates());
413		transition = new Vector(this.getNumberOfStates()); // Vector<Vector<int>>
414		transition.setSize(this.getNumberOfStates());
415		transitionEdgeTables = new Vector(this.getNumberOfStates()); // Vector<Vector<int>>
416		transitionEdgeTables.setSize(this.getNumberOfStates());
417
418		// for each state in the DFA, fill relevant tables.
419		Iterator it = null;
420		if ( getUserMaxLookahead()>0 ) {
421			it = states.iterator();
422		}
423		else {
424			it = getUniqueStates().values().iterator();
425		}
426		while ( it.hasNext() ) {
427			DFAState s = (DFAState)it.next();
428			if ( s==null ) {
429				// ignore null states; some acylic DFA see this condition
430				// when inlining DFA (due to lacking of exit branch pruning?)
431				continue;
432			}
433			if ( s.isAcceptState() ) {
434				// can't compute min,max,special,transition on accepts
435				accept.set(s.stateNumber,
436						   Utils.integer(s.getUniquelyPredictedAlt()));
437			}
438			else {
439				createMinMaxTables(s);
440				createTransitionTableEntryForState(s);
441				createSpecialTable(s);
442				createEOTAndEOFTables(s);
443			}
444		}
445
446		// now that we have computed list of specialStates, gen code for 'em
447		for (int i = 0; i < specialStates.size(); i++) {
448			DFAState ss = (DFAState) specialStates.get(i);
449			ST stateST =
450				generator.generateSpecialState(ss);
451			specialStateSTs.add(stateST);
452		}
453
454		// check that the tables are not messed up by encode/decode
455		/*
456		testEncodeDecode(min);
457		testEncodeDecode(max);
458		testEncodeDecode(accept);
459		testEncodeDecode(special);
460		System.out.println("min="+min);
461		System.out.println("max="+max);
462		System.out.println("eot="+eot);
463		System.out.println("eof="+eof);
464		System.out.println("accept="+accept);
465		System.out.println("special="+special);
466		System.out.println("transition="+transition);
467		*/
468	}
469
470	/*
471	private void testEncodeDecode(List data) {
472		System.out.println("data="+data);
473		List encoded = getRunLengthEncoding(data);
474		StringBuffer buf = new StringBuffer();
475		for (int i = 0; i < encoded.size(); i++) {
476			String I = (String)encoded.get(i);
477			int v = 0;
478			if ( I.startsWith("\\u") ) {
479				v = Integer.parseInt(I.substring(2,I.length()), 16);
480			}
481			else {
482				v = Integer.parseInt(I.substring(1,I.length()), 8);
483			}
484			buf.append((char)v);
485		}
486		String encodedS = buf.toString();
487		short[] decoded = org.antlr.runtime.DFA.unpackEncodedString(encodedS);
488		//System.out.println("decoded:");
489		for (int i = 0; i < decoded.length; i++) {
490			short x = decoded[i];
491			if ( x!=((Integer)data.get(i)).intValue() ) {
492				System.err.println("problem with encoding");
493			}
494			//System.out.print(", "+x);
495		}
496		//System.out.println();
497	}
498	*/
499
500	protected void createMinMaxTables(DFAState s) {
501		int smin = Label.MAX_CHAR_VALUE + 1;
502		int smax = Label.MIN_ATOM_VALUE - 1;
503		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
504			Transition edge = (Transition) s.transition(j);
505			Label label = edge.label;
506			if ( label.isAtom() ) {
507				if ( label.getAtom()>=Label.MIN_CHAR_VALUE ) {
508					if ( label.getAtom()<smin ) {
509						smin = label.getAtom();
510					}
511					if ( label.getAtom()>smax ) {
512						smax = label.getAtom();
513					}
514				}
515			}
516			else if ( label.isSet() ) {
517				IntervalSet labels = (IntervalSet)label.getSet();
518				int lmin = labels.getMinElement();
519				// if valid char (don't do EOF) and less than current min
520				if ( lmin<smin && lmin>=Label.MIN_CHAR_VALUE ) {
521					smin = labels.getMinElement();
522				}
523				if ( labels.getMaxElement()>smax ) {
524					smax = labels.getMaxElement();
525				}
526			}
527		}
528
529		if ( smax<0 ) {
530			// must be predicates or pure EOT transition; just zero out min, max
531			smin = Label.MIN_CHAR_VALUE;
532			smax = Label.MIN_CHAR_VALUE;
533		}
534
535		min.set(s.stateNumber, Utils.integer((char)smin));
536		max.set(s.stateNumber, Utils.integer((char)smax));
537
538		if ( smax<0 || smin>Label.MAX_CHAR_VALUE || smin<0 ) {
539			ErrorManager.internalError("messed up: min="+min+", max="+max);
540		}
541	}
542
543	protected void createTransitionTableEntryForState(DFAState s) {
544		/*
545		System.out.println("createTransitionTableEntryForState s"+s.stateNumber+
546			" dec "+s.dfa.decisionNumber+" cyclic="+s.dfa.isCyclic());
547			*/
548		int smax = ((Integer)max.get(s.stateNumber)).intValue();
549		int smin = ((Integer)min.get(s.stateNumber)).intValue();
550
551		Vector stateTransitions = new Vector(smax-smin+1);
552		stateTransitions.setSize(smax-smin+1);
553		transition.set(s.stateNumber, stateTransitions);
554		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
555			Transition edge = (Transition) s.transition(j);
556			Label label = edge.label;
557			if ( label.isAtom() && label.getAtom()>=Label.MIN_CHAR_VALUE ) {
558				int labelIndex = label.getAtom()-smin; // offset from 0
559				stateTransitions.set(labelIndex,
560									 Utils.integer(edge.target.stateNumber));
561			}
562			else if ( label.isSet() ) {
563				IntervalSet labels = (IntervalSet)label.getSet();
564				int[] atoms = labels.toArray();
565				for (int a = 0; a < atoms.length; a++) {
566					// set the transition if the label is valid (don't do EOF)
567					if ( atoms[a]>=Label.MIN_CHAR_VALUE ) {
568						int labelIndex = atoms[a]-smin; // offset from 0
569						stateTransitions.set(labelIndex,
570											 Utils.integer(edge.target.stateNumber));
571					}
572				}
573			}
574		}
575		// track unique state transition tables so we can reuse
576		Integer edgeClass = (Integer)edgeTransitionClassMap.get(stateTransitions);
577		if ( edgeClass!=null ) {
578			//System.out.println("we've seen this array before; size="+stateTransitions.size());
579			transitionEdgeTables.set(s.stateNumber, edgeClass);
580		}
581		else {
582			edgeClass = Utils.integer(edgeTransitionClass);
583			transitionEdgeTables.set(s.stateNumber, edgeClass);
584			edgeTransitionClassMap.put(stateTransitions, edgeClass);
585			edgeTransitionClass++;
586		}
587	}
588
589	/** Set up the EOT and EOF tables; we cannot put -1 min/max values so
590	 *  we need another way to test that in the DFA transition function.
591	 */
592	protected void createEOTAndEOFTables(DFAState s) {
593		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
594			Transition edge = (Transition) s.transition(j);
595			Label label = edge.label;
596			if ( label.isAtom() ) {
597				if ( label.getAtom()==Label.EOT ) {
598					// eot[s] points to accept state
599					eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
600				}
601				else if ( label.getAtom()==Label.EOF ) {
602					// eof[s] points to accept state
603					eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
604				}
605			}
606			else if ( label.isSet() ) {
607				IntervalSet labels = (IntervalSet)label.getSet();
608				int[] atoms = labels.toArray();
609				for (int a = 0; a < atoms.length; a++) {
610					if ( atoms[a]==Label.EOT ) {
611						// eot[s] points to accept state
612						eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
613					}
614					else if ( atoms[a]==Label.EOF ) {
615						eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
616					}
617				}
618			}
619		}
620	}
621
622	protected void createSpecialTable(DFAState s) {
623		// number all special states from 0...n-1 instead of their usual numbers
624		boolean hasSemPred = false;
625
626		// TODO this code is very similar to canGenerateSwitch.  Refactor to share
627		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
628			Transition edge = (Transition) s.transition(j);
629			Label label = edge.label;
630			// can't do a switch if the edges have preds or are going to
631			// require gated predicates
632			if ( label.isSemanticPredicate() ||
633				 ((DFAState)edge.target).getGatedPredicatesInNFAConfigurations()!=null)
634			{
635				hasSemPred = true;
636				break;
637			}
638		}
639		// if has pred or too big for table, make it special
640		int smax = ((Integer)max.get(s.stateNumber)).intValue();
641		int smin = ((Integer)min.get(s.stateNumber)).intValue();
642		if ( hasSemPred || smax-smin>MAX_STATE_TRANSITIONS_FOR_TABLE ) {
643			special.set(s.stateNumber,
644						Utils.integer(uniqueCompressedSpecialStateNum));
645			uniqueCompressedSpecialStateNum++;
646			specialStates.add(s);
647		}
648		else {
649			special.set(s.stateNumber, Utils.integer(-1)); // not special
650		}
651	}
652
653	public int predict(IntStream input) {
654		Interpreter interp = new Interpreter(nfa.grammar, input);
655		return interp.predict(this);
656	}
657
658	/** Add a new DFA state to this DFA if not already present.
659     *  To force an acyclic, fixed maximum depth DFA, just always
660	 *  return the incoming state.  By not reusing old states,
661	 *  no cycles can be created.  If we're doing fixed k lookahead
662	 *  don't updated uniqueStates, just return incoming state, which
663	 *  indicates it's a new state.
664     */
665    protected DFAState addState(DFAState d) {
666		if ( getUserMaxLookahead()>0 ) {
667			return d;
668		}
669		// does a DFA state exist already with everything the same
670		// except its state number?
671		DFAState existing = (DFAState)uniqueStates.get(d);
672		if ( existing != null ) {
673            /*
674            System.out.println("state "+d.stateNumber+" exists as state "+
675                existing.stateNumber);
676                */
677            // already there...get the existing DFA state
678			return existing;
679		}
680
681		// if not there, then add new state.
682		uniqueStates.put(d,d);
683        numberOfStates++;
684		return d;
685	}
686
687	public void removeState(DFAState d) {
688		DFAState it = (DFAState)uniqueStates.remove(d);
689		if ( it!=null ) {
690			numberOfStates--;
691		}
692	}
693
694	public Map<DFAState, DFAState> getUniqueStates() {
695		return uniqueStates;
696	}
697
698	/** What is the max state number ever created?  This may be beyond
699	 *  getNumberOfStates().
700	 */
701	public int getMaxStateNumber() {
702		return states.size()-1;
703	}
704
705	public DFAState getState(int stateNumber) {
706		return (DFAState)states.get(stateNumber);
707	}
708
709	public void setState(int stateNumber, DFAState d) {
710		states.set(stateNumber, d);
711	}
712
713	/** Is the DFA reduced?  I.e., does every state have a path to an accept
714     *  state?  If not, don't delete as we need to generate an error indicating
715     *  which paths are "dead ends".  Also tracks list of alts with no accept
716     *  state in the DFA.  Must call verify() first before this makes sense.
717     */
718    public boolean isReduced() {
719        return reduced;
720    }
721
722    /** Is this DFA cyclic?  That is, are there any loops?  If not, then
723     *  the DFA is essentially an LL(k) predictor for some fixed, max k value.
724     *  We can build a series of nested IF statements to match this.  In the
725     *  presence of cycles, we need to build a general DFA and interpret it
726     *  to distinguish between alternatives.
727     */
728    public boolean isCyclic() {
729        return cyclic && getUserMaxLookahead()==0;
730    }
731
732	public boolean isClassicDFA() {
733		return !isCyclic() &&
734			   !nfa.grammar.decisionsWhoseDFAsUsesSemPreds.contains(this) &&
735			   !nfa.grammar.decisionsWhoseDFAsUsesSynPreds.contains(this);
736	}
737
738	public boolean canInlineDecision() {
739		return !isCyclic() &&
740		    !probe.isNonLLStarDecision() &&
741			getNumberOfStates() < CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE;
742	}
743
744	/** Is this DFA derived from the NFA for the Tokens rule? */
745	public boolean isTokensRuleDecision() {
746		if ( nfa.grammar.type!=Grammar.LEXER ) {
747			return false;
748		}
749		NFAState nfaStart = getNFADecisionStartState();
750		Rule r = nfa.grammar.getLocallyDefinedRule(Grammar.ARTIFICIAL_TOKENS_RULENAME);
751		NFAState TokensRuleStart = r.startState;
752		NFAState TokensDecisionStart =
753			(NFAState)TokensRuleStart.transition[0].target;
754		return nfaStart == TokensDecisionStart;
755	}
756
757	/** The user may specify a max, acyclic lookahead for any decision.  No
758	 *  DFA cycles are created when this value, k, is greater than 0.
759	 *  If this decision has no k lookahead specified, then try the grammar.
760	 */
761	public int getUserMaxLookahead() {
762		if ( user_k>=0 ) { // cache for speed
763			return user_k;
764		}
765		user_k = nfa.grammar.getUserMaxLookahead(decisionNumber);
766		return user_k;
767	}
768
769	public boolean getAutoBacktrackMode() {
770		return nfa.grammar.getAutoBacktrackMode(decisionNumber);
771	}
772
773	public void setUserMaxLookahead(int k) {
774		this.user_k = k;
775	}
776
777	/** Return k if decision is LL(k) for some k else return max int
778     */
779	public int getMaxLookaheadDepth() {
780		if ( hasCycle() ) return Integer.MAX_VALUE;
781		// compute to be sure
782		return _getMaxLookaheadDepth(startState, 0);
783	}
784
785	int _getMaxLookaheadDepth(DFAState d, int depth) {
786		// not cyclic; don't worry about termination
787		// fail if pred edge.
788		int max = depth;
789		for (int i=0; i<d.getNumberOfTransitions(); i++) {
790			Transition t = d.transition(i);
791//			if ( t.isSemanticPredicate() ) return Integer.MAX_VALUE;
792			if ( !t.isSemanticPredicate() ) {
793				// if pure pred not gated, it must target stop state; don't count
794				DFAState edgeTarget = (DFAState)t.target;
795				int m = _getMaxLookaheadDepth(edgeTarget, depth+1);
796				max = Math.max(max, m);
797			}
798		}
799		return max;
800	}
801
802	/** Count all disambiguating syn preds (ignore synpred tests
803	 *  for gated edges, which occur for nonambig input sequences).
804	 *  E.g.,
805	 *  x  : (X)=> (X|Y)\n" +
806	 *     | X\n" +
807	 *     ;
808	 *
809	 *  gives
810	 *
811	 * .s0-X->.s1
812	 * .s0-Y&&{synpred1_t}?->:s2=>1
813	 * .s1-{synpred1_t}?->:s2=>1
814	 * .s1-{true}?->:s3=>2
815	 */
816	public boolean hasSynPred() {
817		boolean has = _hasSynPred(startState, new HashSet<DFAState>());
818//		if ( !has ) {
819//			System.out.println("no synpred in dec "+decisionNumber);
820//			FASerializer serializer = new FASerializer(nfa.grammar);
821//			String result = serializer.serialize(startState);
822//			System.out.println(result);
823//		}
824		return has;
825	}
826
827	public boolean getHasSynPred() { return hasSynPred(); } // for ST
828
829	boolean _hasSynPred(DFAState d, Set<DFAState> busy) {
830		busy.add(d);
831		for (int i=0; i<d.getNumberOfTransitions(); i++) {
832			Transition t = d.transition(i);
833			if ( t.isSemanticPredicate() ) {
834				SemanticContext ctx = t.label.getSemanticContext();
835//				if ( ctx.toString().indexOf("synpred")>=0 ) {
836//					System.out.println("has pred "+ctx.toString()+" "+ctx.isSyntacticPredicate());
837//					System.out.println(((SemanticContext.Predicate)ctx).predicateAST.token);
838//				}
839				if ( ctx.isSyntacticPredicate() ) return true;
840			}
841			DFAState edgeTarget = (DFAState)t.target;
842			if ( !busy.contains(edgeTarget) && _hasSynPred(edgeTarget, busy) ) return true;
843		}
844
845		return false;
846	}
847
848	public boolean hasSemPred() { // has user-defined sempred
849		boolean has = _hasSemPred(startState, new HashSet<DFAState>());
850		return has;
851	}
852
853	boolean _hasSemPred(DFAState d, Set<DFAState> busy) {
854		busy.add(d);
855		for (int i=0; i<d.getNumberOfTransitions(); i++) {
856			Transition t = d.transition(i);
857			if ( t.isSemanticPredicate() ) {
858				SemanticContext ctx = t.label.getSemanticContext();
859				if ( ctx.hasUserSemanticPredicate() ) return true;
860			}
861			DFAState edgeTarget = (DFAState)t.target;
862			if ( !busy.contains(edgeTarget) && _hasSemPred(edgeTarget, busy) ) return true;
863		}
864
865		return false;
866	}
867
868	/** Compute cyclic w/o relying on state computed during analysis. just check. */
869	public boolean hasCycle() {
870		boolean cyclic = _hasCycle(startState, new HashMap<DFAState, Integer>());
871		return cyclic;
872	}
873
874	boolean _hasCycle(DFAState d, Map<DFAState, Integer> busy) {
875		busy.put(d, CYCLIC_BUSY);
876		for (int i=0; i<d.getNumberOfTransitions(); i++) {
877			Transition t = d.transition(i);
878			DFAState target = (DFAState)t.target;
879			int cond = CYCLIC_UNKNOWN;
880			if ( busy.get(target)!=null ) cond = busy.get(target);
881			if ( cond==CYCLIC_BUSY ) return true;
882			if ( cond!=CYCLIC_DONE && _hasCycle(target, busy) ) return true;
883		}
884		busy.put(d, CYCLIC_DONE);
885		return false;
886	}
887
888
889    /** Return a list of Integer alt numbers for which no lookahead could
890     *  be computed or for which no single DFA accept state predicts those
891     *  alts.  Must call verify() first before this makes sense.
892     */
893    public List<Integer> getUnreachableAlts() {
894        return unreachableAlts;
895    }
896
897	/** Once this DFA has been built, need to verify that:
898	 *
899	 *  1. it's reduced
900	 *  2. all alts have an accept state
901	 *
902	 *  Elsewhere, in the NFA converter, we need to verify that:
903	 *
904	 *  3. alts i and j have disjoint lookahead if no sem preds
905	 *  4. if sem preds, nondeterministic alts must be sufficiently covered
906	 *
907	 *  This is avoided if analysis bails out for any reason.
908	 */
909	public void verify() {
910		doesStateReachAcceptState(startState);
911	}
912
913    /** figure out if this state eventually reaches an accept state and
914     *  modify the instance variable 'reduced' to indicate if we find
915     *  at least one state that cannot reach an accept state.  This implies
916     *  that the overall DFA is not reduced.  This algorithm should be
917     *  linear in the number of DFA states.
918     *
919     *  The algorithm also tracks which alternatives have no accept state,
920     *  indicating a nondeterminism.
921	 *
922	 *  Also computes whether the DFA is cyclic.
923	 *
924     *  TODO: I call getUniquelyPredicatedAlt too much; cache predicted alt
925     */
926    protected boolean doesStateReachAcceptState(DFAState d) {
927		if ( d.isAcceptState() ) {
928            // accept states have no edges emanating from them so we can return
929            d.setAcceptStateReachable(REACHABLE_YES);
930            // this alt is uniquely predicted, remove from nondeterministic list
931            int predicts = d.getUniquelyPredictedAlt();
932            unreachableAlts.remove(Utils.integer(predicts));
933            return true;
934        }
935
936        // avoid infinite loops
937        d.setAcceptStateReachable(REACHABLE_BUSY);
938
939        boolean anEdgeReachesAcceptState = false;
940        // Visit every transition, track if at least one edge reaches stop state
941		// Cannot terminate when we know this state reaches stop state since
942		// all transitions must be traversed to set status of each DFA state.
943		for (int i=0; i<d.getNumberOfTransitions(); i++) {
944            Transition t = d.transition(i);
945            DFAState edgeTarget = (DFAState)t.target;
946            int targetStatus = edgeTarget.getAcceptStateReachable();
947            if ( targetStatus==REACHABLE_BUSY ) { // avoid cycles; they say nothing
948                cyclic = true;
949                continue;
950            }
951            if ( targetStatus==REACHABLE_YES ) { // avoid unnecessary work
952                anEdgeReachesAcceptState = true;
953                continue;
954            }
955            if ( targetStatus==REACHABLE_NO ) {  // avoid unnecessary work
956                continue;
957            }
958			// target must be REACHABLE_UNKNOWN (i.e., unvisited)
959            if ( doesStateReachAcceptState(edgeTarget) ) {
960                anEdgeReachesAcceptState = true;
961                // have to keep looking so don't break loop
962                // must cover all states even if we find a path for this state
963            }
964        }
965        if ( anEdgeReachesAcceptState ) {
966            d.setAcceptStateReachable(REACHABLE_YES);
967        }
968        else {
969            d.setAcceptStateReachable(REACHABLE_NO);
970			reduced = false;
971        }
972        return anEdgeReachesAcceptState;
973    }
974
975	/** Walk all accept states and find the manually-specified synpreds.
976	 *  Gated preds are not always hoisted
977	 *  I used to do this in the code generator, but that is too late.
978	 *  This converter tries to avoid computing DFA for decisions in
979	 *  syntactic predicates that are not ever used such as those
980	 *  created by autobacktrack mode.
981	 */
982	public void findAllGatedSynPredsUsedInDFAAcceptStates() {
983		int nAlts = getNumberOfAlts();
984		for (int i=1; i<=nAlts; i++) {
985			DFAState a = getAcceptState(i);
986			//System.out.println("alt "+i+": "+a);
987			if ( a!=null ) {
988				Set synpreds = a.getGatedSyntacticPredicatesInNFAConfigurations();
989				if ( synpreds!=null ) {
990					// add all the predicates we find (should be just one, right?)
991					for (Iterator it = synpreds.iterator(); it.hasNext();) {
992						SemanticContext semctx = (SemanticContext) it.next();
993						// System.out.println("synpreds: "+semctx);
994						nfa.grammar.synPredUsedInDFA(this, semctx);
995					}
996				}
997			}
998		}
999	}
1000
1001	public NFAState getNFADecisionStartState() {
1002        return decisionNFAStartState;
1003    }
1004
1005	public DFAState getAcceptState(int alt) {
1006		return altToAcceptState[alt];
1007	}
1008
1009	public void setAcceptState(int alt, DFAState acceptState) {
1010		altToAcceptState[alt] = acceptState;
1011	}
1012
1013	public String getDescription() {
1014		return description;
1015	}
1016
1017	public int getDecisionNumber() {
1018        return decisionNFAStartState.getDecisionNumber();
1019    }
1020
1021	/** If this DFA failed to finish during construction, we might be
1022	 *  able to retry with k=1 but we need to know whether it will
1023	 *  potentially succeed.  Can only succeed if there is a predicate
1024	 *  to resolve the issue.  Don't try if k=1 already as it would
1025	 *  cycle forever.  Timeout can retry with k=1 even if no predicate
1026	 *  if k!=1.
1027	 */
1028	public boolean okToRetryDFAWithK1() {
1029		boolean nonLLStarOrOverflowAndPredicateVisible =
1030			(probe.isNonLLStarDecision()||probe.analysisOverflowed()) &&
1031		    predicateVisible; // auto backtrack or manual sem/syn
1032		return getUserMaxLookahead()!=1 &&
1033			 nonLLStarOrOverflowAndPredicateVisible;
1034	}
1035
1036	public String getReasonForFailure() {
1037		StringBuffer buf = new StringBuffer();
1038		if ( probe.isNonLLStarDecision() ) {
1039			buf.append("non-LL(*)");
1040			if ( predicateVisible ) {
1041				buf.append(" && predicate visible");
1042			}
1043		}
1044		if ( probe.analysisOverflowed() ) {
1045			buf.append("recursion overflow");
1046			if ( predicateVisible ) {
1047				buf.append(" && predicate visible");
1048			}
1049		}
1050		buf.append("\n");
1051		return buf.toString();
1052	}
1053
1054	/** What GrammarAST node (derived from the grammar) is this DFA
1055     *  associated with?  It will point to the start of a block or
1056     *  the loop back of a (...)+ block etc...
1057     */
1058    public GrammarAST getDecisionASTNode() {
1059        return decisionNFAStartState.associatedASTNode;
1060    }
1061
1062    public boolean isGreedy() {
1063		GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decisionNumber);
1064		Object v = nfa.grammar.getBlockOption(blockAST,"greedy");
1065		if ( v!=null && v.equals("false") ) {
1066			return false;
1067		}
1068        return true;
1069
1070	}
1071
1072    public DFAState newState() {
1073        DFAState n = new DFAState(this);
1074        n.stateNumber = stateCounter;
1075        stateCounter++;
1076		states.setSize(n.stateNumber+1);
1077		states.set(n.stateNumber, n); // track state num to state
1078        return n;
1079    }
1080
1081	public int getNumberOfStates() {
1082		if ( getUserMaxLookahead()>0 ) {
1083			// if using fixed lookahead then uniqueSets not set
1084			return states.size();
1085		}
1086		return numberOfStates;
1087	}
1088
1089	public int getNumberOfAlts() {
1090		return nAlts;
1091	}
1092
1093//	public boolean analysisTimedOut() {
1094//		return probe.analysisTimedOut();
1095//	}
1096
1097    protected void initAltRelatedInfo() {
1098        unreachableAlts = new LinkedList();
1099        for (int i = 1; i <= nAlts; i++) {
1100            unreachableAlts.add(Utils.integer(i));
1101        }
1102		altToAcceptState = new DFAState[nAlts+1];
1103    }
1104
1105	public String toString() {
1106		FASerializer serializer = new FASerializer(nfa.grammar);
1107		if ( startState==null ) {
1108			return "";
1109		}
1110		return serializer.serialize(startState, false);
1111	}
1112
1113	/** EOT (end of token) is a label that indicates when the DFA conversion
1114	 *  algorithm would "fall off the end of a lexer rule".  It normally
1115	 *  means the default clause.  So for ('a'..'z')+ you would see a DFA
1116	 *  with a state that has a..z and EOT emanating from it.  a..z would
1117	 *  jump to a state predicting alt 1 and EOT would jump to a state
1118	 *  predicting alt 2 (the exit loop branch).  EOT implies anything other
1119	 *  than a..z.  If for some reason, the set is "all char" such as with
1120	 *  the wildcard '.', then EOT cannot match anything.  For example,
1121	 *
1122	 *     BLOCK : '{' (.)* '}'
1123	 *
1124	 *  consumes all char until EOF when greedy=true.  When all edges are
1125	 *  combined for the DFA state after matching '}', you will find that
1126	 *  it is all char.  The EOT transition has nothing to match and is
1127	 *  unreachable.  The findNewDFAStatesAndAddDFATransitions() method
1128	 *  must know to ignore the EOT, so we simply remove it from the
1129	 *  reachable labels.  Later analysis will find that the exit branch
1130	 *  is not predicted by anything.  For greedy=false, we leave only
1131	 *  the EOT label indicating that the DFA should stop immediately
1132	 *  and predict the exit branch. The reachable labels are often a
1133	 *  set of disjoint values like: [<EOT>, 42, {0..41, 43..65534}]
1134	 *  due to DFA conversion so must construct a pure set to see if
1135	 *  it is same as Label.ALLCHAR.
1136	 *
1137	 *  Only do this for Lexers.
1138	 *
1139	 *  If EOT coexists with ALLCHAR:
1140	 *  1. If not greedy, modify the labels parameter to be EOT
1141	 *  2. If greedy, remove EOT from the labels set
1142	protected boolean reachableLabelsEOTCoexistsWithAllChar(OrderedHashSet labels)
1143	{
1144		Label eot = new Label(Label.EOT);
1145		if ( !labels.containsKey(eot) ) {
1146			return false;
1147		}
1148		System.out.println("### contains EOT");
1149		boolean containsAllChar = false;
1150		IntervalSet completeVocab = new IntervalSet();
1151		int n = labels.size();
1152		for (int i=0; i<n; i++) {
1153			Label rl = (Label)labels.get(i);
1154			if ( !rl.equals(eot) ) {
1155				completeVocab.addAll(rl.getSet());
1156			}
1157		}
1158		System.out.println("completeVocab="+completeVocab);
1159		if ( completeVocab.equals(Label.ALLCHAR) ) {
1160			System.out.println("all char");
1161			containsAllChar = true;
1162		}
1163		return containsAllChar;
1164	}
1165	 */
1166}
1167
1168