/* * [The "BSD license"] * Copyright (c) 2010 Terence Parr * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.antlr.analysis; import org.antlr.codegen.CodeGenerator; import org.antlr.misc.IntSet; import org.antlr.misc.IntervalSet; import org.antlr.misc.Utils; import org.antlr.runtime.IntStream; import org.stringtemplate.v4.ST; import org.antlr.tool.*; import java.util.*; /** A DFA (converted from a grammar's NFA). * DFAs are used as prediction machine for alternative blocks in all kinds * of recognizers (lexers, parsers, tree walkers). */ public class DFA { public static final int REACHABLE_UNKNOWN = -2; public static final int REACHABLE_BUSY = -1; // in process of computing public static final int REACHABLE_NO = 0; public static final int REACHABLE_YES = 1; public static final int CYCLIC_UNKNOWN = -2; public static final int CYCLIC_BUSY = -1; // in process of computing public static final int CYCLIC_DONE = 0; /** Prevent explosion of DFA states during conversion. The max number * of states per alt in a single decision's DFA. public static final int MAX_STATES_PER_ALT_IN_DFA = 450; */ /** Set to 0 to not terminate early (time in ms) */ public static int MAX_TIME_PER_DFA_CREATION = 1*1000; /** How many edges can each DFA state have before a "special" state * is created that uses IF expressions instead of a table? */ public static int MAX_STATE_TRANSITIONS_FOR_TABLE = 65534; /** What's the start state for this DFA? */ public DFAState startState; /** This DFA is being built for which decision? */ public int decisionNumber = 0; /** From what NFAState did we create the DFA? */ public NFAState decisionNFAStartState; /** The printable grammar fragment associated with this DFA */ public String description; /** A set of all uniquely-numbered DFA states. Maps hash of DFAState * to the actual DFAState object. We use this to detect * existing DFA states. Map. Use Map so * we can get old state back (Set only allows you to see if it's there). * Not used during fixed k lookahead as it's a waste to fill it with * a dup of states array. */ protected Map uniqueStates = new HashMap(); /** Maps the state number to the actual DFAState. Use a Vector as it * grows automatically when I set the ith element. This contains all * states, but the states are not unique. s3 might be same as s1 so * s3 -> s1 in this table. This is how cycles occur. If fixed k, * then these states will all be unique as states[i] always points * at state i when no cycles exist. * * This is managed in parallel with uniqueStates and simply provides * a way to go from state number to DFAState rather than via a * hash lookup. */ protected Vector states = new Vector(); /** Unique state numbers per DFA */ protected int stateCounter = 0; /** count only new states not states that were rejected as already present */ protected int numberOfStates = 0; /** User specified max fixed lookahead. If 0, nothing specified. -1 * implies we have not looked at the options table yet to set k. */ protected int user_k = -1; /** While building the DFA, track max lookahead depth if not cyclic */ protected int max_k = -1; /** Is this DFA reduced? I.e., can all states lead to an accept state? */ protected boolean reduced = true; /** Are there any loops in this DFA? * Computed by doesStateReachAcceptState() */ protected boolean cyclic = false; /** Track whether this DFA has at least one sem/syn pred encountered * during a closure operation. This is useful for deciding whether * to retry a non-LL(*) with k=1. If no pred, it will not work w/o * a pred so don't bother. It would just give another error message. */ public boolean predicateVisible = false; public boolean hasPredicateBlockedByAction = false; /** Each alt in an NFA derived from a grammar must have a DFA state that * predicts it lest the parser not know what to do. Nondeterminisms can * lead to this situation (assuming no semantic predicates can resolve * the problem) and when for some reason, I cannot compute the lookahead * (which might arise from an error in the algorithm or from * left-recursion etc...). This list starts out with all alts contained * and then in method doesStateReachAcceptState() I remove the alts I * know to be uniquely predicted. */ protected List unreachableAlts; protected int nAlts = 0; /** We only want one accept state per predicted alt; track here */ protected DFAState[] altToAcceptState; /** Track whether an alt discovers recursion for each alt during * NFA to DFA conversion; >1 alt with recursion implies nonregular. */ public IntSet recursiveAltSet = new IntervalSet(); /** Which NFA are we converting (well, which piece of the NFA)? */ public NFA nfa; protected NFAToDFAConverter nfaConverter; /** This probe tells you a lot about a decision and is useful even * when there is no error such as when a syntactic nondeterminism * is solved via semantic predicates. Perhaps a GUI would want * the ability to show that. */ public DecisionProbe probe = new DecisionProbe(this); /** Track absolute time of the conversion so we can have a failsafe: * if it takes too long, then terminate. Assume bugs are in the * analysis engine. */ //protected long conversionStartTime; /** Map an edge transition table to a unique set number; ordered so * we can push into the output template as an ordered list of sets * and then ref them from within the transition[][] table. Like this * for C# target: * public static readonly DFA30_transition0 = * new short[] { 46, 46, -1, 46, 46, -1, -1, -1, -1, -1, -1, -1,...}; * public static readonly DFA30_transition1 = * new short[] { 21 }; * public static readonly short[][] DFA30_transition = { * DFA30_transition0, * DFA30_transition0, * DFA30_transition1, * ... * }; */ public Map edgeTransitionClassMap = new LinkedHashMap(); /** The unique edge transition class number; every time we see a new * set of edges emanating from a state, we number it so we can reuse * if it's every seen again for another state. For Java grammar, * some of the big edge transition tables are seen about 57 times. */ protected int edgeTransitionClass =0; /* This DFA can be converted to a transition[state][char] table and * the following tables are filled by createStateTables upon request. * These are injected into the templates for code generation. * See March 25, 2006 entry for description: * http://www.antlr.org/blog/antlr3/codegen.tml * Often using Vector as can't set ith position in a List and have * it extend list size; bizarre. */ /** List of special DFAState objects */ public List specialStates; /** List of ST for special states. */ public List specialStateSTs; public Vector accept; public Vector eot; public Vector eof; public Vector min; public Vector max; public Vector special; public Vector transition; /** just the Vector indicating which unique edge table is at * position i. */ public Vector transitionEdgeTables; // not used by java yet protected int uniqueCompressedSpecialStateNum = 0; /** Which generator to use if we're building state tables */ protected CodeGenerator generator = null; protected DFA() {;} public DFA(int decisionNumber, NFAState decisionStartState) { this.decisionNumber = decisionNumber; this.decisionNFAStartState = decisionStartState; nfa = decisionStartState.nfa; nAlts = nfa.grammar.getNumberOfAltsForDecisionNFA(decisionStartState); //setOptions( nfa.grammar.getDecisionOptions(getDecisionNumber()) ); initAltRelatedInfo(); //long start = System.currentTimeMillis(); nfaConverter = new NFAToDFAConverter(this); try { nfaConverter.convert(); // figure out if there are problems with decision verify(); if ( !probe.isDeterministic() || probe.analysisOverflowed() ) { probe.issueWarnings(); } // must be after verify as it computes cyclic, needed by this routine // should be after warnings because early termination or something // will not allow the reset to operate properly in some cases. resetStateNumbersToBeContiguous(); //long stop = System.currentTimeMillis(); //System.out.println("verify cost: "+(int)(stop-start)+" ms"); } // catch (AnalysisTimeoutException at) { // probe.reportAnalysisTimeout(); // if ( !okToRetryDFAWithK1() ) { // probe.issueWarnings(); // } // } catch (NonLLStarDecisionException nonLL) { probe.reportNonLLStarDecision(this); // >1 alt recurses, k=* and no auto backtrack nor manual sem/syn if ( !okToRetryDFAWithK1() ) { probe.issueWarnings(); } } } /** Walk all states and reset their numbers to be a contiguous sequence * of integers starting from 0. Only cyclic DFA can have unused positions * in states list. State i might be identical to a previous state j and * will result in states[i] == states[j]. We don't want to waste a state * number on this. Useful mostly for code generation in tables. * * At the start of this routine, states[i].stateNumber <= i by definition. * If states[50].stateNumber is 50 then a cycle during conversion may * try to add state 103, but we find that an identical DFA state, named * 50, already exists, hence, states[103]==states[50] and both have * stateNumber 50 as they point at same object. Afterwards, the set * of state numbers from all states should represent a contiguous range * from 0..n-1 where n is the number of unique states. */ public void resetStateNumbersToBeContiguous() { if ( getUserMaxLookahead()>0 ) { // all numbers are unique already; no states are thrown out. return; } // walk list of DFAState objects by state number, // setting state numbers to 0..n-1 int snum=0; for (int i = 0; i <= getMaxStateNumber(); i++) { DFAState s = getState(i); // some states are unused after creation most commonly due to cycles // or conflict resolution. if ( s==null ) { continue; } // state i is mapped to DFAState with state number set to i originally // so if it's less than i, then we renumbered it already; that // happens when states have been merged or cycles occurred I think. // states[50] will point to DFAState with s50 in it but // states[103] might also point at this same DFAState. Since // 50 < 103 then it's already been renumbered as it points downwards. boolean alreadyRenumbered = s.stateNumber> which is the transition[][] table for (int i = 0; i < transition.size(); i++) { Vector transitionsForState = (Vector) transition.elementAt(i); encoded.add(getRunLengthEncoding(transitionsForState)); } return encoded; } /** Compress the incoming data list so that runs of same number are * encoded as number,value pair sequences. 3 -1 -1 -1 28 is encoded * as 1 3 3 -1 1 28. I am pretty sure this is the lossless compression * that GIF files use. Transition tables are heavily compressed by * this technique. I got the idea from JFlex http://jflex.de/ * * Return List where each string is either \xyz for 8bit char * and \uFFFF for 16bit. Hideous and specific to Java, but it is the * only target bad enough to need it. */ public List getRunLengthEncoding(List data) { if ( data==null || data.size()==0 ) { // for states with no transitions we want an empty string "" // to hold its place in the transitions array. List empty = new ArrayList(); empty.add(""); return empty; } int size = Math.max(2,data.size()/2); List encoded = new ArrayList(size); // guess at size // scan values looking for runs int i = 0; Integer emptyValue = Utils.integer(-1); while ( i < data.size() ) { Integer I = (Integer)data.get(i); if ( I==null ) { I = emptyValue; } // count how many v there are? int n = 0; for (int j = i; j < data.size(); j++) { Integer v = (Integer)data.get(j); if ( v==null ) { v = emptyValue; } if ( I.equals(v) ) { n++; } else { break; } } encoded.add(generator.target.encodeIntAsCharEscape((char)n)); encoded.add(generator.target.encodeIntAsCharEscape((char)I.intValue())); i+=n; } return encoded; } public void createStateTables(CodeGenerator generator) { //System.out.println("createTables:\n"+this); this.generator = generator; description = getNFADecisionStartState().getDescription(); description = generator.target.getTargetStringLiteralFromString(description); // create all the tables special = new Vector(this.getNumberOfStates()); // Vector special.setSize(this.getNumberOfStates()); specialStates = new ArrayList(); // List specialStateSTs = new ArrayList(); // List accept = new Vector(this.getNumberOfStates()); // Vector accept.setSize(this.getNumberOfStates()); eot = new Vector(this.getNumberOfStates()); // Vector eot.setSize(this.getNumberOfStates()); eof = new Vector(this.getNumberOfStates()); // Vector eof.setSize(this.getNumberOfStates()); min = new Vector(this.getNumberOfStates()); // Vector min.setSize(this.getNumberOfStates()); max = new Vector(this.getNumberOfStates()); // Vector max.setSize(this.getNumberOfStates()); transition = new Vector(this.getNumberOfStates()); // Vector> transition.setSize(this.getNumberOfStates()); transitionEdgeTables = new Vector(this.getNumberOfStates()); // Vector> transitionEdgeTables.setSize(this.getNumberOfStates()); // for each state in the DFA, fill relevant tables. Iterator it = null; if ( getUserMaxLookahead()>0 ) { it = states.iterator(); } else { it = getUniqueStates().values().iterator(); } while ( it.hasNext() ) { DFAState s = (DFAState)it.next(); if ( s==null ) { // ignore null states; some acylic DFA see this condition // when inlining DFA (due to lacking of exit branch pruning?) continue; } if ( s.isAcceptState() ) { // can't compute min,max,special,transition on accepts accept.set(s.stateNumber, Utils.integer(s.getUniquelyPredictedAlt())); } else { createMinMaxTables(s); createTransitionTableEntryForState(s); createSpecialTable(s); createEOTAndEOFTables(s); } } // now that we have computed list of specialStates, gen code for 'em for (int i = 0; i < specialStates.size(); i++) { DFAState ss = (DFAState) specialStates.get(i); ST stateST = generator.generateSpecialState(ss); specialStateSTs.add(stateST); } // check that the tables are not messed up by encode/decode /* testEncodeDecode(min); testEncodeDecode(max); testEncodeDecode(accept); testEncodeDecode(special); System.out.println("min="+min); System.out.println("max="+max); System.out.println("eot="+eot); System.out.println("eof="+eof); System.out.println("accept="+accept); System.out.println("special="+special); System.out.println("transition="+transition); */ } /* private void testEncodeDecode(List data) { System.out.println("data="+data); List encoded = getRunLengthEncoding(data); StringBuffer buf = new StringBuffer(); for (int i = 0; i < encoded.size(); i++) { String I = (String)encoded.get(i); int v = 0; if ( I.startsWith("\\u") ) { v = Integer.parseInt(I.substring(2,I.length()), 16); } else { v = Integer.parseInt(I.substring(1,I.length()), 8); } buf.append((char)v); } String encodedS = buf.toString(); short[] decoded = org.antlr.runtime.DFA.unpackEncodedString(encodedS); //System.out.println("decoded:"); for (int i = 0; i < decoded.length; i++) { short x = decoded[i]; if ( x!=((Integer)data.get(i)).intValue() ) { System.err.println("problem with encoding"); } //System.out.print(", "+x); } //System.out.println(); } */ protected void createMinMaxTables(DFAState s) { int smin = Label.MAX_CHAR_VALUE + 1; int smax = Label.MIN_ATOM_VALUE - 1; for (int j = 0; j < s.getNumberOfTransitions(); j++) { Transition edge = (Transition) s.transition(j); Label label = edge.label; if ( label.isAtom() ) { if ( label.getAtom()>=Label.MIN_CHAR_VALUE ) { if ( label.getAtom()smax ) { smax = label.getAtom(); } } } else if ( label.isSet() ) { IntervalSet labels = (IntervalSet)label.getSet(); int lmin = labels.getMinElement(); // if valid char (don't do EOF) and less than current min if ( lmin=Label.MIN_CHAR_VALUE ) { smin = labels.getMinElement(); } if ( labels.getMaxElement()>smax ) { smax = labels.getMaxElement(); } } } if ( smax<0 ) { // must be predicates or pure EOT transition; just zero out min, max smin = Label.MIN_CHAR_VALUE; smax = Label.MIN_CHAR_VALUE; } min.set(s.stateNumber, Utils.integer((char)smin)); max.set(s.stateNumber, Utils.integer((char)smax)); if ( smax<0 || smin>Label.MAX_CHAR_VALUE || smin<0 ) { ErrorManager.internalError("messed up: min="+min+", max="+max); } } protected void createTransitionTableEntryForState(DFAState s) { /* System.out.println("createTransitionTableEntryForState s"+s.stateNumber+ " dec "+s.dfa.decisionNumber+" cyclic="+s.dfa.isCyclic()); */ int smax = ((Integer)max.get(s.stateNumber)).intValue(); int smin = ((Integer)min.get(s.stateNumber)).intValue(); Vector stateTransitions = new Vector(smax-smin+1); stateTransitions.setSize(smax-smin+1); transition.set(s.stateNumber, stateTransitions); for (int j = 0; j < s.getNumberOfTransitions(); j++) { Transition edge = (Transition) s.transition(j); Label label = edge.label; if ( label.isAtom() && label.getAtom()>=Label.MIN_CHAR_VALUE ) { int labelIndex = label.getAtom()-smin; // offset from 0 stateTransitions.set(labelIndex, Utils.integer(edge.target.stateNumber)); } else if ( label.isSet() ) { IntervalSet labels = (IntervalSet)label.getSet(); int[] atoms = labels.toArray(); for (int a = 0; a < atoms.length; a++) { // set the transition if the label is valid (don't do EOF) if ( atoms[a]>=Label.MIN_CHAR_VALUE ) { int labelIndex = atoms[a]-smin; // offset from 0 stateTransitions.set(labelIndex, Utils.integer(edge.target.stateNumber)); } } } } // track unique state transition tables so we can reuse Integer edgeClass = (Integer)edgeTransitionClassMap.get(stateTransitions); if ( edgeClass!=null ) { //System.out.println("we've seen this array before; size="+stateTransitions.size()); transitionEdgeTables.set(s.stateNumber, edgeClass); } else { edgeClass = Utils.integer(edgeTransitionClass); transitionEdgeTables.set(s.stateNumber, edgeClass); edgeTransitionClassMap.put(stateTransitions, edgeClass); edgeTransitionClass++; } } /** Set up the EOT and EOF tables; we cannot put -1 min/max values so * we need another way to test that in the DFA transition function. */ protected void createEOTAndEOFTables(DFAState s) { for (int j = 0; j < s.getNumberOfTransitions(); j++) { Transition edge = (Transition) s.transition(j); Label label = edge.label; if ( label.isAtom() ) { if ( label.getAtom()==Label.EOT ) { // eot[s] points to accept state eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber)); } else if ( label.getAtom()==Label.EOF ) { // eof[s] points to accept state eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber)); } } else if ( label.isSet() ) { IntervalSet labels = (IntervalSet)label.getSet(); int[] atoms = labels.toArray(); for (int a = 0; a < atoms.length; a++) { if ( atoms[a]==Label.EOT ) { // eot[s] points to accept state eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber)); } else if ( atoms[a]==Label.EOF ) { eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber)); } } } } } protected void createSpecialTable(DFAState s) { // number all special states from 0...n-1 instead of their usual numbers boolean hasSemPred = false; // TODO this code is very similar to canGenerateSwitch. Refactor to share for (int j = 0; j < s.getNumberOfTransitions(); j++) { Transition edge = (Transition) s.transition(j); Label label = edge.label; // can't do a switch if the edges have preds or are going to // require gated predicates if ( label.isSemanticPredicate() || ((DFAState)edge.target).getGatedPredicatesInNFAConfigurations()!=null) { hasSemPred = true; break; } } // if has pred or too big for table, make it special int smax = ((Integer)max.get(s.stateNumber)).intValue(); int smin = ((Integer)min.get(s.stateNumber)).intValue(); if ( hasSemPred || smax-smin>MAX_STATE_TRANSITIONS_FOR_TABLE ) { special.set(s.stateNumber, Utils.integer(uniqueCompressedSpecialStateNum)); uniqueCompressedSpecialStateNum++; specialStates.add(s); } else { special.set(s.stateNumber, Utils.integer(-1)); // not special } } public int predict(IntStream input) { Interpreter interp = new Interpreter(nfa.grammar, input); return interp.predict(this); } /** Add a new DFA state to this DFA if not already present. * To force an acyclic, fixed maximum depth DFA, just always * return the incoming state. By not reusing old states, * no cycles can be created. If we're doing fixed k lookahead * don't updated uniqueStates, just return incoming state, which * indicates it's a new state. */ protected DFAState addState(DFAState d) { if ( getUserMaxLookahead()>0 ) { return d; } // does a DFA state exist already with everything the same // except its state number? DFAState existing = (DFAState)uniqueStates.get(d); if ( existing != null ) { /* System.out.println("state "+d.stateNumber+" exists as state "+ existing.stateNumber); */ // already there...get the existing DFA state return existing; } // if not there, then add new state. uniqueStates.put(d,d); numberOfStates++; return d; } public void removeState(DFAState d) { DFAState it = (DFAState)uniqueStates.remove(d); if ( it!=null ) { numberOfStates--; } } public Map getUniqueStates() { return uniqueStates; } /** What is the max state number ever created? This may be beyond * getNumberOfStates(). */ public int getMaxStateNumber() { return states.size()-1; } public DFAState getState(int stateNumber) { return (DFAState)states.get(stateNumber); } public void setState(int stateNumber, DFAState d) { states.set(stateNumber, d); } /** Is the DFA reduced? I.e., does every state have a path to an accept * state? If not, don't delete as we need to generate an error indicating * which paths are "dead ends". Also tracks list of alts with no accept * state in the DFA. Must call verify() first before this makes sense. */ public boolean isReduced() { return reduced; } /** Is this DFA cyclic? That is, are there any loops? If not, then * the DFA is essentially an LL(k) predictor for some fixed, max k value. * We can build a series of nested IF statements to match this. In the * presence of cycles, we need to build a general DFA and interpret it * to distinguish between alternatives. */ public boolean isCyclic() { return cyclic && getUserMaxLookahead()==0; } public boolean isClassicDFA() { return !isCyclic() && !nfa.grammar.decisionsWhoseDFAsUsesSemPreds.contains(this) && !nfa.grammar.decisionsWhoseDFAsUsesSynPreds.contains(this); } public boolean canInlineDecision() { return !isCyclic() && !probe.isNonLLStarDecision() && getNumberOfStates() < CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE; } /** Is this DFA derived from the NFA for the Tokens rule? */ public boolean isTokensRuleDecision() { if ( nfa.grammar.type!=Grammar.LEXER ) { return false; } NFAState nfaStart = getNFADecisionStartState(); Rule r = nfa.grammar.getLocallyDefinedRule(Grammar.ARTIFICIAL_TOKENS_RULENAME); NFAState TokensRuleStart = r.startState; NFAState TokensDecisionStart = (NFAState)TokensRuleStart.transition[0].target; return nfaStart == TokensDecisionStart; } /** The user may specify a max, acyclic lookahead for any decision. No * DFA cycles are created when this value, k, is greater than 0. * If this decision has no k lookahead specified, then try the grammar. */ public int getUserMaxLookahead() { if ( user_k>=0 ) { // cache for speed return user_k; } user_k = nfa.grammar.getUserMaxLookahead(decisionNumber); return user_k; } public boolean getAutoBacktrackMode() { return nfa.grammar.getAutoBacktrackMode(decisionNumber); } public void setUserMaxLookahead(int k) { this.user_k = k; } /** Return k if decision is LL(k) for some k else return max int */ public int getMaxLookaheadDepth() { if ( hasCycle() ) return Integer.MAX_VALUE; // compute to be sure return _getMaxLookaheadDepth(startState, 0); } int _getMaxLookaheadDepth(DFAState d, int depth) { // not cyclic; don't worry about termination // fail if pred edge. int max = depth; for (int i=0; i (X|Y)\n" + * | X\n" + * ; * * gives * * .s0-X->.s1 * .s0-Y&&{synpred1_t}?->:s2=>1 * .s1-{synpred1_t}?->:s2=>1 * .s1-{true}?->:s3=>2 */ public boolean hasSynPred() { boolean has = _hasSynPred(startState, new HashSet()); // if ( !has ) { // System.out.println("no synpred in dec "+decisionNumber); // FASerializer serializer = new FASerializer(nfa.grammar); // String result = serializer.serialize(startState); // System.out.println(result); // } return has; } public boolean getHasSynPred() { return hasSynPred(); } // for ST boolean _hasSynPred(DFAState d, Set busy) { busy.add(d); for (int i=0; i=0 ) { // System.out.println("has pred "+ctx.toString()+" "+ctx.isSyntacticPredicate()); // System.out.println(((SemanticContext.Predicate)ctx).predicateAST.token); // } if ( ctx.isSyntacticPredicate() ) return true; } DFAState edgeTarget = (DFAState)t.target; if ( !busy.contains(edgeTarget) && _hasSynPred(edgeTarget, busy) ) return true; } return false; } public boolean hasSemPred() { // has user-defined sempred boolean has = _hasSemPred(startState, new HashSet()); return has; } boolean _hasSemPred(DFAState d, Set busy) { busy.add(d); for (int i=0; i()); return cyclic; } boolean _hasCycle(DFAState d, Map busy) { busy.put(d, CYCLIC_BUSY); for (int i=0; i getUnreachableAlts() { return unreachableAlts; } /** Once this DFA has been built, need to verify that: * * 1. it's reduced * 2. all alts have an accept state * * Elsewhere, in the NFA converter, we need to verify that: * * 3. alts i and j have disjoint lookahead if no sem preds * 4. if sem preds, nondeterministic alts must be sufficiently covered * * This is avoided if analysis bails out for any reason. */ public void verify() { doesStateReachAcceptState(startState); } /** figure out if this state eventually reaches an accept state and * modify the instance variable 'reduced' to indicate if we find * at least one state that cannot reach an accept state. This implies * that the overall DFA is not reduced. This algorithm should be * linear in the number of DFA states. * * The algorithm also tracks which alternatives have no accept state, * indicating a nondeterminism. * * Also computes whether the DFA is cyclic. * * TODO: I call getUniquelyPredicatedAlt too much; cache predicted alt */ protected boolean doesStateReachAcceptState(DFAState d) { if ( d.isAcceptState() ) { // accept states have no edges emanating from them so we can return d.setAcceptStateReachable(REACHABLE_YES); // this alt is uniquely predicted, remove from nondeterministic list int predicts = d.getUniquelyPredictedAlt(); unreachableAlts.remove(Utils.integer(predicts)); return true; } // avoid infinite loops d.setAcceptStateReachable(REACHABLE_BUSY); boolean anEdgeReachesAcceptState = false; // Visit every transition, track if at least one edge reaches stop state // Cannot terminate when we know this state reaches stop state since // all transitions must be traversed to set status of each DFA state. for (int i=0; i0 ) { // if using fixed lookahead then uniqueSets not set return states.size(); } return numberOfStates; } public int getNumberOfAlts() { return nAlts; } // public boolean analysisTimedOut() { // return probe.analysisTimedOut(); // } protected void initAltRelatedInfo() { unreachableAlts = new LinkedList(); for (int i = 1; i <= nAlts; i++) { unreachableAlts.add(Utils.integer(i)); } altToAcceptState = new DFAState[nAlts+1]; } public String toString() { FASerializer serializer = new FASerializer(nfa.grammar); if ( startState==null ) { return ""; } return serializer.serialize(startState, false); } /** EOT (end of token) is a label that indicates when the DFA conversion * algorithm would "fall off the end of a lexer rule". It normally * means the default clause. So for ('a'..'z')+ you would see a DFA * with a state that has a..z and EOT emanating from it. a..z would * jump to a state predicting alt 1 and EOT would jump to a state * predicting alt 2 (the exit loop branch). EOT implies anything other * than a..z. If for some reason, the set is "all char" such as with * the wildcard '.', then EOT cannot match anything. For example, * * BLOCK : '{' (.)* '}' * * consumes all char until EOF when greedy=true. When all edges are * combined for the DFA state after matching '}', you will find that * it is all char. The EOT transition has nothing to match and is * unreachable. The findNewDFAStatesAndAddDFATransitions() method * must know to ignore the EOT, so we simply remove it from the * reachable labels. Later analysis will find that the exit branch * is not predicted by anything. For greedy=false, we leave only * the EOT label indicating that the DFA should stop immediately * and predict the exit branch. The reachable labels are often a * set of disjoint values like: [, 42, {0..41, 43..65534}] * due to DFA conversion so must construct a pure set to see if * it is same as Label.ALLCHAR. * * Only do this for Lexers. * * If EOT coexists with ALLCHAR: * 1. If not greedy, modify the labels parameter to be EOT * 2. If greedy, remove EOT from the labels set protected boolean reachableLabelsEOTCoexistsWithAllChar(OrderedHashSet labels) { Label eot = new Label(Label.EOT); if ( !labels.containsKey(eot) ) { return false; } System.out.println("### contains EOT"); boolean containsAllChar = false; IntervalSet completeVocab = new IntervalSet(); int n = labels.size(); for (int i=0; i