/* * [The "BSD license"] * Copyright (c) 2010 Terence Parr * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.antlr.analysis; import org.antlr.misc.IntSet; import org.antlr.misc.MultiMap; import org.antlr.misc.OrderedHashSet; import org.antlr.misc.Utils; import org.antlr.tool.Grammar; import java.util.*; /** A DFA state represents a set of possible NFA configurations. * As Aho, Sethi, Ullman p. 117 says "The DFA uses its state * to keep track of all possible states the NFA can be in after * reading each input symbol. That is to say, after reading * input a1a2..an, the DFA is in a state that represents the * subset T of the states of the NFA that are reachable from the * NFA's start state along some path labeled a1a2..an." * In conventional NFA->DFA conversion, therefore, the subset T * would be a bitset representing the set of states the * NFA could be in. We need to track the alt predicted by each * state as well, however. More importantly, we need to maintain * a stack of states, tracking the closure operations as they * jump from rule to rule, emulating rule invocations (method calls). * Recall that NFAs do not normally have a stack like a pushdown-machine * so I have to add one to simulate the proper lookahead sequences for * the underlying LL grammar from which the NFA was derived. * * I use a list of NFAConfiguration objects. An NFAConfiguration * is both a state (ala normal conversion) and an NFAContext describing * the chain of rules (if any) followed to arrive at that state. There * is also the semantic context, which is the "set" of predicates found * on the path to this configuration. * * A DFA state may have multiple references to a particular state, * but with different NFAContexts (with same or different alts) * meaning that state was reached via a different set of rule invocations. */ public class DFAState extends State { public static final int INITIAL_NUM_TRANSITIONS = 4; public static final int PREDICTED_ALT_UNSET = NFA.INVALID_ALT_NUMBER-1; /** We are part of what DFA? Use this ref to get access to the * context trees for an alt. */ public DFA dfa; /** Track the transitions emanating from this DFA state. The List * elements are Transition objects. */ protected List transitions = new ArrayList(INITIAL_NUM_TRANSITIONS); /** When doing an acyclic DFA, this is the number of lookahead symbols * consumed to reach this state. This value may be nonzero for most * dfa states, but it is only a valid value if the user has specified * a max fixed lookahead. */ protected int k; /** The NFA->DFA algorithm may terminate leaving some states * without a path to an accept state, implying that upon certain * input, the decision is not deterministic--no decision about * predicting a unique alternative can be made. Recall that an * accept state is one in which a unique alternative is predicted. */ protected int acceptStateReachable = DFA.REACHABLE_UNKNOWN; /** Rather than recheck every NFA configuration in a DFA state (after * resolving) in findNewDFAStatesAndAddDFATransitions just check * this boolean. Saves a linear walk perhaps DFA state creation. * Every little bit helps. */ protected boolean resolvedWithPredicates = false; /** If a closure operation finds that we tried to invoke the same * rule too many times (stack would grow beyond a threshold), it * marks the state has aborted and notifies the DecisionProbe. */ public boolean abortedDueToRecursionOverflow = false; /** If we detect recursion on more than one alt, decision is non-LL(*), * but try to isolate it to only those states whose closure operations * detect recursion. There may be other alts that are cool: * * a : recur '.' * | recur ';' * | X Y // LL(2) decision; don't abort and use k=1 plus backtracking * | X Z * ; * * 12/13/2007: Actually this has caused problems. If k=*, must terminate * and throw out entire DFA; retry with k=1. Since recursive, do not * attempt more closure ops as it may take forever. Exception thrown * now and we simply report the problem. If synpreds exist, I'll retry * with k=1. */ protected boolean abortedDueToMultipleRecursiveAlts = false; /** Build up the hash code for this state as NFA configurations * are added as it's monotonically increasing list of configurations. */ protected int cachedHashCode; protected int cachedUniquelyPredicatedAlt = PREDICTED_ALT_UNSET; public int minAltInConfigurations=Integer.MAX_VALUE; public boolean atLeastOneConfigurationHasAPredicate = false; /** The set of NFA configurations (state,alt,context) for this DFA state */ public OrderedHashSet nfaConfigurations = new OrderedHashSet(); public List configurationsWithLabeledEdges = new ArrayList(); /** Used to prevent the closure operation from looping to itself and * hence looping forever. Sensitive to the NFA state, the alt, and * the stack context. This just the nfa config set because we want to * prevent closures only on states contributed by closure not reach * operations. * * Two configurations identical including semantic context are * considered the same closure computation. @see NFAToDFAConverter.closureBusy(). */ protected Set closureBusy = new HashSet(); /** As this state is constructed (i.e., as NFA states are added), we * can easily check for non-epsilon transitions because the only * transition that could be a valid label is transition(0). When we * process this node eventually, we'll have to walk all states looking * for all possible transitions. That is of the order: size(label space) * times size(nfa states), which can be pretty damn big. It's better * to simply track possible labels. */ protected OrderedHashSet reachableLabels; public DFAState(DFA dfa) { this.dfa = dfa; } public void reset() { //nfaConfigurations = null; // getGatedPredicatesInNFAConfigurations needs configurationsWithLabeledEdges = null; closureBusy = null; reachableLabels = null; } public Transition transition(int i) { return (Transition)transitions.get(i); } public int getNumberOfTransitions() { return transitions.size(); } public void addTransition(Transition t) { transitions.add(t); } /** Add a transition from this state to target with label. Return * the transition number from 0..n-1. */ public int addTransition(DFAState target, Label label) { transitions.add( new Transition(label, target) ); return transitions.size()-1; } public Transition getTransition(int trans) { return transitions.get(trans); } public void removeTransition(int trans) { transitions.remove(trans); } /** Add an NFA configuration to this DFA node. Add uniquely * an NFA state/alt/syntactic&semantic context (chain of invoking state(s) * and semantic predicate contexts). * * I don't see how there could be two configurations with same * state|alt|synCtx and different semantic contexts because the * semantic contexts are computed along the path to a particular state * so those two configurations would have to have the same predicate. * Nonetheless, the addition of configurations is unique on all * configuration info. I guess I'm saying that syntactic context * implies semantic context as the latter is computed according to the * former. * * As we add configurations to this DFA state, track the set of all possible * transition labels so we can simply walk it later rather than doing a * loop over all possible labels in the NFA. */ public void addNFAConfiguration(NFAState state, NFAConfiguration c) { if ( nfaConfigurations.contains(c) ) { return; } nfaConfigurations.add(c); // track min alt rather than compute later if ( c.alt < minAltInConfigurations ) { minAltInConfigurations = c.alt; } if ( c.semanticContext!=SemanticContext.EMPTY_SEMANTIC_CONTEXT ) { atLeastOneConfigurationHasAPredicate = true; } // update hashCode; for some reason using context.hashCode() also // makes the GC take like 70% of the CPU and is slow! cachedHashCode += c.state + c.alt; // update reachableLabels // We're adding an NFA state; check to see if it has a non-epsilon edge if ( state.transition[0] != null ) { Label label = state.transition[0].label; if ( !(label.isEpsilon()||label.isSemanticPredicate()) ) { // this NFA state has a non-epsilon edge, track for fast // walking later when we do reach on this DFA state we're // building. configurationsWithLabeledEdges.add(c); if ( state.transition[1] ==null ) { // later we can check this to ignore o-A->o states in closure c.singleAtomTransitionEmanating = true; } addReachableLabel(label); } } } public NFAConfiguration addNFAConfiguration(NFAState state, int alt, NFAContext context, SemanticContext semanticContext) { NFAConfiguration c = new NFAConfiguration(state.stateNumber, alt, context, semanticContext); addNFAConfiguration(state, c); return c; } /** Add label uniquely and disjointly; intersection with * another set or int/char forces breaking up the set(s). * * Example, if reachable list of labels is [a..z, {k,9}, 0..9], * the disjoint list will be [{a..j,l..z}, k, 9, 0..8]. * * As we add NFA configurations to a DFA state, we might as well track * the set of all possible transition labels to make the DFA conversion * more efficient. W/o the reachable labels, we'd need to check the * whole vocabulary space (could be 0..\uFFFF)! The problem is that * labels can be sets, which may overlap with int labels or other sets. * As we need a deterministic set of transitions from any * state in the DFA, we must make the reachable labels set disjoint. * This operation amounts to finding the character classes for this * DFA state whereas with tools like flex, that need to generate a * homogeneous DFA, must compute char classes across all states. * We are going to generate DFAs with heterogeneous states so we * only care that the set of transitions out of a single state are * unique. :) * * The idea for adding a new set, t, is to look for overlap with the * elements of existing list s. Upon overlap, replace * existing set s[i] with two new disjoint sets, s[i]-t and s[i]&t. * (if s[i]-t is nil, don't add). The remainder is t-s[i], which is * what you want to add to the set minus what was already there. The * remainder must then be compared against the i+1..n elements in s * looking for another collision. Each collision results in a smaller * and smaller remainder. Stop when you run out of s elements or * remainder goes to nil. If remainder is non nil when you run out of * s elements, then add remainder to the end. * * Single element labels are treated as sets to make the code uniform. */ protected void addReachableLabel(Label label) { if ( reachableLabels==null ) { reachableLabels = new OrderedHashSet(); } /* System.out.println("addReachableLabel to state "+dfa.decisionNumber+"."+stateNumber+": "+label.getSet().toString(dfa.nfa.grammar)); System.out.println("start of add to state "+dfa.decisionNumber+"."+stateNumber+": " + "reachableLabels="+reachableLabels.toString()); */ if ( reachableLabels.contains(label) ) { // exact label present return; } IntSet t = label.getSet(); IntSet remainder = t; // remainder starts out as whole set to add int n = reachableLabels.size(); // only look at initial elements // walk the existing list looking for the collision for (int i=0; i0 && user_k==k ) { // if fixed lookahead, then more than 1 alt is a nondeterminism // if we have hit the max lookahead return getAltSet(); } else if ( abortedDueToMultipleRecursiveAlts || abortedDueToRecursionOverflow ) { // if we had to abort for non-LL(*) state assume all alts are a problem return getAltSet(); } else { return getConflictingAlts(); } } /** Walk each NFA configuration in this DFA state looking for a conflict * where (s|i|ctx) and (s|j|ctx) exist, indicating that state s with * context conflicting ctx predicts alts i and j. Return an Integer set * of the alternative numbers that conflict. Two contexts conflict if * they are equal or one is a stack suffix of the other or one is * the empty context. * * Use a hash table to record the lists of configs for each state * as they are encountered. We need only consider states for which * there is more than one configuration. The configurations' predicted * alt must be different or must have different contexts to avoid a * conflict. * * Don't report conflicts for DFA states that have conflicting Tokens * rule NFA states; they will be resolved in favor of the first rule. */ protected Set getConflictingAlts() { // TODO this is called multiple times: cache result? //System.out.println("getNondetAlts for DFA state "+stateNumber); Set nondeterministicAlts = new HashSet(); // If only 1 NFA conf then no way it can be nondeterministic; // save the overhead. There are many o-a->o NFA transitions // and so we save a hash map and iterator creation for each // state. int numConfigs = nfaConfigurations.size(); if ( numConfigs <=1 ) { return null; } // First get a list of configurations for each state. // Most of the time, each state will have one associated configuration. MultiMap stateToConfigListMap = new MultiMap(); for (int i = 0; i < numConfigs; i++) { NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i); Integer stateI = Utils.integer(configuration.state); stateToConfigListMap.map(stateI, configuration); } // potential conflicts are states with > 1 configuration and diff alts Set states = stateToConfigListMap.keySet(); int numPotentialConflicts = 0; for (Iterator it = states.iterator(); it.hasNext();) { Integer stateI = (Integer) it.next(); boolean thisStateHasPotentialProblem = false; List configsForState = (List)stateToConfigListMap.get(stateI); int alt=0; int numConfigsForState = configsForState.size(); for (int i = 0; i < numConfigsForState && numConfigsForState>1 ; i++) { NFAConfiguration c = (NFAConfiguration) configsForState.get(i); if ( alt==0 ) { alt = c.alt; } else if ( c.alt!=alt ) { /* System.out.println("potential conflict in state "+stateI+ " configs: "+configsForState); */ // 11/28/2005: don't report closures that pinch back // together in Tokens rule. We want to silently resolve // to the first token definition ala lex/flex by ignoring // these conflicts. // Also this ensures that lexers look for more and more // characters (longest match) before resorting to predicates. // TestSemanticPredicates.testLexerMatchesLongestThenTestPred() // for example would terminate at state s1 and test predicate // meaning input "ab" would test preds to decide what to // do but it should match rule C w/o testing preds. if ( dfa.nfa.grammar.type!=Grammar.LEXER || !dfa.decisionNFAStartState.enclosingRule.name.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) { numPotentialConflicts++; thisStateHasPotentialProblem = true; } } } if ( !thisStateHasPotentialProblem ) { // remove NFA state's configurations from // further checking; no issues with it // (can't remove as it's concurrent modification; set to null) stateToConfigListMap.put(stateI, null); } } // a fast check for potential issues; most states have none if ( numPotentialConflicts==0 ) { return null; } // we have a potential problem, so now go through config lists again // looking for different alts (only states with potential issues // are left in the states set). Now we will check context. // For example, the list of configs for NFA state 3 in some DFA // state might be: // [3|2|[28 18 $], 3|1|[28 $], 3|1, 3|2] // I want to create a map from context to alts looking for overlap: // [28 18 $] -> 2 // [28 $] -> 1 // [$] -> 1,2 // Indeed a conflict exists as same state 3, same context [$], predicts // alts 1 and 2. // walk each state with potential conflicting configurations for (Iterator it = states.iterator(); it.hasNext();) { Integer stateI = (Integer) it.next(); List configsForState = (List)stateToConfigListMap.get(stateI); // compare each configuration pair s, t to ensure: // s.ctx different than t.ctx if s.alt != t.alt int numConfigsForState = 0; if ( configsForState!=null ) { numConfigsForState = configsForState.size(); } for (int i = 0; i < numConfigsForState; i++) { NFAConfiguration s = (NFAConfiguration) configsForState.get(i); for (int j = i+1; j < numConfigsForState; j++) { NFAConfiguration t = (NFAConfiguration)configsForState.get(j); // conflicts means s.ctx==t.ctx or s.ctx is a stack // suffix of t.ctx or vice versa (if alts differ). // Also a conflict if s.ctx or t.ctx is empty if ( s.alt != t.alt && s.context.conflictsWith(t.context) ) { nondeterministicAlts.add(Utils.integer(s.alt)); nondeterministicAlts.add(Utils.integer(t.alt)); } } } } if ( nondeterministicAlts.size()==0 ) { return null; } return nondeterministicAlts; } /** Get the set of all alts mentioned by all NFA configurations in this * DFA state. */ public Set getAltSet() { int numConfigs = nfaConfigurations.size(); Set alts = new HashSet(); for (int i = 0; i < numConfigs; i++) { NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i); alts.add(Utils.integer(configuration.alt)); } if ( alts.size()==0 ) { return null; } return alts; } public Set getGatedSyntacticPredicatesInNFAConfigurations() { int numConfigs = nfaConfigurations.size(); Set synpreds = new HashSet(); for (int i = 0; i < numConfigs; i++) { NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i); SemanticContext gatedPredExpr = configuration.semanticContext.getGatedPredicateContext(); // if this is a manual syn pred (gated and syn pred), add if ( gatedPredExpr!=null && configuration.semanticContext.isSyntacticPredicate() ) { synpreds.add(configuration.semanticContext); } } if ( synpreds.size()==0 ) { return null; } return synpreds; } /** For gated productions, we need an OR'd list of all predicates for the * target of an edge so we can gate the edge based upon the predicates * associated with taking that path (if any). * * For syntactic predicates, we only want to generate predicate * evaluations as it transitions to an accept state; waste to * do it earlier. So, only add gated preds derived from manually- * specified syntactic predicates if this is an accept state. * * Also, since configurations w/o gated predicates are like true * gated predicates, finding a configuration whose alt has no gated * predicate implies we should evaluate the predicate to true. This * means the whole edge has to be ungated. Consider: * * X : ('a' | {p}?=> 'a') * | 'a' 'b' * ; * * Here, you 'a' gets you from s0 to s1 but you can't test p because * plain 'a' is ok. It's also ok for starting alt 2. Hence, you can't * test p. Even on the edge going to accept state for alt 1 of X, you * can't test p. You can get to the same place with and w/o the context. * Therefore, it is never ok to test p in this situation. * * TODO: cache this as it's called a lot; or at least set bit if >1 present in state */ public SemanticContext getGatedPredicatesInNFAConfigurations() { SemanticContext unionOfPredicatesFromAllAlts = null; int numConfigs = nfaConfigurations.size(); for (int i = 0; i < numConfigs; i++) { NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i); SemanticContext gatedPredExpr = configuration.semanticContext.getGatedPredicateContext(); if ( gatedPredExpr==null ) { // if we ever find a configuration w/o a gated predicate // (even if it's a nongated predicate), we cannot gate // the indident edges. return null; } else if ( acceptState || !configuration.semanticContext.isSyntacticPredicate() ) { // at this point we have a gated predicate and, due to elseif, // we know it's an accept or not a syn pred. In this case, // it's safe to add the gated predicate to the union. We // only want to add syn preds if it's an accept state. Other // gated preds can be used with edges leading to accept states. if ( unionOfPredicatesFromAllAlts==null ) { unionOfPredicatesFromAllAlts = gatedPredExpr; } else { unionOfPredicatesFromAllAlts = SemanticContext.or(unionOfPredicatesFromAllAlts,gatedPredExpr); } } } if ( unionOfPredicatesFromAllAlts instanceof SemanticContext.TruePredicate ) { return null; } return unionOfPredicatesFromAllAlts; } /** Is an accept state reachable from this state? */ public int getAcceptStateReachable() { return acceptStateReachable; } public void setAcceptStateReachable(int acceptStateReachable) { this.acceptStateReachable = acceptStateReachable; } public boolean isResolvedWithPredicates() { return resolvedWithPredicates; } /** Print all NFA states plus what alts they predict */ public String toString() { StringBuffer buf = new StringBuffer(); buf.append(stateNumber+":{"); for (int i = 0; i < nfaConfigurations.size(); i++) { NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i); if ( i>0 ) { buf.append(", "); } buf.append(configuration); } buf.append("}"); return buf.toString(); } public int getLookaheadDepth() { return k; } public void setLookaheadDepth(int k) { this.k = k; if ( k > dfa.max_k ) { // track max k for entire DFA dfa.max_k = k; } } }