/* * [The "BSD license"] * Copyright (c) 2010 Terence Parr * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.antlr.analysis; import org.antlr.grammar.v3.ANTLRParser; import org.antlr.misc.IntSet; import org.antlr.misc.IntervalSet; import org.antlr.tool.Grammar; import org.antlr.tool.Rule; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; /** * Created by IntelliJ IDEA. * User: parrt * Date: Dec 31, 2007 * Time: 1:31:16 PM * To change this template use File | Settings | File Templates. */ public class LL1Analyzer { /** 0 if we hit end of rule and invoker should keep going (epsilon) */ public static final int DETECT_PRED_EOR = 0; /** 1 if we found a nonautobacktracking pred */ public static final int DETECT_PRED_FOUND = 1; /** 2 if we didn't find such a pred */ public static final int DETECT_PRED_NOT_FOUND = 2; public Grammar grammar; /** Used during LOOK to detect computation cycles */ protected Set lookBusy = new HashSet(); public Map FIRSTCache = new HashMap(); public Map FOLLOWCache = new HashMap(); public LL1Analyzer(Grammar grammar) { this.grammar = grammar; } /* public void computeRuleFIRSTSets() { if ( getNumberOfDecisions()==0 ) { createNFAs(); } for (Iterator it = getRules().iterator(); it.hasNext();) { Rule r = (Rule)it.next(); if ( r.isSynPred ) { continue; } LookaheadSet s = FIRST(r); System.out.println("FIRST("+r.name+")="+s); } } */ /* public Set getOverriddenRulesWithDifferentFIRST() { // walk every rule in this grammar and compare FIRST set with // those in imported grammars. Set rules = new HashSet(); for (Iterator it = getRules().iterator(); it.hasNext();) { Rule r = (Rule)it.next(); //System.out.println(r.name+" FIRST="+r.FIRST); for (int i = 0; i < delegates.size(); i++) { Grammar g = delegates.get(i); Rule importedRule = g.getRule(r.name); if ( importedRule != null ) { // exists in imported grammar // System.out.println(r.name+" exists in imported grammar: FIRST="+importedRule.FIRST); if ( !r.FIRST.equals(importedRule.FIRST) ) { rules.add(r.name); } } } } return rules; } public Set getImportedRulesSensitiveToOverriddenRulesDueToLOOK() { Set diffFIRSTs = getOverriddenRulesWithDifferentFIRST(); Set rules = new HashSet(); for (Iterator it = diffFIRSTs.iterator(); it.hasNext();) { String r = (String) it.next(); for (int i = 0; i < delegates.size(); i++) { Grammar g = delegates.get(i); Set callers = g.ruleSensitivity.get(r); // somebody invokes rule whose FIRST changed in subgrammar? if ( callers!=null ) { rules.addAll(callers); //System.out.println(g.name+" rules "+callers+" sensitive to "+r+"; dup 'em"); } } } return rules; } */ /* public LookaheadSet LOOK(Rule r) { if ( r.FIRST==null ) { r.FIRST = FIRST(r.startState); } return r.FIRST; } */ /** From an NFA state, s, find the set of all labels reachable from s. * Used to compute follow sets for error recovery. Never computes * a FOLLOW operation. FIRST stops at end of rules, returning EOR, unless * invoked from another rule. I.e., routine properly handles * * a : b A ; * * where b is nullable. * * We record with EOR_TOKEN_TYPE if we hit the end of a rule so we can * know at runtime (when these sets are used) to start walking up the * follow chain to compute the real, correct follow set (as opposed to * the FOLLOW, which is a superset). * * This routine will only be used on parser and tree parser grammars. */ public LookaheadSet FIRST(NFAState s) { //System.out.println("> FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule); lookBusy.clear(); LookaheadSet look = _FIRST(s, false); //System.out.println("< FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule+"="+look.toString(this.grammar)); return look; } public LookaheadSet FOLLOW(Rule r) { //System.out.println("> FOLLOW("+r.name+") in rule "+r.startState.enclosingRule); LookaheadSet f = FOLLOWCache.get(r); if ( f!=null ) { return f; } f = _FIRST(r.stopState, true); FOLLOWCache.put(r, f); //System.out.println("< FOLLOW("+r+") in rule "+r.startState.enclosingRule+"="+f.toString(this.grammar)); return f; } public LookaheadSet LOOK(NFAState s) { if ( NFAToDFAConverter.debug ) { System.out.println("> LOOK("+s+")"); } lookBusy.clear(); LookaheadSet look = _FIRST(s, true); // FOLLOW makes no sense (at the moment!) for lexical rules. if ( grammar.type!=Grammar.LEXER && look.member(Label.EOR_TOKEN_TYPE) ) { // avoid altering FIRST reset as it is cached LookaheadSet f = FOLLOW(s.enclosingRule); f.orInPlace(look); f.remove(Label.EOR_TOKEN_TYPE); look = f; //look.orInPlace(FOLLOW(s.enclosingRule)); } else if ( grammar.type==Grammar.LEXER && look.member(Label.EOT) ) { // if this has EOT, lookahead is all char (all char can follow rule) //look = new LookaheadSet(Label.EOT); look = new LookaheadSet(IntervalSet.COMPLETE_SET); } if ( NFAToDFAConverter.debug ) { System.out.println("< LOOK("+s+")="+look.toString(grammar)); } return look; } protected LookaheadSet _FIRST(NFAState s, boolean chaseFollowTransitions) { /* System.out.println("_LOOK("+s+") in rule "+s.enclosingRule); if ( s.transition[0] instanceof RuleClosureTransition ) { System.out.println("go to rule "+((NFAState)s.transition[0].target).enclosingRule); } */ if ( !chaseFollowTransitions && s.isAcceptState() ) { if ( grammar.type==Grammar.LEXER ) { // FOLLOW makes no sense (at the moment!) for lexical rules. // assume all char can follow return new LookaheadSet(IntervalSet.COMPLETE_SET); } return new LookaheadSet(Label.EOR_TOKEN_TYPE); } if ( lookBusy.contains(s) ) { // return a copy of an empty set; we may modify set inline return new LookaheadSet(); } lookBusy.add(s); Transition transition0 = s.transition[0]; if ( transition0==null ) { return null; } if ( transition0.label.isAtom() ) { int atom = transition0.label.getAtom(); return new LookaheadSet(atom); } if ( transition0.label.isSet() ) { IntSet sl = transition0.label.getSet(); return new LookaheadSet(sl); } // compute FIRST of transition 0 LookaheadSet tset = null; // if transition 0 is a rule call and we don't want FOLLOW, check cache if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) { tset = FIRSTCache.get((NFAState)transition0.target); } // if not in cache, must compute if ( tset==null ) { tset = _FIRST((NFAState)transition0.target, chaseFollowTransitions); // save FIRST cache for transition 0 if rule call if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) { FIRSTCache.put((NFAState)transition0.target, tset); } } LookaheadSet tsetCached = tset; // tset is stored in cache. We can't return the same instance // did we fall off the end? if ( grammar.type!=Grammar.LEXER && tset.member(Label.EOR_TOKEN_TYPE) ) { if ( transition0 instanceof RuleClosureTransition ) { // we called a rule that found the end of the rule. // That means the rule is nullable and we need to // keep looking at what follows the rule ref. E.g., // a : b A ; where b is nullable means that LOOK(a) // should include A. RuleClosureTransition ruleInvocationTrans = (RuleClosureTransition)transition0; // remove the EOR and get what follows //tset.remove(Label.EOR_TOKEN_TYPE); NFAState following = (NFAState) ruleInvocationTrans.followState; LookaheadSet fset = _FIRST(following, chaseFollowTransitions); fset.orInPlace(tset); // tset cached; or into new set fset.remove(Label.EOR_TOKEN_TYPE); tset = fset; } } Transition transition1 = s.transition[1]; if ( transition1!=null ) { LookaheadSet tset1 = _FIRST((NFAState)transition1.target, chaseFollowTransitions); tset1.orInPlace(tset); tset = tset1; } // never return a cached set; clone return tset==tsetCached ? new LookaheadSet(tset) : tset; } /** Is there a non-syn-pred predicate visible from s that is not in * the rule enclosing s? This accounts for most predicate situations * and lets ANTLR do a simple LL(1)+pred computation. * * TODO: what about gated vs regular preds? */ public boolean detectConfoundingPredicates(NFAState s) { lookBusy.clear(); Rule r = s.enclosingRule; return _detectConfoundingPredicates(s, r, false) == DETECT_PRED_FOUND; } protected int _detectConfoundingPredicates(NFAState s, Rule enclosingRule, boolean chaseFollowTransitions) { //System.out.println("_detectNonAutobacktrackPredicates("+s+")"); if ( !chaseFollowTransitions && s.isAcceptState() ) { if ( grammar.type==Grammar.LEXER ) { // FOLLOW makes no sense (at the moment!) for lexical rules. // assume all char can follow return DETECT_PRED_NOT_FOUND; } return DETECT_PRED_EOR; } if ( lookBusy.contains(s) ) { // return a copy of an empty set; we may modify set inline return DETECT_PRED_NOT_FOUND; } lookBusy.add(s); Transition transition0 = s.transition[0]; if ( transition0==null ) { return DETECT_PRED_NOT_FOUND; } if ( !(transition0.label.isSemanticPredicate()|| transition0.label.isEpsilon()) ) { return DETECT_PRED_NOT_FOUND; } if ( transition0.label.isSemanticPredicate() ) { //System.out.println("pred "+transition0.label); SemanticContext ctx = transition0.label.getSemanticContext(); SemanticContext.Predicate p = (SemanticContext.Predicate)ctx; if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED ) { return DETECT_PRED_FOUND; } } /* if ( transition0.label.isSemanticPredicate() ) { System.out.println("pred "+transition0.label); SemanticContext ctx = transition0.label.getSemanticContext(); SemanticContext.Predicate p = (SemanticContext.Predicate)ctx; // if a non-syn-pred found not in enclosingRule, say we found one if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED && !p.predicateAST.enclosingRuleName.equals(enclosingRule.name) ) { System.out.println("found pred "+p+" not in "+enclosingRule.name); return DETECT_PRED_FOUND; } } */ int result = _detectConfoundingPredicates((NFAState)transition0.target, enclosingRule, chaseFollowTransitions); if ( result == DETECT_PRED_FOUND ) { return DETECT_PRED_FOUND; } if ( result == DETECT_PRED_EOR ) { if ( transition0 instanceof RuleClosureTransition ) { // we called a rule that found the end of the rule. // That means the rule is nullable and we need to // keep looking at what follows the rule ref. E.g., // a : b A ; where b is nullable means that LOOK(a) // should include A. RuleClosureTransition ruleInvocationTrans = (RuleClosureTransition)transition0; NFAState following = (NFAState) ruleInvocationTrans.followState; int afterRuleResult = _detectConfoundingPredicates(following, enclosingRule, chaseFollowTransitions); if ( afterRuleResult == DETECT_PRED_FOUND ) { return DETECT_PRED_FOUND; } } } Transition transition1 = s.transition[1]; if ( transition1!=null ) { int t1Result = _detectConfoundingPredicates((NFAState)transition1.target, enclosingRule, chaseFollowTransitions); if ( t1Result == DETECT_PRED_FOUND ) { return DETECT_PRED_FOUND; } } return DETECT_PRED_NOT_FOUND; } /** Return predicate expression found via epsilon edges from s. Do * not look into other rules for now. Do something simple. Include * backtracking synpreds. */ public SemanticContext getPredicates(NFAState altStartState) { lookBusy.clear(); return _getPredicates(altStartState, altStartState); } protected SemanticContext _getPredicates(NFAState s, NFAState altStartState) { //System.out.println("_getPredicates("+s+")"); if ( s.isAcceptState() ) { return null; } // avoid infinite loops from (..)* etc... if ( lookBusy.contains(s) ) { return null; } lookBusy.add(s); Transition transition0 = s.transition[0]; // no transitions if ( transition0==null ) { return null; } // not a predicate and not even an epsilon if ( !(transition0.label.isSemanticPredicate()|| transition0.label.isEpsilon()) ) { return null; } SemanticContext p = null; SemanticContext p0 = null; SemanticContext p1 = null; if ( transition0.label.isSemanticPredicate() ) { //System.out.println("pred "+transition0.label); p = transition0.label.getSemanticContext(); // ignore backtracking preds not on left edge for this decision if ( ((SemanticContext.Predicate)p).predicateAST.getType() == ANTLRParser.BACKTRACK_SEMPRED && s == altStartState.transition[0].target ) { p = null; // don't count } } // get preds from beyond this state p0 = _getPredicates((NFAState)transition0.target, altStartState); // get preds from other transition Transition transition1 = s.transition[1]; if ( transition1!=null ) { p1 = _getPredicates((NFAState)transition1.target, altStartState); } // join this&following-right|following-down return SemanticContext.and(p,SemanticContext.or(p0,p1)); } }