1/* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28package org.antlr.tool; 29 30import org.antlr.analysis.NFAState; 31import org.antlr.analysis.RuleClosureTransition; 32import org.antlr.analysis.Transition; 33import org.antlr.grammar.v3.ANTLRParser; 34import org.antlr.runtime.tree.Tree; 35 36import java.util.ArrayList; 37import java.util.HashSet; 38import java.util.List; 39import java.util.Set; 40 41/** Factor out routines that check sanity of rules, alts, grammars, etc.. */ 42public class GrammarSanity { 43 /** The checkForLeftRecursion method needs to track what rules it has 44 * visited to track infinite recursion. 45 */ 46 protected Set<Rule> visitedDuringRecursionCheck = null; 47 48 protected Grammar grammar; 49 public GrammarSanity(Grammar grammar) { 50 this.grammar = grammar; 51 } 52 53 /** Check all rules for infinite left recursion before analysis. Return list 54 * of troublesome rule cycles. This method has two side-effects: it notifies 55 * the error manager that we have problems and it sets the list of 56 * recursive rules that we should ignore during analysis. 57 */ 58 public List<Set<Rule>> checkAllRulesForLeftRecursion() { 59 grammar.buildNFA(); // make sure we have NFAs 60 grammar.leftRecursiveRules = new HashSet(); 61 List<Set<Rule>> listOfRecursiveCycles = new ArrayList(); 62 for (int i = 0; i < grammar.composite.ruleIndexToRuleList.size(); i++) { 63 Rule r = grammar.composite.ruleIndexToRuleList.elementAt(i); 64 if ( r!=null ) { 65 visitedDuringRecursionCheck = new HashSet(); 66 visitedDuringRecursionCheck.add(r); 67 Set visitedStates = new HashSet(); 68 traceStatesLookingForLeftRecursion(r.startState, 69 visitedStates, 70 listOfRecursiveCycles); 71 } 72 } 73 if ( listOfRecursiveCycles.size()>0 ) { 74 ErrorManager.leftRecursionCycles(listOfRecursiveCycles); 75 } 76 return listOfRecursiveCycles; 77 } 78 79 /** From state s, look for any transition to a rule that is currently 80 * being traced. When tracing r, visitedDuringRecursionCheck has r 81 * initially. If you reach an accept state, return but notify the 82 * invoking rule that it is nullable, which implies that invoking 83 * rule must look at follow transition for that invoking state. 84 * The visitedStates tracks visited states within a single rule so 85 * we can avoid epsilon-loop-induced infinite recursion here. Keep 86 * filling the cycles in listOfRecursiveCycles and also, as a 87 * side-effect, set leftRecursiveRules. 88 */ 89 protected boolean traceStatesLookingForLeftRecursion(NFAState s, 90 Set visitedStates, 91 List<Set<Rule>> listOfRecursiveCycles) 92 { 93 if ( s.isAcceptState() ) { 94 // this rule must be nullable! 95 // At least one epsilon edge reached accept state 96 return true; 97 } 98 if ( visitedStates.contains(s) ) { 99 // within same rule, we've hit same state; quit looping 100 return false; 101 } 102 visitedStates.add(s); 103 boolean stateReachesAcceptState = false; 104 Transition t0 = s.transition[0]; 105 if ( t0 instanceof RuleClosureTransition ) { 106 RuleClosureTransition refTrans = (RuleClosureTransition)t0; 107 Rule refRuleDef = refTrans.rule; 108 //String targetRuleName = ((NFAState)t0.target).getEnclosingRule(); 109 if ( visitedDuringRecursionCheck.contains(refRuleDef) ) { 110 // record left-recursive rule, but don't go back in 111 grammar.leftRecursiveRules.add(refRuleDef); 112 /* 113 System.out.println("already visited "+refRuleDef+", calling from "+ 114 s.enclosingRule); 115 */ 116 addRulesToCycle(refRuleDef, 117 s.enclosingRule, 118 listOfRecursiveCycles); 119 } 120 else { 121 // must visit if not already visited; send new visitedStates set 122 visitedDuringRecursionCheck.add(refRuleDef); 123 boolean callReachedAcceptState = 124 traceStatesLookingForLeftRecursion((NFAState)t0.target, 125 new HashSet(), 126 listOfRecursiveCycles); 127 // we're back from visiting that rule 128 visitedDuringRecursionCheck.remove(refRuleDef); 129 // must keep going in this rule then 130 if ( callReachedAcceptState ) { 131 NFAState followingState = 132 ((RuleClosureTransition) t0).followState; 133 stateReachesAcceptState |= 134 traceStatesLookingForLeftRecursion(followingState, 135 visitedStates, 136 listOfRecursiveCycles); 137 } 138 } 139 } 140 else if ( t0.label.isEpsilon() || t0.label.isSemanticPredicate() ) { 141 stateReachesAcceptState |= 142 traceStatesLookingForLeftRecursion((NFAState)t0.target, visitedStates, listOfRecursiveCycles); 143 } 144 // else it has a labeled edge 145 146 // now do the other transition if it exists 147 Transition t1 = s.transition[1]; 148 if ( t1!=null ) { 149 stateReachesAcceptState |= 150 traceStatesLookingForLeftRecursion((NFAState)t1.target, 151 visitedStates, 152 listOfRecursiveCycles); 153 } 154 return stateReachesAcceptState; 155 } 156 157 /** enclosingRuleName calls targetRuleName, find the cycle containing 158 * the target and add the caller. Find the cycle containing the caller 159 * and add the target. If no cycles contain either, then create a new 160 * cycle. listOfRecursiveCycles is List<Set<String>> that holds a list 161 * of cycles (sets of rule names). 162 */ 163 protected void addRulesToCycle(Rule targetRule, 164 Rule enclosingRule, 165 List<Set<Rule>> listOfRecursiveCycles) 166 { 167 boolean foundCycle = false; 168 for (int i = 0; i < listOfRecursiveCycles.size(); i++) { 169 Set<Rule> rulesInCycle = listOfRecursiveCycles.get(i); 170 // ensure both rules are in same cycle 171 if ( rulesInCycle.contains(targetRule) ) { 172 rulesInCycle.add(enclosingRule); 173 foundCycle = true; 174 } 175 if ( rulesInCycle.contains(enclosingRule) ) { 176 rulesInCycle.add(targetRule); 177 foundCycle = true; 178 } 179 } 180 if ( !foundCycle ) { 181 Set cycle = new HashSet(); 182 cycle.add(targetRule); 183 cycle.add(enclosingRule); 184 listOfRecursiveCycles.add(cycle); 185 } 186 } 187 188 public void checkRuleReference(GrammarAST scopeAST, 189 GrammarAST refAST, 190 GrammarAST argsAST, 191 String currentRuleName) 192 { 193 Rule r = grammar.getRule(refAST.getText()); 194 if ( refAST.getType()==ANTLRParser.RULE_REF ) { 195 if ( argsAST!=null ) { 196 // rule[args]; ref has args 197 if ( r!=null && r.argActionAST==null ) { 198 // but rule def has no args 199 ErrorManager.grammarError( 200 ErrorManager.MSG_RULE_HAS_NO_ARGS, 201 grammar, 202 argsAST.getToken(), 203 r.name); 204 } 205 } 206 else { 207 // rule ref has no args 208 if ( r!=null && r.argActionAST!=null ) { 209 // but rule def has args 210 ErrorManager.grammarError( 211 ErrorManager.MSG_MISSING_RULE_ARGS, 212 grammar, 213 refAST.getToken(), 214 r.name); 215 } 216 } 217 } 218 else if ( refAST.getType()==ANTLRParser.TOKEN_REF ) { 219 if ( grammar.type!=Grammar.LEXER ) { 220 if ( argsAST!=null ) { 221 // args on a token ref not in a lexer rule 222 ErrorManager.grammarError( 223 ErrorManager.MSG_ARGS_ON_TOKEN_REF, 224 grammar, 225 refAST.getToken(), 226 refAST.getText()); 227 } 228 return; // ignore token refs in nonlexers 229 } 230 if ( argsAST!=null ) { 231 // tokenRef[args]; ref has args 232 if ( r!=null && r.argActionAST==null ) { 233 // but token rule def has no args 234 ErrorManager.grammarError( 235 ErrorManager.MSG_RULE_HAS_NO_ARGS, 236 grammar, 237 argsAST.getToken(), 238 r.name); 239 } 240 } 241 else { 242 // token ref has no args 243 if ( r!=null && r.argActionAST!=null ) { 244 // but token rule def has args 245 ErrorManager.grammarError( 246 ErrorManager.MSG_MISSING_RULE_ARGS, 247 grammar, 248 refAST.getToken(), 249 r.name); 250 } 251 } 252 } 253 } 254 255 /** Rules in tree grammar that use -> rewrites and are spitting out 256 * templates via output=template and then use rewrite=true must only 257 * use -> on alts that are simple nodes or trees or single rule refs 258 * that match either nodes or trees. The altAST is the ALT node 259 * for an ALT. Verify that its first child is simple. Must be either 260 * ( ALT ^( A B ) <end-of-alt> ) or ( ALT A <end-of-alt> ) or 261 * other element. 262 * 263 * Ignore predicates in front and labels. 264 */ 265 public void ensureAltIsSimpleNodeOrTree(GrammarAST altAST, 266 GrammarAST elementAST, 267 int outerAltNum) 268 { 269 if ( isValidSimpleElementNode(elementAST) ) { 270 GrammarAST next = (GrammarAST)elementAST.getNextSibling(); 271 if ( !isNextNonActionElementEOA(next)) { 272 ErrorManager.grammarWarning(ErrorManager.MSG_REWRITE_FOR_MULTI_ELEMENT_ALT, 273 grammar, 274 next.token, 275 new Integer(outerAltNum)); 276 } 277 return; 278 } 279 switch ( elementAST.getType() ) { 280 case ANTLRParser.ASSIGN : // labels ok on non-rule refs 281 case ANTLRParser.PLUS_ASSIGN : 282 if ( isValidSimpleElementNode(elementAST.getChild(1)) ) { 283 return; 284 } 285 break; 286 case ANTLRParser.ACTION : // skip past actions 287 case ANTLRParser.SEMPRED : 288 case ANTLRParser.SYN_SEMPRED : 289 case ANTLRParser.BACKTRACK_SEMPRED : 290 case ANTLRParser.GATED_SEMPRED : 291 ensureAltIsSimpleNodeOrTree(altAST, 292 (GrammarAST)elementAST.getNextSibling(), 293 outerAltNum); 294 return; 295 } 296 ErrorManager.grammarWarning(ErrorManager.MSG_REWRITE_FOR_MULTI_ELEMENT_ALT, 297 grammar, 298 elementAST.token, 299 new Integer(outerAltNum)); 300 } 301 302 protected boolean isValidSimpleElementNode(Tree t) { 303 switch ( t.getType() ) { 304 case ANTLRParser.TREE_BEGIN : 305 case ANTLRParser.TOKEN_REF : 306 case ANTLRParser.CHAR_LITERAL : 307 case ANTLRParser.STRING_LITERAL : 308 case ANTLRParser.WILDCARD : 309 return true; 310 default : 311 return false; 312 } 313 } 314 315 protected boolean isNextNonActionElementEOA(GrammarAST t) { 316 while ( t.getType()==ANTLRParser.ACTION || 317 t.getType()==ANTLRParser.SEMPRED ) 318 { 319 t = (GrammarAST)t.getNextSibling(); 320 } 321 if ( t.getType()==ANTLRParser.EOA ) { 322 return true; 323 } 324 return false; 325 } 326} 327