// Interpreter.java revision 324c4644fee44b9898524c09511bd33c3f12e2df
/*
 * [The "BSD license"]
 * Copyright (c) 2010 Terence Parr
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.tool;

import org.antlr.analysis.DFA;
import org.antlr.analysis.*;
import org.antlr.misc.IntervalSet;
import org.antlr.runtime.*;
import org.antlr.runtime.debug.BlankDebugEventListener;
import org.antlr.runtime.debug.DebugEventListener;
import org.antlr.runtime.debug.ParseTreeBuilder;
import org.antlr.runtime.tree.ParseTree;

import java.util.List;
import java.util.Stack;

/** The recognition interpreter/engine for grammars.  It lives apart from
 *  Grammar because, while related, interpretation is technically not a
 *  Grammar responsibility.  Build one of these from a grammar plus an input
 *  stream; for a lexer grammar the object doubles as a TokenSource, so two
 *  grammars can be chained (via a CommonTokenStream) to lex and then parse.
 */
public class Interpreter implements TokenSource {
    protected Grammar grammar;
    protected IntStream input;

    /** Lexer listener that records a CommonToken each time a real token
     *  rule finishes.  scan() installs one of these to capture a single
     *  token; call scan() repeatedly (saving each result) for a stream.
     */
    class LexerActionGetTokenType extends BlankDebugEventListener {
        public CommonToken token;
        Grammar g;

        public LexerActionGetTokenType(Grammar g) {
            this.g = g;
        }

        /** On leaving any rule except the artificial "tokens" rule,
         *  build a token whose type is derived from the rule name.
         */
        public void exitRule(String grammarFileName, String ruleName) {
            if ( !ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) {
                int type = g.getTokenType(ruleName);
                int channel = Token.DEFAULT_CHANNEL;
                token = new CommonToken((CharStream)input, type, channel, 0, 0);
            }
        }
    }

    public Interpreter(Grammar grammar, IntStream input) {
        this.grammar = grammar;
        this.input = input;
    }

    /** TokenSource implementation: match the next token off the char stream
     *  using the artificial "tokens" start rule.  Only meaningful for lexer
     *  grammars; returns null for any other grammar type.
     */
    public Token nextToken() {
        if ( grammar.type != Grammar.LEXER ) {
            return null;
        }
        if ( input.LA(1) == CharStream.EOF ) {
            return new CommonToken((CharStream)input, Token.EOF, Token.DEFAULT_CHANNEL,
                                   input.index(), input.index());
        }
        int tokenStartIndex = input.index();
        int tokenStartCharPos = ((CharStream)input).getCharPositionInLine();
        CommonToken matched = null;
        // Keep trying until a token matches or input runs out; each scan
        // failure is reported and we retry from wherever recovery left us.
        while ( input.LA(1) != CharStream.EOF ) {
            try {
                matched = scan(Grammar.ARTIFICIAL_TOKENS_RULENAME, null);
                break;
            }
            catch (RecognitionException re) {
                reportScanError(re);
            }
        }
        // scan() only fills in the token type; complete the rest here.
        int tokenStopIndex = input.index() - 1;
        if ( matched == null ) {
            return new CommonToken((CharStream)input, Token.EOF, Token.DEFAULT_CHANNEL,
                                   tokenStartIndex, tokenStartIndex);
        }
        matched.setLine(((CharStream)input).getLine());
        matched.setStartIndex(tokenStartIndex);
        matched.setStopIndex(tokenStopIndex);
        matched.setCharPositionInLine(tokenStartCharPos);
        return matched;
    }

    /** For the current char stream, try to match against the NFA starting
     *  at startRule.  The walk is deterministic even though it uses an NFA,
     *  because each decision point is resolved with its lookahead DFA --
     *  exactly what generated parser code would do.
     *
     *  Lexer grammars only; silently does nothing for other grammar types.
     *  Recognition events are routed to {@code actions}; every NFA state
     *  visited is appended to {@code visitedStates} when non-null.
     */
    public void scan(String startRule,
                     DebugEventListener actions,
                     List visitedStates)
        throws RecognitionException
    {
        if ( grammar.type != Grammar.LEXER ) {
            return;
        }
        CharStream in = (CharStream)this.input;
        // Lazily build NFAs from the grammar AST if not done yet.
        if ( grammar.getRuleStartState(startRule) == null ) {
            grammar.buildNFA();
        }
        // Likewise create the lookahead DFA predictors on first use.
        if ( !grammar.allDecisionDFAHaveBeenCreated() ) {
            grammar.createLookaheadDFAs();
        }
        Stack ruleInvocationStack = new Stack();
        NFAState start = grammar.getRuleStartState(startRule);
        NFAState stop = grammar.getRuleStopState(startRule);
        parseEngine(startRule, start, stop, in, ruleInvocationStack,
                    actions, visitedStates);
    }

    /** Convenience: scan one token without recording visited states. */
    public CommonToken scan(String startRule)
        throws RecognitionException
    {
        return scan(startRule, null);
    }

    /** Scan one token, capturing it via a LexerActionGetTokenType listener. */
    public CommonToken scan(String startRule, List visitedStates)
        throws RecognitionException
    {
        LexerActionGetTokenType tokenGetter = new LexerActionGetTokenType(grammar);
        scan(startRule, tokenGetter, visitedStates);
        return tokenGetter.token;
    }

    /** Parse from startRule, sending events to {@code actions} and
     *  appending visited NFA states to {@code visitedStates} when non-null.
     */
    public void parse(String startRule,
                      DebugEventListener actions,
                      List visitedStates)
        throws RecognitionException
    {
        // Lazily build NFAs/DFAs from the grammar AST if not done yet.
        if ( grammar.getRuleStartState(startRule) == null ) {
            grammar.buildNFA();
        }
        if ( !grammar.allDecisionDFAHaveBeenCreated() ) {
            grammar.createLookaheadDFAs();
        }
        Stack ruleInvocationStack = new Stack();
        NFAState start = grammar.getRuleStartState(startRule);
        NFAState stop = grammar.getRuleStopState(startRule);
        parseEngine(startRule, start, stop, input, ruleInvocationStack,
                    actions, visitedStates);
    }

    /** Convenience: parse without recording visited states. */
    public ParseTree parse(String startRule)
        throws RecognitionException
    {
        return parse(startRule, null);
    }

    /** Parse from startRule and return the resulting parse tree. */
    public ParseTree parse(String startRule, List visitedStates)
        throws RecognitionException
    {
        ParseTreeBuilder builder = new ParseTreeBuilder(grammar.name);
        try {
            parse(startRule, builder, visitedStates);
        }
        catch (RecognitionException ignored) {
            // Errors already reached the listener via recognitionException();
            // the exception merely unwinds the engine, and the error node
            // is recorded in the tree.
        }
        return builder.getTree();
    }

    /** Simulate the NFA from {@code start} to {@code stop}, driving any
     *  listener and filling {@code visitedStates} (when non-null) with every
     *  NFA state touched during the walk.
     */
    protected void parseEngine(String startRule,
                               NFAState start,
                               NFAState stop,
                               IntStream input,
                               Stack ruleInvocationStack,
                               DebugEventListener actions,
                               List visitedStates)
        throws RecognitionException
    {
        NFAState state = start;
        if ( actions != null ) {
            actions.enterRule(state.nfa.grammar.getFileName(), start.enclosingRule.name);
        }
        int lookahead = input.LA(1);
        while ( state != stop ) {
            if ( visitedStates != null ) {
                visitedStates.add(state);
            }

            // CASE 1: decision state; predict an alt with the lookahead DFA
            // and jump to that alternative's first state.
            if ( state.getDecisionNumber() > 0 &&
                 state.nfa.grammar.getNumberOfAltsForDecisionNFA(state) > 1 )
            {
                DFA dfa = state.nfa.grammar.getLookaheadDFA(state.getDecisionNumber());
                int marker = input.mark();
                int predictedAlt = predict(dfa);
                if ( predictedAlt == NFA.INVALID_ALT_NUMBER ) {
                    String description = dfa.getNFADecisionStartState().getDescription();
                    NoViableAltException nvae =
                        new NoViableAltException(description,
                                                 dfa.getDecisionNumber(),
                                                 state.stateNumber,
                                                 input);
                    if ( actions != null ) {
                        actions.recognitionException(nvae);
                    }
                    input.consume(); // recover
                    throw nvae;
                }
                input.rewind(marker);
                int parseAlt = state.translateDisplayAltToWalkAlt(predictedAlt);
                NFAState altStart;
                if ( parseAlt > state.nfa.grammar.getNumberOfAltsForDecisionNFA(state) ) {
                    // implied exit branch of a loop block etc...
                    altStart = state.nfa.grammar.nfa.getState(state.endOfBlockStateNumber);
                }
                else {
                    altStart = state.nfa.grammar.getNFAStateForAltOfDecision(state, parseAlt);
                }
                state = (NFAState)altStart.transition[0].target;
                continue;
            }

            // CASE 2: accept state; finished matching a rule.
            if ( state.isAcceptState() ) {
                if ( actions != null ) {
                    actions.exitRule(state.nfa.grammar.getFileName(), state.enclosingRule.name);
                }
                if ( ruleInvocationStack.empty() ) {
                    // hit the stop state of the start rule; parse is done
                    break;
                }
                // Pop the invoking state to know where to resume.
                NFAState invokingState = (NFAState)ruleInvocationStack.pop();
                RuleClosureTransition invokingTransition =
                    (RuleClosureTransition)invokingState.transition[0];
                state = invokingTransition.followState;
                continue;
            }

            Transition edge = state.transition[0];
            Label edgeLabel = edge.label;
            if ( edgeLabel.isSemanticPredicate() ) {
                // NOTE(review): predicates are unsupported; this reports the
                // failure but deliberately does not throw here -- the throw
                // happens below in CASE 5 when the label fails to match.
                FailedPredicateException fpe =
                    new FailedPredicateException(input,
                                                 state.enclosingRule.name,
                                                 "can't deal with predicates yet");
                if ( actions != null ) {
                    actions.recognitionException(fpe);
                }
            }

            // CASE 3: epsilon transition
            if ( edgeLabel.isEpsilon() ) {
                if ( edge instanceof RuleClosureTransition ) {
                    // CASE 3a: rule invocation; remember where to return to.
                    ruleInvocationStack.push(state);
                    state = (NFAState)edge.target;
                    if ( actions != null ) {
                        actions.enterRule(state.nfa.grammar.getFileName(), state.enclosingRule.name);
                    }
                    // May have jumped into a different (imported) grammar;
                    // make sure its decision DFAs exist.
                    if ( !state.nfa.grammar.allDecisionDFAHaveBeenCreated() ) {
                        state.nfa.grammar.createLookaheadDFAs();
                    }
                }
                else {
                    // CASE 3b: plain epsilon edge; just advance.
                    state = (NFAState)edge.target;
                }
            }

            // CASE 4: edge label matches the current input symbol.
            else if ( edgeLabel.matches(lookahead) ) {
                if ( actions != null ) {
                    if ( state.nfa.grammar.type == Grammar.PARSER ||
                         state.nfa.grammar.type == Grammar.COMBINED )
                    {
                        actions.consumeToken(((TokenStream)input).LT(1));
                    }
                }
                state = (NFAState)edge.target;
                input.consume();
                lookahead = input.LA(1);
            }

            // CASE 5: error; the label is inconsistent with the input.
            else {
                if ( edgeLabel.isAtom() ) {
                    MismatchedTokenException mte =
                        new MismatchedTokenException(edgeLabel.getAtom(), input);
                    if ( actions != null ) {
                        actions.recognitionException(mte);
                    }
                    input.consume(); // recover
                    throw mte;
                }
                else if ( edgeLabel.isSet() ) {
                    MismatchedSetException mse =
                        new MismatchedSetException(((IntervalSet)edgeLabel.getSet()).toRuntimeBitSet(),
                                                   input);
                    if ( actions != null ) {
                        actions.recognitionException(mse);
                    }
                    input.consume(); // recover
                    throw mse;
                }
                else if ( edgeLabel.isSemanticPredicate() ) {
                    FailedPredicateException fpe =
                        new FailedPredicateException(input,
                                                     state.enclosingRule.name,
                                                     edgeLabel.getSemanticContext().toString());
                    if ( actions != null ) {
                        actions.recognitionException(fpe);
                    }
                    input.consume(); // recover
                    throw fpe;
                }
                else {
                    throw new RecognitionException(input); // unknown error
                }
            }
        }
        if ( actions != null ) {
            actions.exitRule(state.nfa.grammar.getFileName(), stop.enclosingRule.name);
        }
    }

    /** Given an input stream, return the unique alternative predicted by
     *  matching the input, or NFA.INVALID_ALT_NUMBER upon error.  The first
     *  lookahead symbol must already be primed: input.LA(1) is where
     *  prediction starts.
     */
    public int predict(DFA dfa) {
        DFAState d = dfa.startState;
        int cur = input.LA(1);
        // NOTE(review): the EOT fallback edge is remembered across DFA
        // states rather than reset per state; preserved as-is to match the
        // original behavior -- confirm against upstream before changing.
        Transition eotEdge = null;
        dfaLoop:
        while ( !d.isAcceptState() ) {
            // Look for an edge of d whose label matches the current symbol.
            for (int i = 0; i < d.getNumberOfTransitions(); i++) {
                Transition t = d.transition(i);
                if ( t.label.matches(cur) ) {
                    // take transition i
                    d = (DFAState)t.target;
                    input.consume();
                    cur = input.LA(1);
                    continue dfaLoop;
                }
                if ( t.label.getAtom() == Label.EOT ) {
                    // EOT matches any char; remember as a fallback edge.
                    eotEdge = t;
                }
            }
            if ( eotEdge != null ) {
                d = (DFAState)eotEdge.target;
                continue dfaLoop;
            }
            return NFA.INVALID_ALT_NUMBER;
        }
        // Accept state reached; it knows the uniquely predicted alt.
        // Nothing emanates from a stop state, so we must terminate anyway.
        return d.getUniquelyPredictedAlt();
    }

    /** Print the best message we can for a failed token match; with no
     *  Lexer object available we cannot call its error-message routine.
     */
    public void reportScanError(RecognitionException re) {
        CharStream cs = (CharStream)input;
        System.err.println("problem matching token at "+
            cs.getLine()+":"+cs.getCharPositionInLine()+" "+re);
    }

    public String getSourceName() {
        return input.getSourceName();
    }
}