Interpreter.java revision 324c4644fee44b9898524c09511bd33c3f12e2df
/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.tool;

import org.antlr.analysis.DFA;
import org.antlr.analysis.*;
import org.antlr.misc.IntervalSet;
import org.antlr.runtime.*;
import org.antlr.runtime.debug.BlankDebugEventListener;
import org.antlr.runtime.debug.DebugEventListener;
import org.antlr.runtime.debug.ParseTreeBuilder;
import org.antlr.runtime.tree.ParseTree;

import java.util.List;
import java.util.Stack;

/** The recognition interpreter/engine for grammars.  Separated
 *  out of Grammar as it's related, but technically not a Grammar function.
 *  You create an interpreter for a grammar and an input stream.  This object
 *  can act as a TokenSource so that you can hook up two grammars (via
 *  a CommonTokenStream) to lex/parse.  Being a token source only makes sense
 *  for a lexer grammar of course.
 */
public class Interpreter implements TokenSource {
	protected Grammar grammar;
	protected IntStream input;

	/** A lexer listener that just creates token objects as they
	 *  are matched.  scan() uses this listener to get a single token object.
	 *  To get a stream of tokens, you must call scan() multiple times,
	 *  recording the token object result after each call.
	 */
	class LexerActionGetTokenType extends BlankDebugEventListener {
		public CommonToken token;
		Grammar g;
		public LexerActionGetTokenType(Grammar g) {
			this.g = g;
		}

		public void exitRule(String grammarFileName, String ruleName) {
			if ( !ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ){
				int type = g.getTokenType(ruleName);
				int channel = Token.DEFAULT_CHANNEL;
				token = new CommonToken((CharStream)input,type,channel,0,0);
			}
		}
	}

	public Interpreter(Grammar grammar, IntStream input) {
		this.grammar = grammar;
		this.input = input;
	}

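	/* Usage sketch: because Interpreter implements TokenSource, a
	 * lexer-grammar interpreter can feed a CommonTokenStream, which a
	 * parser-grammar interpreter can then consume (the two grammars must
	 * share a token vocabulary).  The Grammar(String) constructor and the
	 * grammar text below are assumptions for illustration only.
	 *
	 *   Grammar lexGrammar = new Grammar("lexer grammar L;\nID : 'a'..'z'+ ;\n");
	 *   Interpreter lexEngine = new Interpreter(lexGrammar, new ANTLRStringStream("abc"));
	 *   CommonTokenStream tokens = new CommonTokenStream(lexEngine); // pulls tokens via nextToken()
	 */
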
	public Token nextToken() {
		if ( grammar.type!=Grammar.LEXER ) {
			return null;
		}
		if ( input.LA(1)==CharStream.EOF ) {
			return new CommonToken((CharStream)input,Token.EOF,Token.DEFAULT_CHANNEL,input.index(),input.index());
		}
		int start = input.index();
		int charPos = ((CharStream)input).getCharPositionInLine();
		CommonToken token = null;
		loop:
		while (input.LA(1)!=CharStream.EOF) {
			try {
				token = scan(Grammar.ARTIFICIAL_TOKENS_RULENAME, null);
				break;
			}
			catch (RecognitionException re) {
				// report a problem and try for another
				reportScanError(re);
				continue loop;
			}
		}
		// the scan can only set type
		// we must set the line, and other junk here to make it a complete token
		int stop = input.index()-1;
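		// token is null only if the loop above reached EOF without ever
		// matching a token; report end of input in that case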
		if ( token==null ) {
			return new CommonToken((CharStream)input,Token.EOF,Token.DEFAULT_CHANNEL,start,start);
		}
		token.setLine(((CharStream)input).getLine());
		token.setStartIndex(start);
		token.setStopIndex(stop);
		token.setCharPositionInLine(charPos);
		return token;
	}

	/** For a given input char stream, try to match against the NFA
	 *  starting at startRule.  This is a deterministic parse even though
	 *  it is using an NFA because it uses DFAs at each decision point to
	 *  predict which alternative will succeed.  This is exactly what the
	 *  generated recognizer will do.
	 *
	 *  This only works for lexer grammars.
	 *
	 *  The token type associated with the final rule end state is reported
	 *  to the actions listener rather than returned; use the overloads that
	 *  return a CommonToken to get it back as a token object.
	 */
	public void scan(String startRule,
					 DebugEventListener actions,
					 List visitedStates)
		throws RecognitionException
	{
		if ( grammar.type!=Grammar.LEXER ) {
			return;
		}
		CharStream in = (CharStream)this.input;
		//System.out.println("scan("+startRule+",'"+in.substring(in.index(),in.size()-1)+"')");
		// Build NFAs/DFAs from the grammar AST if NFAs haven't been built yet
		if ( grammar.getRuleStartState(startRule)==null ) {
			grammar.buildNFA();
		}

		if ( !grammar.allDecisionDFAHaveBeenCreated() ) {
			// Create the DFA predictors for each decision
			grammar.createLookaheadDFAs();
		}

		// do the parse
		Stack ruleInvocationStack = new Stack();
		NFAState start = grammar.getRuleStartState(startRule);
		NFAState stop = grammar.getRuleStopState(startRule);
		parseEngine(startRule, start, stop, in, ruleInvocationStack,
					actions, visitedStates);
	}

	public CommonToken scan(String startRule)
		throws RecognitionException
	{
		return scan(startRule, null);
	}

	public CommonToken scan(String startRule,
							List visitedStates)
		throws RecognitionException
	{
		LexerActionGetTokenType actions = new LexerActionGetTokenType(grammar);
		scan(startRule, actions, visitedStates);
		return actions.token;
	}
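
	/* Usage sketch: tokenize input by calling scan() once per token, as
	 * described on LexerActionGetTokenType above.  The rule name "ID" and
	 * the lexGrammar/chars setup are assumptions for illustration.
	 *
	 *   Interpreter lexEngine = new Interpreter(lexGrammar, chars);
	 *   CommonToken t = lexEngine.scan("ID");  // match one token for rule ID
	 *   // loop, recording one token per call, until chars reaches EOF
	 */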

	public void parse(String startRule,
					  DebugEventListener actions,
					  List visitedStates)
		throws RecognitionException
	{
		//System.out.println("parse("+startRule+")");
		// Build NFAs/DFAs from the grammar AST if NFAs haven't been built yet
		if ( grammar.getRuleStartState(startRule)==null ) {
			grammar.buildNFA();
		}
		if ( !grammar.allDecisionDFAHaveBeenCreated() ) {
			// Create the DFA predictors for each decision
			grammar.createLookaheadDFAs();
		}
		// do the parse
		Stack ruleInvocationStack = new Stack();
		NFAState start = grammar.getRuleStartState(startRule);
		NFAState stop = grammar.getRuleStopState(startRule);
		parseEngine(startRule, start, stop, input, ruleInvocationStack,
					actions, visitedStates);
	}

	public ParseTree parse(String startRule)
		throws RecognitionException
	{
		return parse(startRule, null);
	}

	public ParseTree parse(String startRule, List visitedStates)
		throws RecognitionException
	{
		ParseTreeBuilder actions = new ParseTreeBuilder(grammar.name);
		try {
			parse(startRule, actions, visitedStates);
		}
		catch (RecognitionException re) {
			// Errors are tracked via the ANTLRDebugInterface
			// Exceptions are used just to blast out of the parse engine
			// The error will be in the parse tree.
		}
		return actions.getTree();
	}
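
	/* Usage sketch: interpret a parser grammar over a token stream and
	 * inspect the resulting parse tree.  The rule name "prog" and the
	 * parserGrammar/tokens setup are assumptions for illustration.
	 *
	 *   Interpreter parseEngine = new Interpreter(parserGrammar, tokens);
	 *   ParseTree t = parseEngine.parse("prog");
	 *   System.out.println(t.toStringTree());
	 */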

	/** Simulate the NFA from the start state of startRule to its stop state,
	 *  predicting at each decision point with the lookahead DFAs.  If
	 *  visitedStates is non-null, it is filled with every NFA state visited
	 *  during the parse.
	 */
	protected void parseEngine(String startRule,
							   NFAState start,
							   NFAState stop,
							   IntStream input,
							   Stack ruleInvocationStack,
							   DebugEventListener actions,
							   List visitedStates)
		throws RecognitionException
	{
		NFAState s = start;
		if ( actions!=null ) {
			actions.enterRule(s.nfa.grammar.getFileName(), start.enclosingRule.name);
		}
		int t = input.LA(1);
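		// Walk the NFA one state at a time until the rule stop state is
		// reached, handling in turn: decision states (predict via DFA),
		// rule stop states (return to the invoking state), epsilon and
		// rule-invocation edges, label matches, and mismatches (errors).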
		while ( s!=stop ) {
			if ( visitedStates!=null ) {
				visitedStates.add(s);
			}
			/*
			System.out.println("parse state "+s.stateNumber+" input="+
				s.nfa.grammar.getTokenDisplayName(t));
				*/
			// CASE 1: decision state
			if ( s.getDecisionNumber()>0 && s.nfa.grammar.getNumberOfAltsForDecisionNFA(s)>1 ) {
				// decision point, must predict and jump to alt
				DFA dfa = s.nfa.grammar.getLookaheadDFA(s.getDecisionNumber());
				/*
				if ( s.nfa.grammar.type!=Grammar.LEXER ) {
					System.out.println("decision: "+
								   dfa.getNFADecisionStartState().getDescription()+
								   " input="+s.nfa.grammar.getTokenDisplayName(t));
				}
				*/
				int m = input.mark();
				int predictedAlt = predict(dfa);
				if ( predictedAlt == NFA.INVALID_ALT_NUMBER ) {
					String description = dfa.getNFADecisionStartState().getDescription();
					NoViableAltException nvae =
						new NoViableAltException(description,
													  dfa.getDecisionNumber(),
													  s.stateNumber,
													  input);
					if ( actions!=null ) {
						actions.recognitionException(nvae);
					}
					input.consume(); // recover
					throw nvae;
				}
				input.rewind(m);
				int parseAlt =
					s.translateDisplayAltToWalkAlt(predictedAlt);
				/*
				if ( s.nfa.grammar.type!=Grammar.LEXER ) {
					System.out.println("predicted alt "+predictedAlt+", parseAlt "+
									   parseAlt);
				}
				*/
				NFAState alt;
				if ( parseAlt > s.nfa.grammar.getNumberOfAltsForDecisionNFA(s) ) {
					// implied branch of loop etc...
					alt = s.nfa.grammar.nfa.getState( s.endOfBlockStateNumber );
				}
				else {
					alt = s.nfa.grammar.getNFAStateForAltOfDecision(s, parseAlt);
				}
				s = (NFAState)alt.transition[0].target;
				continue;
			}

			// CASE 2: finished matching a rule
			if ( s.isAcceptState() ) { // end of rule node
				if ( actions!=null ) {
					actions.exitRule(s.nfa.grammar.getFileName(), s.enclosingRule.name);
				}
				if ( ruleInvocationStack.empty() ) {
					// done parsing.  Hit the start state.
					//System.out.println("stack empty in stop state for "+s.getEnclosingRule());
					break;
				}
				// pop invoking state off the stack to know where to return to
				NFAState invokingState = (NFAState)ruleInvocationStack.pop();
				RuleClosureTransition invokingTransition =
						(RuleClosureTransition)invokingState.transition[0];
				// move to node after state that invoked this rule
				s = invokingTransition.followState;
				continue;
			}

			Transition trans = s.transition[0];
			Label label = trans.label;
			if ( label.isSemanticPredicate() ) {
				FailedPredicateException fpe =
					new FailedPredicateException(input,
												 s.enclosingRule.name,
												 "can't deal with predicates yet");
				if ( actions!=null ) {
					actions.recognitionException(fpe);
				}
			}

			// CASE 3: epsilon transition
			if ( label.isEpsilon() ) {
				// CASE 3a: rule invocation state
				if ( trans instanceof RuleClosureTransition ) {
					ruleInvocationStack.push(s);
					s = (NFAState)trans.target;
					//System.out.println("call "+s.enclosingRule.name+" from "+s.nfa.grammar.getFileName());
					if ( actions!=null ) {
						actions.enterRule(s.nfa.grammar.getFileName(), s.enclosingRule.name);
					}
					// could be jumping to new grammar, make sure DFA created
					if ( !s.nfa.grammar.allDecisionDFAHaveBeenCreated() ) {
						s.nfa.grammar.createLookaheadDFAs();
					}
				}
				// CASE 3b: plain old epsilon transition, just move
				else {
					s = (NFAState)trans.target;
				}
			}

			// CASE 4: match label on transition
			else if ( label.matches(t) ) {
				if ( actions!=null ) {
					if ( s.nfa.grammar.type == Grammar.PARSER ||
						 s.nfa.grammar.type == Grammar.COMBINED )
					{
						actions.consumeToken(((TokenStream)input).LT(1));
					}
				}
				s = (NFAState)s.transition[0].target;
				input.consume();
				t = input.LA(1);
			}

			// CASE 5: error condition; label is inconsistent with input
			else {
				if ( label.isAtom() ) {
					MismatchedTokenException mte =
						new MismatchedTokenException(label.getAtom(), input);
					if ( actions!=null ) {
						actions.recognitionException(mte);
					}
					input.consume(); // recover
					throw mte;
				}
				else if ( label.isSet() ) {
					MismatchedSetException mse =
						new MismatchedSetException(((IntervalSet)label.getSet()).toRuntimeBitSet(),
												   input);
					if ( actions!=null ) {
						actions.recognitionException(mse);
					}
					input.consume(); // recover
					throw mse;
				}
				else if ( label.isSemanticPredicate() ) {
					FailedPredicateException fpe =
						new FailedPredicateException(input,
													 s.enclosingRule.name,
													 label.getSemanticContext().toString());
					if ( actions!=null ) {
						actions.recognitionException(fpe);
					}
					input.consume(); // recover
					throw fpe;
				}
				else {
					throw new RecognitionException(input); // unknown error
				}
			}
		}
		//System.out.println("hit stop state for "+stop.getEnclosingRule());
		if ( actions!=null ) {
			actions.exitRule(s.nfa.grammar.getFileName(), stop.enclosingRule.name);
		}
	}

	/** Given an input stream, return the unique alternative predicted by
	 *  matching the input.  Upon error, return NFA.INVALID_ALT_NUMBER.
	 *  The first symbol of lookahead is presumed to be primed; that is,
	 *  input.LA(1) must point at the input symbol you want to start
	 *  predicting with.
	 */
	public int predict(DFA dfa) {
		DFAState s = dfa.startState;
		int c = input.LA(1);
		Transition eotTransition = null;
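		// Simulate the DFA: follow the edge whose label matches the current
		// symbol; remember any EOT edge and take it only when no explicit
		// edge matches (EOT is how a lexer decision ends a token).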
	dfaLoop:
		while ( !s.isAcceptState() ) {
			/*
			System.out.println("DFA.predict("+s.getStateNumber()+", "+
					dfa.getNFA().getGrammar().getTokenName(c)+")");
			*/
			// for each edge of s, look for intersection with current char
			for (int i=0; i<s.getNumberOfTransitions(); i++) {
				Transition t = s.transition(i);
				// special case: EOT matches any char
				if ( t.label.matches(c) ) {
					// take transition i
					s = (DFAState)t.target;
					input.consume();
					c = input.LA(1);
					continue dfaLoop;
				}
				if ( t.label.getAtom()==Label.EOT ) {
					eotTransition = t;
				}
			}
			if ( eotTransition!=null ) {
				s = (DFAState)eotTransition.target;
				continue dfaLoop;
			}
			/*
			ErrorManager.error(ErrorManager.MSG_NO_VIABLE_DFA_ALT,
							   s,
							   dfa.nfa.grammar.getTokenName(c));
			*/
			return NFA.INVALID_ALT_NUMBER;
		}
		// woohoo!  We know which alt to predict
		// nothing emanates from a stop state; must terminate anyway
		/*
		System.out.println("DFA stop state "+s.getStateNumber()+" predicts "+
				s.getUniquelyPredictedAlt());
		*/
		return s.getUniquelyPredictedAlt();
	}

	public void reportScanError(RecognitionException re) {
		CharStream cs = (CharStream)input;
		// print as good a message as we can, given that we do not have
		// a Lexer object and, hence, cannot call the routine to get a
		// decent error message.
		System.err.println("problem matching token at "+
			cs.getLine()+":"+cs.getCharPositionInLine()+" "+re);
	}

	public String getSourceName() {
		return input.getSourceName();
	}

}