// Interpreter.java revision 324c4644fee44b9898524c09511bd33c3f12e2df
/*
 * [The "BSD license"]
 * Copyright (c) 2010 Terence Parr
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.tool;

import org.antlr.analysis.DFA;
import org.antlr.analysis.*;
import org.antlr.misc.IntervalSet;
import org.antlr.runtime.*;
import org.antlr.runtime.debug.BlankDebugEventListener;
import org.antlr.runtime.debug.DebugEventListener;
import org.antlr.runtime.debug.ParseTreeBuilder;
import org.antlr.runtime.tree.ParseTree;

import java.util.List;
import java.util.Stack;

/** The recognition interpreter/engine for grammars.  It lives apart from
 *  Grammar because, while related, interpretation is technically not a
 *  Grammar responsibility.  Build one of these from a grammar plus an input
 *  stream; for a lexer grammar the object doubles as a TokenSource, so two
 *  grammars can be chained (via a CommonTokenStream) to lex and then parse.
 */
public class Interpreter implements TokenSource {
    protected Grammar grammar;
    protected IntStream input;

    /** Lexer listener that records a CommonToken each time a real token
     *  rule finishes.  scan() installs one of these to capture a single
     *  token; call scan() repeatedly (saving each result) for a stream.
     */
    class LexerActionGetTokenType extends BlankDebugEventListener {
        public CommonToken token;
        Grammar g;

        public LexerActionGetTokenType(Grammar g) {
            this.g = g;
        }

        /** On leaving any rule except the artificial "tokens" rule,
         *  build a token whose type is derived from the rule name.
         */
        public void exitRule(String grammarFileName, String ruleName) {
            if ( !ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) {
                int type = g.getTokenType(ruleName);
                int channel = Token.DEFAULT_CHANNEL;
                token = new CommonToken((CharStream)input, type, channel, 0, 0);
            }
        }
    }

    public Interpreter(Grammar grammar, IntStream input) {
        this.grammar = grammar;
        this.input = input;
    }

    /** TokenSource implementation: match the next token off the char stream
     *  using the artificial "tokens" start rule.  Only meaningful for lexer
     *  grammars; returns null for any other grammar type.
     */
    public Token nextToken() {
        if ( grammar.type != Grammar.LEXER ) {
            return null;
        }
        if ( input.LA(1) == CharStream.EOF ) {
            return new CommonToken((CharStream)input, Token.EOF, Token.DEFAULT_CHANNEL,
                                   input.index(), input.index());
        }
        int tokenStartIndex = input.index();
        int tokenStartCharPos = ((CharStream)input).getCharPositionInLine();
        CommonToken matched = null;
        // Keep trying until a token matches or input runs out; each scan
        // failure is reported and we retry from wherever recovery left us.
        while ( input.LA(1) != CharStream.EOF ) {
            try {
                matched = scan(Grammar.ARTIFICIAL_TOKENS_RULENAME, null);
                break;
            }
            catch (RecognitionException re) {
                reportScanError(re);
            }
        }
        // scan() only fills in the token type; complete the rest here.
        int tokenStopIndex = input.index() - 1;
        if ( matched == null ) {
            return new CommonToken((CharStream)input, Token.EOF, Token.DEFAULT_CHANNEL,
                                   tokenStartIndex, tokenStartIndex);
        }
        matched.setLine(((CharStream)input).getLine());
        matched.setStartIndex(tokenStartIndex);
        matched.setStopIndex(tokenStopIndex);
        matched.setCharPositionInLine(tokenStartCharPos);
        return matched;
    }

    /** For the current char stream, try to match against the NFA starting
     *  at startRule.  The walk is deterministic even though it uses an NFA,
     *  because each decision point is resolved with its lookahead DFA --
     *  exactly what generated parser code would do.
     *
     *  Lexer grammars only; silently does nothing for other grammar types.
     *  Recognition events are routed to {@code actions}; every NFA state
     *  visited is appended to {@code visitedStates} when non-null.
     */
    public void scan(String startRule,
                     DebugEventListener actions,
                     List visitedStates)
        throws RecognitionException
    {
        if ( grammar.type != Grammar.LEXER ) {
            return;
        }
        CharStream in = (CharStream)this.input;
        // Lazily build NFAs from the grammar AST if not done yet.
        if ( grammar.getRuleStartState(startRule) == null ) {
            grammar.buildNFA();
        }
        // Likewise create the lookahead DFA predictors on first use.
        if ( !grammar.allDecisionDFAHaveBeenCreated() ) {
            grammar.createLookaheadDFAs();
        }
        Stack ruleInvocationStack = new Stack();
        NFAState start = grammar.getRuleStartState(startRule);
        NFAState stop = grammar.getRuleStopState(startRule);
        parseEngine(startRule, start, stop, in, ruleInvocationStack,
                    actions, visitedStates);
    }

    /** Convenience: scan one token without recording visited states. */
    public CommonToken scan(String startRule)
        throws RecognitionException
    {
        return scan(startRule, null);
    }

    /** Scan one token, capturing it via a LexerActionGetTokenType listener. */
    public CommonToken scan(String startRule, List visitedStates)
        throws RecognitionException
    {
        LexerActionGetTokenType tokenGetter = new LexerActionGetTokenType(grammar);
        scan(startRule, tokenGetter, visitedStates);
        return tokenGetter.token;
    }

    /** Parse from startRule, sending events to {@code actions} and
     *  appending visited NFA states to {@code visitedStates} when non-null.
     */
    public void parse(String startRule,
                      DebugEventListener actions,
                      List visitedStates)
        throws RecognitionException
    {
        // Lazily build NFAs/DFAs from the grammar AST if not done yet.
        if ( grammar.getRuleStartState(startRule) == null ) {
            grammar.buildNFA();
        }
        if ( !grammar.allDecisionDFAHaveBeenCreated() ) {
            grammar.createLookaheadDFAs();
        }
        Stack ruleInvocationStack = new Stack();
        NFAState start = grammar.getRuleStartState(startRule);
        NFAState stop = grammar.getRuleStopState(startRule);
        parseEngine(startRule, start, stop, input, ruleInvocationStack,
                    actions, visitedStates);
    }

    /** Convenience: parse without recording visited states. */
    public ParseTree parse(String startRule)
        throws RecognitionException
    {
        return parse(startRule, null);
    }

    /** Parse from startRule and return the resulting parse tree. */
    public ParseTree parse(String startRule, List visitedStates)
        throws RecognitionException
    {
        ParseTreeBuilder builder = new ParseTreeBuilder(grammar.name);
        try {
            parse(startRule, builder, visitedStates);
        }
        catch (RecognitionException ignored) {
            // Errors already reached the listener via recognitionException();
            // the exception merely unwinds the engine, and the error node
            // is recorded in the tree.
        }
        return builder.getTree();
    }

    /** Simulate the NFA from {@code start} to {@code stop}, driving any
     *  listener and filling {@code visitedStates} (when non-null) with every
     *  NFA state touched during the walk.
     */
    protected void parseEngine(String startRule,
                               NFAState start,
                               NFAState stop,
                               IntStream input,
                               Stack ruleInvocationStack,
                               DebugEventListener actions,
                               List visitedStates)
        throws RecognitionException
    {
        NFAState state = start;
        if ( actions != null ) {
            actions.enterRule(state.nfa.grammar.getFileName(), start.enclosingRule.name);
        }
        int lookahead = input.LA(1);
        while ( state != stop ) {
            if ( visitedStates != null ) {
                visitedStates.add(state);
            }

            // CASE 1: decision state; predict an alt with the lookahead DFA
            // and jump to that alternative's first state.
            if ( state.getDecisionNumber() > 0 &&
                 state.nfa.grammar.getNumberOfAltsForDecisionNFA(state) > 1 )
            {
                DFA dfa = state.nfa.grammar.getLookaheadDFA(state.getDecisionNumber());
                int marker = input.mark();
                int predictedAlt = predict(dfa);
                if ( predictedAlt == NFA.INVALID_ALT_NUMBER ) {
                    String description = dfa.getNFADecisionStartState().getDescription();
                    NoViableAltException nvae =
                        new NoViableAltException(description,
                                                 dfa.getDecisionNumber(),
                                                 state.stateNumber,
                                                 input);
                    if ( actions != null ) {
                        actions.recognitionException(nvae);
                    }
                    input.consume(); // recover
                    throw nvae;
                }
                input.rewind(marker);
                int parseAlt = state.translateDisplayAltToWalkAlt(predictedAlt);
                NFAState altStart;
                if ( parseAlt > state.nfa.grammar.getNumberOfAltsForDecisionNFA(state) ) {
                    // implied exit branch of a loop block etc...
                    altStart = state.nfa.grammar.nfa.getState(state.endOfBlockStateNumber);
                }
                else {
                    altStart = state.nfa.grammar.getNFAStateForAltOfDecision(state, parseAlt);
                }
                state = (NFAState)altStart.transition[0].target;
                continue;
            }

            // CASE 2: accept state; finished matching a rule.
            if ( state.isAcceptState() ) {
                if ( actions != null ) {
                    actions.exitRule(state.nfa.grammar.getFileName(), state.enclosingRule.name);
                }
                if ( ruleInvocationStack.empty() ) {
                    // hit the stop state of the start rule; parse is done
                    break;
                }
                // Pop the invoking state to know where to resume.
                NFAState invokingState = (NFAState)ruleInvocationStack.pop();
                RuleClosureTransition invokingTransition =
                    (RuleClosureTransition)invokingState.transition[0];
                state = invokingTransition.followState;
                continue;
            }

            Transition edge = state.transition[0];
            Label edgeLabel = edge.label;
            if ( edgeLabel.isSemanticPredicate() ) {
                // NOTE(review): predicates are unsupported; this reports the
                // failure but deliberately does not throw here -- the throw
                // happens below in CASE 5 when the label fails to match.
                FailedPredicateException fpe =
                    new FailedPredicateException(input,
                                                 state.enclosingRule.name,
                                                 "can't deal with predicates yet");
                if ( actions != null ) {
                    actions.recognitionException(fpe);
                }
            }

            // CASE 3: epsilon transition
            if ( edgeLabel.isEpsilon() ) {
                if ( edge instanceof RuleClosureTransition ) {
                    // CASE 3a: rule invocation; remember where to return to.
                    ruleInvocationStack.push(state);
                    state = (NFAState)edge.target;
                    if ( actions != null ) {
                        actions.enterRule(state.nfa.grammar.getFileName(), state.enclosingRule.name);
                    }
                    // May have jumped into a different (imported) grammar;
                    // make sure its decision DFAs exist.
                    if ( !state.nfa.grammar.allDecisionDFAHaveBeenCreated() ) {
                        state.nfa.grammar.createLookaheadDFAs();
                    }
                }
                else {
                    // CASE 3b: plain epsilon edge; just advance.
                    state = (NFAState)edge.target;
                }
            }

            // CASE 4: edge label matches the current input symbol.
            else if ( edgeLabel.matches(lookahead) ) {
                if ( actions != null ) {
                    if ( state.nfa.grammar.type == Grammar.PARSER ||
                         state.nfa.grammar.type == Grammar.COMBINED )
                    {
                        actions.consumeToken(((TokenStream)input).LT(1));
                    }
                }
                state = (NFAState)edge.target;
                input.consume();
                lookahead = input.LA(1);
            }

            // CASE 5: error; the label is inconsistent with the input.
            else {
                if ( edgeLabel.isAtom() ) {
                    MismatchedTokenException mte =
                        new MismatchedTokenException(edgeLabel.getAtom(), input);
                    if ( actions != null ) {
                        actions.recognitionException(mte);
                    }
                    input.consume(); // recover
                    throw mte;
                }
                else if ( edgeLabel.isSet() ) {
                    MismatchedSetException mse =
                        new MismatchedSetException(((IntervalSet)edgeLabel.getSet()).toRuntimeBitSet(),
                                                   input);
                    if ( actions != null ) {
                        actions.recognitionException(mse);
                    }
                    input.consume(); // recover
                    throw mse;
                }
                else if ( edgeLabel.isSemanticPredicate() ) {
                    FailedPredicateException fpe =
                        new FailedPredicateException(input,
                                                     state.enclosingRule.name,
                                                     edgeLabel.getSemanticContext().toString());
                    if ( actions != null ) {
                        actions.recognitionException(fpe);
                    }
                    input.consume(); // recover
                    throw fpe;
                }
                else {
                    throw new RecognitionException(input); // unknown error
                }
            }
        }
        if ( actions != null ) {
            actions.exitRule(state.nfa.grammar.getFileName(), stop.enclosingRule.name);
        }
    }

    /** Given an input stream, return the unique alternative predicted by
     *  matching the input, or NFA.INVALID_ALT_NUMBER upon error.  The first
     *  lookahead symbol must already be primed: input.LA(1) is where
     *  prediction starts.
     */
    public int predict(DFA dfa) {
        DFAState d = dfa.startState;
        int cur = input.LA(1);
        // NOTE(review): the EOT fallback edge is remembered across DFA
        // states rather than reset per state; preserved as-is to match the
        // original behavior -- confirm against upstream before changing.
        Transition eotEdge = null;
        dfaLoop:
        while ( !d.isAcceptState() ) {
            // Look for an edge of d whose label matches the current symbol.
            for (int i = 0; i < d.getNumberOfTransitions(); i++) {
                Transition t = d.transition(i);
                if ( t.label.matches(cur) ) {
                    // take transition i
                    d = (DFAState)t.target;
                    input.consume();
                    cur = input.LA(1);
                    continue dfaLoop;
                }
                if ( t.label.getAtom() == Label.EOT ) {
                    // EOT matches any char; remember as a fallback edge.
                    eotEdge = t;
                }
            }
            if ( eotEdge != null ) {
                d = (DFAState)eotEdge.target;
                continue dfaLoop;
            }
            return NFA.INVALID_ALT_NUMBER;
        }
        // Accept state reached; it knows the uniquely predicted alt.
        // Nothing emanates from a stop state, so we must terminate anyway.
        return d.getUniquelyPredictedAlt();
    }

    /** Print the best message we can for a failed token match; with no
     *  Lexer object available we cannot call its error-message routine.
     */
    public void reportScanError(RecognitionException re) {
        CharStream cs = (CharStream)input;
        System.err.println("problem matching token at "+
            cs.getLine()+":"+cs.getCharPositionInLine()+" "+re);
    }

    public String getSourceName() {
        return input.getSourceName();
    }
}