1/*
2 [The "BSD licence"]
3 Copyright (c) 2005-2006 Terence Parr
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 1. Redistributions of source code must retain the above copyright
10    notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14 3. The name of the author may not be used to endorse or promote products
15    derived from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*/
28package org.antlr.runtime {
29
	/** A lexer is a recognizer that draws input symbols from a character
	 *  stream.  Lexer grammars result in a subclass of this object.  A Lexer
	 *  object uses simplified match() and error recovery mechanisms in the
	 *  interest of speed.
	 */
35	public class Lexer extends BaseRecognizer implements TokenSource {
36		/** Where is the lexer drawing characters from? */
37	    protected var input:CharStream;
38
39		public function Lexer(input:CharStream = null, state:RecognizerSharedState = null) {
40		    super(state);
41			this.input = input;
42		}
43
44		public override function reset():void {
45			super.reset(); // reset all recognizer state variables
46    		// wack Lexer state variables
47    		if ( input!=null ) {
48    			input.seek(0); // rewind the input
49    		}
50    		if ( state==null ) {
51    			return; // no shared state work to do
52    		}
53    		state.token = null;
54    		state.type = TokenConstants.INVALID_TOKEN_TYPE;
55    		state.channel = TokenConstants.DEFAULT_CHANNEL;
56    		state.tokenStartCharIndex = -1;
57    		state.tokenStartCharPositionInLine = -1;
58    		state.tokenStartLine = -1;
59    		state.text = null;
60		}
61
62		/** Return a token from this source; i.e., match a token on the char
63		 *  stream.
64		 */
65	    public function nextToken():Token {
66			while (true) {
67				state.token = null;
68				state.channel = TokenConstants.DEFAULT_CHANNEL;
69				state.tokenStartCharIndex = input.index;
70				state.tokenStartCharPositionInLine = input.charPositionInLine;
71				state.tokenStartLine = input.line;
72				state.text = null;
73				if ( input.LA(1)==CharStreamConstants.EOF ) {
74	                return TokenConstants.EOF_TOKEN;
75	            }
76	            try {
77	                mTokens();
78					if ( state.token==null ) {
79						emit();
80					}
81					else if ( state.token==TokenConstants.SKIP_TOKEN ) {
82						continue;
83					}
84					return state.token;
85				}
86	            catch (nva:NoViableAltException) {
87    				reportError(nva);
88    				recover(nva); // throw out current char and try again
89    			}
90    			catch (re:RecognitionException) {
91    				reportError(re);
92    				// match() routine has already called recover()
93    			}
94	        }
95	        // Can't happen, but will quiet complier error
96	        return null;
97	    }
98
99		/** Instruct the lexer to skip creating a token for current lexer rule
100		 *  and look for another token.  nextToken() knows to keep looking when
101		 *  a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
102		 *  if token==null at end of any token rule, it creates one for you
103		 *  and emits it.
104		 */
105		public function skip():void {
106			state.token = TokenConstants.SKIP_TOKEN;
107		}
108
109		/** This is the lexer entry point that sets instance var 'token' */
110		public function mTokens():void {
111			// abstract function
112			throw new Error("Not implemented");
113		}
114
115		/** Set the char stream and reset the lexer */
116		public function set charStream(input:CharStream):void {
117			this.input = null;
118			reset();
119			this.input = input;
120		}
121
122		public function get charStream():CharStream {
123			return input;
124		}
125
126		public override function get sourceName():String {
127			return input.sourceName;
128		}
129
130		/** Currently does not support multiple emits per nextToken invocation
131		 *  for efficiency reasons.  Subclass and override this method and
132		 *  nextToken (to push tokens into a list and pull from that list rather
133		 *  than a single variable as this implementation does).
134		 */
135		public function emitToken(token:Token):void {
136			state.token = token;
137		}
138
139		/** The standard method called to automatically emit a token at the
140		 *  outermost lexical rule.  The token object should point into the
141		 *  char buffer start..stop.  If there is a text override in 'text',
142		 *  use that to set the token's text.  Override this method to emit
143		 *  custom Token objects.
144		 */
145		public function emit():Token {
146			var t:Token = CommonToken.createFromStream(input, state.type, state.channel, state.tokenStartCharIndex, charIndex - 1);
147			t.line = state.tokenStartLine;
148			t.text = state.text;
149			t.charPositionInLine = state.tokenStartCharPositionInLine;
150			emitToken(t);
151			return t;
152		}
153
154		public function matchString(s:String):void {
155	        var i:int = 0;
156	        while ( i<s.length ) {
157	            if ( input.LA(1) != s.charCodeAt(i) ) {
158					if ( state.backtracking>0 ) {
159						state.failed = true;
160						return;
161					}
162					var mte:MismatchedTokenException =
163						new MismatchedTokenException(s.charCodeAt(i), input);
164					recover(mte);
165					throw mte;
166	            }
167	            i++;
168	            input.consume();
169				state.failed = false;
170	        }
171	    }
172
173	    public function matchAny():void {
174	        input.consume();
175	    }
176
177	    public function match(c:int):void {
178	        if ( input.LA(1)!=c ) {
179				if ( state.backtracking>0 ) {
180					state.failed = true;
181					return;
182				}
183				var mte:MismatchedTokenException =
184					new MismatchedTokenException(c, input);
185				recover(mte);  // don't really recover; just consume in lexer
186				throw mte;
187	        }
188	        input.consume();
189			state.failed = false;
190	    }
191
192	    public function matchRange(a:int, b:int):void
193		{
194	        if ( input.LA(1)<a || input.LA(1)>b ) {
195				if ( state.backtracking>0 ) {
196					state.failed = true;
197					return;
198				}
199	            var mre:MismatchedRangeException =
200					new MismatchedRangeException(a,b,input);
201				recover(mre);
202				throw mre;
203	        }
204	        input.consume();
205			state.failed = false;
206	    }
207
208	    public function get line():int {
209	        return input.line;
210	    }
211
212	    public function get charPositionInLine():int {
213	        return input.charPositionInLine;
214	    }
215
216		/** What is the index of the current character of lookahead? */
217		public function get charIndex():int {
218			return input.index;
219		}
220
221		/** Return the text matched so far for the current token or any
222		 *  text override.
223		 */
224		public function get text():String {
225			if ( state.text!=null ) {
226				return state.text;
227			}
228			return input.substring(state.tokenStartCharIndex, charIndex-1);
229		}
230
231		/** Set the complete text of this token; it wipes any previous
232		 *  changes to the text.
233		 */
234		public function set text(text:String):void {
235			state.text = text;
236		}
237
238		public override function reportError(e:RecognitionException):void {
239			displayRecognitionError(this.tokenNames, e);
240		}
241
242		public override function getErrorMessage(e:RecognitionException, tokenNames:Array):String {
243			var msg:String = null;
244			if ( e is MismatchedTokenException ) {
245				var mte:MismatchedTokenException = MismatchedTokenException(e);
246				msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting "+getCharErrorDisplay(mte.expecting);
247			}
248			else if ( e is NoViableAltException ) {
249				var nvae:NoViableAltException = NoViableAltException(e);
250				// for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
251				// and "(decision="+nvae.decisionNumber+") and
252				// "state "+nvae.stateNumber
253				msg = "no viable alternative at character "+getCharErrorDisplay(e.c);
254			}
255			else if ( e is EarlyExitException ) {
256				var eee:EarlyExitException = EarlyExitException(e);
257				// for development, can add "(decision="+eee.decisionNumber+")"
258				msg = "required (...)+ loop did not match anything at character "+getCharErrorDisplay(e.c);
259			}
260			else if ( e is MismatchedNotSetException ) {
261				var mnse:MismatchedNotSetException = MismatchedNotSetException(e);
262				msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting set "+mnse.expecting;
263			}
264			else if ( e is MismatchedSetException ) {
265				var mse:MismatchedSetException = MismatchedSetException(e);
266				msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting set "+mse.expecting;
267			}
268			else if ( e is MismatchedRangeException ) {
269				var mre:MismatchedRangeException = MismatchedRangeException(e);
270				msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting set "+
271					getCharErrorDisplay(mre.a)+".."+getCharErrorDisplay(mre.b);
272			}
273			else {
274				msg = super.getErrorMessage(e, tokenNames);
275			}
276			return msg;
277		}
278
279		public function getCharErrorDisplay(c:int):String {
280			var s:String = String.fromCharCode(c);
281			switch ( c ) {
282				case TokenConstants.EOF :
283					s = "<EOF>";
284					break;
285				case '\n' :
286					s = "\\n";
287					break;
288				case '\t' :
289					s = "\\t";
290					break;
291				case '\r' :
292					s = "\\r";
293					break;
294			}
295			return "'"+s+"'";
296		}
297
298		/** Lexers can normally match any char in it's vocabulary after matching
299		 *  a token, so do the easy thing and just kill a character and hope
300		 *  it all works out.  You can instead use the rule invocation stack
301		 *  to do sophisticated error recovery if you are in a fragment rule.
302		 *
303		 *  @return This method should return the exception it was provided as an
304		 *  argument.  This differs from the Java runtime so that an exception variable
305		 *  does not need to be declared in the generated code, thus reducing a large
306		 *  number of compiler warnings in generated code.
307		 */
308		public function recover(re:RecognitionException):RecognitionException {
309			input.consume();
310			return re;
311		}
312
313		public function traceIn(ruleName:String, ruleIndex:int):void {
314			var inputSymbol:String = String.fromCharCode(input.LT(1))+" line="+ line +":"+ charPositionInLine;
315			super.traceInSymbol(ruleName, ruleIndex, inputSymbol);
316		}
317
318		public function traceOut(ruleName:String, ruleIndex:int):void {
319			var inputSymbol:String = String.fromCharCode(input.LT(1))+" line="+ line +":"+ charPositionInLine;
320			super.traceOutSymbol(ruleName, ruleIndex, inputSymbol);
321		}
322	}
323}