/*
 [The "BSD licence"]
 Copyright (c) 2005-2006 Terence Parr
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.runtime {

    /** A lexer is a recognizer that draws input symbols from a character stream.
     *  Lexer grammars result in a subclass of this object.  A Lexer object
     *  uses simplified match() and error recovery mechanisms in the interest
     *  of speed.
     */
    public class Lexer extends BaseRecognizer implements TokenSource {
        /** Where is the lexer drawing characters from? */
        protected var input:CharStream;

        /** Create a lexer.
         *
         *  @param input the character stream to read from; defaults to null and
         *         can be supplied later through the charStream setter.
         *  @param state recognizer state object, passed unchanged to the
         *         BaseRecognizer constructor.
         */
        public function Lexer(input:CharStream = null, state:RecognizerSharedState = null) {
            super(state);
            this.input = input;
        }

        /** Reset the recognizer state, rewind the input (when there is one) and
         *  clear the per-token lexer fields held in the shared state.
         */
        public override function reset():void {
            super.reset(); // reset all recognizer state variables
            // reset Lexer state variables
            if ( input!=null ) {
                input.seek(0); // rewind the input
            }
            if ( state==null ) {
                return; // no shared state work to do
            }
            state.token = null;
            state.type = TokenConstants.INVALID_TOKEN_TYPE;
            state.channel = TokenConstants.DEFAULT_CHANNEL;
            state.tokenStartCharIndex = -1;
            state.tokenStartCharPositionInLine = -1;
            state.tokenStartLine = -1;
            state.text = null;
        }

        /** Return a token from this source; i.e., match a token on the char
         *  stream.
         *
         *  Loops until a token can be returned: rules that finish with
         *  SKIP_TOKEN and rules that fail with a recognition error both cause
         *  another attempt from the current input position.
         *
         *  @return the matched token, or TokenConstants.EOF_TOKEN when the next
         *          character of lookahead is EOF.
         */
        public function nextToken():Token {
            while (true) {
                // Start every attempt with fresh per-token state.
                state.token = null;
                state.channel = TokenConstants.DEFAULT_CHANNEL;
                state.tokenStartCharIndex = input.index;
                state.tokenStartCharPositionInLine = input.charPositionInLine;
                state.tokenStartLine = input.line;
                state.text = null;
                if ( input.LA(1)==CharStreamConstants.EOF ) {
                    return TokenConstants.EOF_TOKEN;
                }
                try {
                    mTokens();
                    if ( state.token==null ) {
                        // The rule did not emit anything itself; emit the default token.
                        emit();
                    }
                    else if ( state.token==TokenConstants.SKIP_TOKEN ) {
                        continue;
                    }
                    return state.token;
                }
                catch (nva:NoViableAltException) {
                    reportError(nva);
                    recover(nva); // throw out current char and try again
                }
                catch (re:RecognitionException) {
                    reportError(re);
                    // match() routine has already called recover()
                }
            }
            // Can't happen, but will quiet compiler error
            return null;
        }

        /** Instruct the lexer to skip creating a token for current lexer rule
         *  and look for another token.  nextToken() knows to keep looking when
         *  a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
         *  if token==null at end of any token rule, it creates one for you
         *  and emits it.
         */
        public function skip():void {
            state.token = TokenConstants.SKIP_TOKEN;
        }

        /** This is the lexer entry point that sets instance var 'token'.
         *
         *  Acts as an abstract method: this base implementation always throws,
         *  so subclasses must override it.
         *
         *  @throws Error always, with the message "Not implemented".
         */
        public function mTokens():void {
            // abstract function
            throw new Error("Not implemented");
        }

        /** Set the char stream and reset the lexer */
        public function set charStream(input:CharStream):void {
            // Null the field first so that reset() does not seek on the old stream.
            this.input = null;
            reset();
            this.input = input;
        }

        /** The character stream this lexer is currently reading from. */
        public function get charStream():CharStream {
            return input;
        }

        /** The source name reported by the current input stream. */
        public override function get sourceName():String {
            return input.sourceName;
        }

        /** Currently does not support multiple emits per nextToken invocation
         *  for efficiency reasons.  Subclass and override this method and
         *  nextToken (to push tokens into a list and pull from that list rather
         *  than a single variable as this implementation does).
         *
         *  @param token the token to store as the current token.
         */
        public function emitToken(token:Token):void {
            state.token = token;
        }

        /** The standard method called to automatically emit a token at the
         *  outermost lexical rule.  The token object should point into the
         *  char buffer start..stop.  If there is a text override in 'text',
         *  use that to set the token's text.  Override this method to emit
         *  custom Token objects.
         *
         *  @return the token that was created and passed to emitToken().
         */
        public function emit():Token {
            var t:Token = CommonToken.createFromStream(input, state.type, state.channel, state.tokenStartCharIndex, charIndex - 1);
            t.line = state.tokenStartLine;
            t.text = state.text;
            t.charPositionInLine = state.tokenStartCharPositionInLine;
            emitToken(t);
            return t;
        }

        /** Match the characters of s, in order, against the input.
         *
         *  On a mismatch while backtracking, state.failed is set and the method
         *  returns; otherwise recover() is called and the exception is thrown.
         *
         *  @param s the literal text expected next on the input.
         *  @throws MismatchedTokenException on a mismatch when not backtracking.
         */
        public function matchString(s:String):void {
            var i:int = 0;
            while ( i<s.length ) {
                if ( input.LA(1) != s.charCodeAt(i) ) {
                    if ( state.backtracking>0 ) {
                        state.failed = true;
                        return;
                    }
                    var mte:MismatchedTokenException =
                        new MismatchedTokenException(s.charCodeAt(i), input);
                    recover(mte);
                    throw mte;
                }
                i++;
                input.consume();
                state.failed = false;
            }
        }

        /** Consume the current input character, whatever it is. */
        public function matchAny():void {
            input.consume();
        }

        /** Match a single character.
         *
         *  On a mismatch while backtracking, state.failed is set and the method
         *  returns; otherwise recover() is called and the exception is thrown.
         *
         *  @param c the character code expected next on the input.
         *  @throws MismatchedTokenException on a mismatch when not backtracking.
         */
        public function match(c:int):void {
            if ( input.LA(1)!=c ) {
                if ( state.backtracking>0 ) {
                    state.failed = true;
                    return;
                }
                var mte:MismatchedTokenException =
                    new MismatchedTokenException(c, input);
                recover(mte);  // don't really recover; just consume in lexer
                throw mte;
            }
            input.consume();
            state.failed = false;
        }

        /** Match a single character in the inclusive range a..b.
         *
         *  On a mismatch while backtracking, state.failed is set and the method
         *  returns; otherwise recover() is called and the exception is thrown.
         *
         *  @param a lowest acceptable character code.
         *  @param b highest acceptable character code.
         *  @throws MismatchedRangeException on a mismatch when not backtracking.
         */
        public function matchRange(a:int, b:int):void
        {
            // Read the lookahead once instead of once per bound.
            var la:int = input.LA(1);
            if ( la<a || la>b ) {
                if ( state.backtracking>0 ) {
                    state.failed = true;
                    return;
                }
                var mre:MismatchedRangeException =
                    new MismatchedRangeException(a,b,input);
                recover(mre);
                throw mre;
            }
            input.consume();
            state.failed = false;
        }

        /** The line value reported by the input stream. */
        public function get line():int {
            return input.line;
        }

        /** The charPositionInLine value reported by the input stream. */
        public function get charPositionInLine():int {
            return input.charPositionInLine;
        }

        /** What is the index of the current character of lookahead? */
        public function get charIndex():int {
            return input.index;
        }

        /** Return the text matched so far for the current token or any
         *  text override.
         */
        public function get text():String {
            if ( state.text!=null ) {
                return state.text;
            }
            return input.substring(state.tokenStartCharIndex, charIndex-1);
        }

        /** Set the complete text of this token; it wipes any previous
         *  changes to the text.
         */
        public function set text(text:String):void {
            state.text = text;
        }

        /** Report a recognition error by passing it, together with this
         *  recognizer's token names, to displayRecognitionError().
         */
        public override function reportError(e:RecognitionException):void {
            displayRecognitionError(this.tokenNames, e);
        }

        /** Build the error message for a recognition exception raised while
         *  lexing, phrased in terms of characters.
         *
         *  Exception types not handled here are delegated to the superclass.
         *
         *  @param e the exception to describe.
         *  @param tokenNames passed through to the superclass for the types it handles.
         *  @return the message text.
         */
        public override function getErrorMessage(e:RecognitionException, tokenNames:Array):String {
            var msg:String = null;
            if ( e is MismatchedTokenException ) {
                var mte:MismatchedTokenException = MismatchedTokenException(e);
                msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting "+getCharErrorDisplay(mte.expecting);
            }
            else if ( e is NoViableAltException ) {
                // for development, cast e to NoViableAltException (nvae) and add
                // "decision=<<"+nvae.grammarDecisionDescription+">>"
                // and "(decision="+nvae.decisionNumber+") and
                // "state "+nvae.stateNumber
                msg = "no viable alternative at character "+getCharErrorDisplay(e.c);
            }
            else if ( e is EarlyExitException ) {
                // for development, cast e to EarlyExitException (eee) and add
                // "(decision="+eee.decisionNumber+")"
                msg = "required (...)+ loop did not match anything at character "+getCharErrorDisplay(e.c);
            }
            else if ( e is MismatchedNotSetException ) {
                var mnse:MismatchedNotSetException = MismatchedNotSetException(e);
                msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting set "+mnse.expecting;
            }
            else if ( e is MismatchedSetException ) {
                var mse:MismatchedSetException = MismatchedSetException(e);
                msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting set "+mse.expecting;
            }
            else if ( e is MismatchedRangeException ) {
                var mre:MismatchedRangeException = MismatchedRangeException(e);
                msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting set "+
                    getCharErrorDisplay(mre.a)+".."+getCharErrorDisplay(mre.b);
            }
            else {
                msg = super.getErrorMessage(e, tokenNames);
            }
            return msg;
        }

        /** Render a character code for use in an error message.
         *
         *  EOF is shown as &lt;EOF&gt;; newline, tab and carriage return are shown
         *  as the escapes \n, \t and \r; any other code is shown as the character
         *  itself.  The result is always wrapped in single quotes.
         *
         *  @param c the character code (or TokenConstants.EOF).
         *  @return the quoted display form.
         */
        public function getCharErrorDisplay(c:int):String {
            var s:String = String.fromCharCode(c);
            // The case labels must be numeric character codes: c is an int, and a
            // one-character String label such as '\n' never compares equal to its
            // code point, which left the escape cases unreachable.
            switch ( c ) {
                case TokenConstants.EOF :
                    s = "<EOF>";
                    break;
                case 0x0A : // '\n' line feed
                    s = "\\n";
                    break;
                case 0x09 : // '\t' horizontal tab
                    s = "\\t";
                    break;
                case 0x0D : // '\r' carriage return
                    s = "\\r";
                    break;
            }
            return "'"+s+"'";
        }

        /** Lexers can normally match any char in its vocabulary after matching
         *  a token, so do the easy thing and just kill a character and hope
         *  it all works out.  You can instead use the rule invocation stack
         *  to do sophisticated error recovery if you are in a fragment rule.
         *
         *  @return This method should return the exception it was provided as an
         *  argument.  This differs from the Java runtime so that an exception variable
         *  does not need to be declared in the generated code, thus reducing a large
         *  number of compiler warnings in generated code.
         */
        public function recover(re:RecognitionException):RecognitionException {
            input.consume();
            return re;
        }

        /** Trace entry into a rule, describing the current input character and
         *  its line / position in line.
         */
        public function traceIn(ruleName:String, ruleIndex:int):void {
            var inputSymbol:String = String.fromCharCode(input.LT(1))+" line="+ line +":"+ charPositionInLine;
            super.traceInSymbol(ruleName, ruleIndex, inputSymbol);
        }

        /** Trace exit from a rule, describing the current input character and
         *  its line / position in line.
         */
        public function traceOut(ruleName:String, ruleIndex:int):void {
            var inputSymbol:String = String.fromCharCode(input.LT(1))+" line="+ line +":"+ charPositionInLine;
            super.traceOutSymbol(ruleName, ruleIndex, inputSymbol);
        }
    }
}