smaliLexer.flex revision 73d1b5d3cbc845e7123f41acd80c8cca4c1a369d
package org.jf.smali;

import java.io.*;
import org.antlr.runtime.*;
import org.jf.util.*;
import static org.jf.smali.smaliParser.*;

%%

%public
%class smaliFlexLexer
%implements TokenSource
%implements LexerErrorInterface
%type Token
%unicode
%line
%column
%char

%{
    /** Accumulates the text of the string/char literal currently being scanned. */
    private final StringBuilder sb = new StringBuilder();

    /** First error seen inside the current string/char literal, or null if none was seen. */
    private String stringOrCharError = null;

    /* Position of the opening quote of the current string/char literal. */
    private int stringStartLine;
    private int stringStartCol;
    private int stringStartChar;

    /** Number of InvalidTokens returned from nextToken() so far. */
    private int lexerErrors = 0;

    /** File used to build error headers; stays null until setSourceFile is called. */
    private File sourceFile;

    /** When true, nextToken() still counts errors but does not print them. */
    private boolean suppressErrors;

    /**
     * Returns the next token from yylex().
     *
     * If the token is an InvalidToken, the error counter is incremented and, unless errors are
     * suppressed, a message is written to System.err. An IOException from yylex() is reported on
     * System.err and turned into Token.EOF_TOKEN.
     */
    public Token nextToken() {
        try {
            Token token = yylex();
            if (token instanceof InvalidToken) {
                InvalidToken invalidToken = (InvalidToken)token;
                if (!suppressErrors) {
                    System.err.println(getErrorHeader(invalidToken) + " Error for input '" +
                        invalidToken.getText() + "': " + invalidToken.getMessage());
                }
                lexerErrors++;
            }
            return token;
        }
        catch (java.io.IOException e) {
            System.err.println("shouldn't happen: " + e.getMessage());
            return Token.EOF_TOKEN;
        }
    }

    /** Sets the current line. yyline holds line-1; getLine() adds the 1 back. */
    public void setLine(int line) {
        this.yyline = line-1;
    }

    /** Sets the current column; the value is stored in yycolumn unchanged. */
    public void setColumn(int column) {
        this.yycolumn = column;
    }

    /** Returns the current line, i.e. yyline+1. */
    public int getLine() {
        return this.yyline+1;
    }

    /** Returns the current column, i.e. yycolumn unchanged. */
    public int getColumn() {
        return this.yycolumn;
    }

    /** Enables or disables printing of lexer errors in nextToken(). */
    public void setSuppressErrors(boolean suppressErrors) {
        this.suppressErrors = suppressErrors;
    }

    /** Sets the file whose path is used by getSourceName() and getErrorHeader(). */
    public void setSourceFile(File sourceFile) {
        this.sourceFile = sourceFile;
    }

    /**
     * Returns the path of the source file relative to the current directory, falling back to the
     * absolute path if the relative path cannot be computed. Returns an empty string when no
     * source file has been set, so that error reporting does not fail with a
     * NullPointerException.
     */
    public String getSourceName() {
        if (sourceFile == null) {
            return "";
        }
        try {
            return PathUtil.getRelativeFile(new File("."), sourceFile).getPath();
        } catch (IOException ex) {
            return sourceFile.getAbsolutePath();
        }
    }

    /** Returns the number of InvalidTokens that nextToken() has returned. */
    public int getNumberOfSyntaxErrors() {
        return lexerErrors;
    }

    /**
     * Creates a CommonToken of the given type and text, positioned at the current match.
     * When hidden is true the token is placed on Token.HIDDEN_CHANNEL.
     */
    private Token newToken(int type, String text, boolean hidden) {
        CommonToken token = new CommonToken(type, text);
        if (hidden) {
            token.setChannel(Token.HIDDEN_CHANNEL);
        }

        token.setStartIndex(yychar);
        token.setStopIndex(yychar + yylength() - 1);
        token.setLine(getLine());
        token.setCharPositionInLine(getColumn());
        return token;
    }

    /** Creates a non-hidden token with explicit text. */
    private Token newToken(int type, String text) {
        return newToken(type, text, false);
    }

    /** Creates a token whose text is the current match, optionally hidden. */
    private Token newToken(int type, boolean hidden) {
        return newToken(type, yytext(), hidden);
    }

    /** Creates a non-hidden token whose text is the current match. */
    private Token newToken(int type) {
        return newToken(type, yytext(), false);
    }

    /** Creates an InvalidToken with the given message and text, positioned at the current match. */
    private Token invalidToken(String message, String text) {
        InvalidToken token = new InvalidToken(message, text);

        token.setStartIndex(yychar);
        token.setStopIndex(yychar + yylength() - 1);
        token.setLine(getLine());
        token.setCharPositionInLine(getColumn());

        return token;
    }

    /** Creates an InvalidToken whose text is the current match. */
    private Token invalidToken(String message) {
        return invalidToken(message, yytext());
    }

    /**
     * Switches to the given lexical state (STRING or CHAR), clears the literal buffer and any
     * pending literal error, and records where the literal starts.
     */
    private void beginStringOrChar(int state) {
        yybegin(state);
        sb.setLength(0);
        stringStartLine = getLine();
        stringStartCol = getColumn();
        stringStartChar = yychar;
        stringOrCharError = null;
    }

    /**
     * Leaves the literal state and returns the finished literal token. If an error was recorded
     * while scanning the literal, an InvalidToken carrying that error is returned instead.
     * The token starts at the recorded opening quote and ends at the end of the current match.
     */
    private Token endStringOrChar(int type) {
        yybegin(YYINITIAL);

        if (stringOrCharError != null) {
            return invalidStringOrChar(stringOrCharError);
        }

        CommonToken token = new CommonToken(type, sb.toString());
        token.setStartIndex(stringStartChar);
        token.setStopIndex(yychar + yylength() - 1);
        token.setLine(stringStartLine);
        token.setCharPositionInLine(stringStartCol);
        return token;
    }

    /** Records an error for the current literal; only the first error is kept. */
    private void setStringOrCharError(String message) {
        if (stringOrCharError == null) {
            stringOrCharError = message;
        }
    }

    /**
     * Leaves the literal state and returns an InvalidToken covering the literal scanned so far,
     * from the recorded opening quote to the end of the current match.
     */
    private Token invalidStringOrChar(String message) {
        yybegin(YYINITIAL);

        InvalidToken token = new InvalidToken(message, sb.toString());
        token.setStartIndex(stringStartChar);
        token.setStopIndex(yychar + yylength() - 1);
        token.setLine(stringStartLine);
        token.setCharPositionInLine(stringStartCol);
        return token;
    }

    /** Builds the "name[line,column]" prefix used when printing a lexer error. */
    public String getErrorHeader(InvalidToken token) {
        return getSourceName()+"["+ token.getLine()+","+token.getCharPositionInLine()+"]";
    }
%}

HexPrefix = 0 [xX]

HexDigit = [0-9a-fA-F]
HexDigits = [0-9a-fA-F]{4}
FewerHexDigits = [0-9a-fA-F]{0,3}

/*Integer forms: zero, decimal, leading-zero (digits 0-7), and hex*/
Integer1 = 0
Integer2 = [1-9] [0-9]*
Integer3 = 0 [0-7]+
Integer4 = {HexPrefix} {HexDigit}+
Integer = {Integer1} | {Integer2} | {Integer3} | {Integer4}

DecimalExponent = [eE] -? [0-9]+

BinaryExponent = [pP] -? [0-9]+

/*This can either be a floating point number or an identifier*/
FloatOrID1 = -? [0-9]+ {DecimalExponent}
FloatOrID2 = -? {HexPrefix} {HexDigit}+ {BinaryExponent}
FloatOrID3 = -? [iI][nN][fF][iI][nN][iI][tT][yY]
FloatOrID4 = [nN][aA][nN]
FloatOrID =  {FloatOrID1} | {FloatOrID2} | {FloatOrID3} | {FloatOrID4}


/*This can only be a float and not an identifier, due to the decimal point*/
Float1 = -? [0-9]+ "." [0-9]* {DecimalExponent}?
Float2 = -? "." [0-9]+ {DecimalExponent}?
Float3 = -? {HexPrefix} {HexDigit}+ "." {HexDigit}* {BinaryExponent}
Float4 = -? {HexPrefix} "." {HexDigit}+ {BinaryExponent}
Float =  {Float1} | {Float2} | {Float3} | {Float4}

SimpleName = [A-Za-z0-9$\-_\u00a1-\u1fff\u2010-\u2027\u2030-\ud7ff\ue000-\uffef]+

PrimitiveType = [ZBSCIJFD]

ClassDescriptor = L ({SimpleName} "/")* {SimpleName} ;

ArrayDescriptor = "[" + ({PrimitiveType} | {ClassDescriptor})

Type = {PrimitiveType} | {ClassDescriptor} | {ArrayDescriptor}


%state STRING
%state CHAR

%%

/*Directives*/
<YYINITIAL>
{
    ".class" { return newToken(CLASS_DIRECTIVE); }
    ".super" { return newToken(SUPER_DIRECTIVE); }
    ".implements" { return newToken(IMPLEMENTS_DIRECTIVE); }
    ".source" { return newToken(SOURCE_DIRECTIVE); }
    ".field" { return newToken(FIELD_DIRECTIVE); }
    ".end field" { return newToken(END_FIELD_DIRECTIVE); }
    ".subannotation" { return newToken(SUBANNOTATION_DIRECTIVE); }
    ".end subannotation" { return newToken(END_SUBANNOTATION_DIRECTIVE); }
    ".annotation" { return newToken(ANNOTATION_DIRECTIVE); }
    ".end annotation" { return newToken(END_ANNOTATION_DIRECTIVE); }
    ".enum" { return newToken(ENUM_DIRECTIVE); }
    ".method" { return newToken(METHOD_DIRECTIVE); }
    ".end method" { return newToken(END_METHOD_DIRECTIVE); }
    ".registers" { return newToken(REGISTERS_DIRECTIVE); }
    ".locals" { return newToken(LOCALS_DIRECTIVE); }
    ".array-data" { return newToken(ARRAY_DATA_DIRECTIVE); }
    ".end array-data" { return newToken(END_ARRAY_DATA_DIRECTIVE); }
    ".packed-switch" { return newToken(PACKED_SWITCH_DIRECTIVE); }
    ".end packed-switch" { return newToken(END_PACKED_SWITCH_DIRECTIVE); }
    ".sparse-switch" { return newToken(SPARSE_SWITCH_DIRECTIVE); }
    ".end sparse-switch" { return newToken(END_SPARSE_SWITCH_DIRECTIVE); }
    ".catch" { return newToken(CATCH_DIRECTIVE); }
    ".catchall" { return newToken(CATCHALL_DIRECTIVE); }
    ".line" { return newToken(LINE_DIRECTIVE); }
    ".parameter" { return newToken(PARAMETER_DIRECTIVE); }
    ".end parameter" { return newToken(END_PARAMETER_DIRECTIVE); }
    ".local" { return newToken(LOCAL_DIRECTIVE); }
    ".end local" { return newToken(END_LOCAL_DIRECTIVE); }
    ".restart local" { return newToken(RESTART_LOCAL_DIRECTIVE); }
    ".prologue" { return newToken(PROLOGUE_DIRECTIVE); }
    ".epilogue" { return newToken(EPILOGUE_DIRECTIVE); }

    /*Unrecognized .end and .restart forms. The character class is A-Z, not A-z: the latter
      would also match the punctuation that sits between Z and a in ASCII.*/
    ".end" { return invalidToken("Invalid directive"); }
    ".end " [a-zA-Z0-9\-_]+ { return invalidToken("Invalid directive"); }
    ".restart" { return invalidToken("Invalid directive"); }
    ".restart " [a-zA-Z0-9\-_]+ { return invalidToken("Invalid directive"); }
}

/*Literals*/
<YYINITIAL> {
    -? {Integer} { return newToken(INTEGER_LITERAL); }
    -? {Integer} [lL] { return newToken(LONG_LITERAL); }
    -? {Integer} [sS] { return newToken(SHORT_LITERAL); }
    -? {Integer} [tT] { return newToken(BYTE_LITERAL); }

    {FloatOrID} [fF] | -? [0-9]+ [fF] { return newToken(FLOAT_LITERAL_OR_ID); }
    {FloatOrID} [dD]? | -? [0-9]+ [dD] { return newToken(DOUBLE_LITERAL_OR_ID); }
    {Float} [fF] { return newToken(FLOAT_LITERAL); }
    {Float} [dD]? { return newToken(DOUBLE_LITERAL); }

    "true"|"false" { return newToken(BOOL_LITERAL); }
    "null" { return newToken(NULL_LITERAL); }

    /*An opening quote starts a literal; the quote itself is kept in the buffer*/
    "\"" { beginStringOrChar(STRING); sb.append('"'); }

    ' { beginStringOrChar(CHAR); sb.append('\''); }
}

/*Body of a string literal: escapes are decoded into the buffer as they are matched*/
<STRING> {
    "\"" { sb.append('"'); return endStringOrChar(STRING_LITERAL); }

    [^\r\n\"\\]+ { sb.append(yytext()); }
    "\\b" { sb.append('\b'); }
    "\\t" { sb.append('\t'); }
    "\\n" { sb.append('\n'); }
    "\\f" { sb.append('\f'); }
    "\\r" { sb.append('\r'); }
    "\\'" { sb.append('\''); }
    "\\\"" { sb.append('"'); }
    "\\\\" { sb.append('\\'); }
    "\\u" {HexDigits} { sb.append((char)Integer.parseInt(yytext().substring(2,6), 16)); }

    /*Bad escapes are kept verbatim and the error is reported when the literal ends*/
    "\\u" {FewerHexDigits} {
        sb.append(yytext());
        setStringOrCharError("Invalid \\u sequence. \\u must be followed by 4 hex digits");
    }

    "\\" [^btnfr'\"\\u] {
        sb.append(yytext());
        setStringOrCharError("Invalid escape sequence " + yytext());
    }

    [\r\n] { return invalidStringOrChar("Unterminated string literal"); }
    <<EOF>> { return invalidStringOrChar("Unterminated string literal"); }
}

/*Body of a character literal*/
<CHAR> {
    ' {
        sb.append('\'');
        /*The buffer includes both quotes, so a valid literal has length 3*/
        if (sb.length() == 2) {
            return invalidStringOrChar("Empty character literal");
        } else if (sb.length() > 3) {
            return invalidStringOrChar("Character literal with multiple chars");
        }

        return endStringOrChar(CHAR_LITERAL);
    }

    [^\r\n'\\]+ { sb.append(yytext()); }
    "\\b" { sb.append('\b'); }
    "\\t" { sb.append('\t'); }
    "\\n" { sb.append('\n'); }
    "\\f" { sb.append('\f'); }
    "\\r" { sb.append('\r'); }
    "\\'" { sb.append('\''); }
    "\\\"" { sb.append('"'); }
    "\\\\" { sb.append('\\'); }
    "\\u" {HexDigits} { sb.append((char)Integer.parseInt(yytext().substring(2,6), 16)); }

    "\\u" {HexDigit}* {
        sb.append(yytext());
        setStringOrCharError("Invalid \\u sequence. \\u must be followed by exactly 4 hex digits");
    }

    "\\" [^btnfr'\"\\u] {
        sb.append(yytext());
        setStringOrCharError("Invalid escape sequence " + yytext());
    }

    [\r\n] { return invalidStringOrChar("Unterminated character literal"); }
    <<EOF>> { return invalidStringOrChar("Unterminated character literal"); }
}

/*Misc*/
<YYINITIAL> {
    [vp] [0-9]+ { return newToken(REGISTER); }

    "build" | "runtime" | "system" {
        return newToken(ANNOTATION_VISIBILITY);
    }

    "public" | "private" | "protected" | "static" | "final" | "synchronized" | "bridge" | "varargs" | "native" |
    "abstract" | "strictfp" | "synthetic" | "constructor" | "declared-synchronized" | "interface" | "enum" |
    "annotation" | "volatile" | "transient" {
        return newToken(ACCESS_SPEC);
    }

    "vtable@0x" {HexDigit}+ { return newToken(VTABLE_OFFSET); }
    "field@0x" {HexDigit}+ { return newToken(FIELD_OFFSET); }

    "+" {Integer} { return newToken(OFFSET); }

    /*Line comments go to the hidden channel*/
    # [^\r\n]* { return newToken(LINE_COMMENT, true); }
}

/*Instructions*/
<YYINITIAL> {
    "goto" {
        return newToken(INSTRUCTION_FORMAT10t);
    }

    "return-void" | "nop" {
        return newToken(INSTRUCTION_FORMAT10x);
    }

    "const/4" {
        return newToken(INSTRUCTION_FORMAT11n);
    }

    "move-result" | "move-result-wide" | "move-result-object" | "move-exception" | "return" | "return-wide" |
    "return-object" | "monitor-enter" | "monitor-exit" | "throw" {
        return newToken(INSTRUCTION_FORMAT11x);
    }

    "move" | "move-wide" | "move-object" | "array-length" | "neg-int" | "not-int" | "neg-long" | "not-long" |
    "neg-float" | "neg-double" | "int-to-long" | "int-to-float" | "int-to-double" | "long-to-int" | "long-to-float" |
    "long-to-double" | "float-to-int" | "float-to-long" | "float-to-double" | "double-to-int" | "double-to-long" |
    "double-to-float" | "int-to-byte" | "int-to-char" | "int-to-short" {
        return newToken(INSTRUCTION_FORMAT12x_OR_ID);
    }

    "add-int/2addr" | "sub-int/2addr" | "mul-int/2addr" | "div-int/2addr" | "rem-int/2addr" | "and-int/2addr" |
    "or-int/2addr" | "xor-int/2addr" | "shl-int/2addr" | "shr-int/2addr" | "ushr-int/2addr" | "add-long/2addr" |
    "sub-long/2addr" | "mul-long/2addr" | "div-long/2addr" | "rem-long/2addr" | "and-long/2addr" | "or-long/2addr" |
    "xor-long/2addr" | "shl-long/2addr" | "shr-long/2addr" | "ushr-long/2addr" | "add-float/2addr" |
    "sub-float/2addr" | "mul-float/2addr" | "div-float/2addr" | "rem-float/2addr" | "add-double/2addr" |
    "sub-double/2addr" | "mul-double/2addr" | "div-double/2addr" | "rem-double/2addr" {
        return newToken(INSTRUCTION_FORMAT12x);
    }

    "goto/16" {
        return newToken(INSTRUCTION_FORMAT20t);
    }

    "sget" | "sget-wide" | "sget-object" | "sget-boolean" | "sget-byte" | "sget-char" | "sget-short" | "sput" |
    "sput-wide" | "sput-object" | "sput-boolean" | "sput-byte" | "sput-char" | "sput-short" {
        return newToken(INSTRUCTION_FORMAT21c_FIELD);
    }

    "const-string" {
        return newToken(INSTRUCTION_FORMAT21c_STRING);
    }

    "check-cast" | "new-instance" | "const-class" {
        return newToken(INSTRUCTION_FORMAT21c_TYPE);
    }

    "const/high16" | "const-wide/high16" {
        return newToken(INSTRUCTION_FORMAT21h);
    }

    "const/16" | "const-wide/16" {
        return newToken(INSTRUCTION_FORMAT21s);
    }

    "if-eqz" | "if-nez" | "if-ltz" | "if-gez" | "if-gtz" | "if-lez" {
        return newToken(INSTRUCTION_FORMAT21t);
    }

    "add-int/lit8" | "rsub-int/lit8" | "mul-int/lit8" | "div-int/lit8" | "rem-int/lit8" | "and-int/lit8" |
    "or-int/lit8" | "xor-int/lit8" | "shl-int/lit8" | "shr-int/lit8" | "ushr-int/lit8" {
        return newToken(INSTRUCTION_FORMAT22b);
    }

    "iget" | "iget-wide" | "iget-object" | "iget-boolean" | "iget-byte" | "iget-char" | "iget-short" | "iput" |
    "iput-wide" | "iput-object" | "iput-boolean" | "iput-byte" | "iput-char" | "iput-short" {
        return newToken(INSTRUCTION_FORMAT22c_FIELD);
    }

    "instance-of" | "new-array" {
        return newToken(INSTRUCTION_FORMAT22c_TYPE);
    }

    "iget-quick" | "iget-wide-quick" | "iget-object-quick" | "iput-quick" | "iput-wide-quick" | "iput-object-quick" {
        return newToken(INSTRUCTION_FORMAT22cs_FIELD);
    }

    "rsub-int" {
        return newToken(INSTRUCTION_FORMAT22s_OR_ID);
    }

    "add-int/lit16" | "mul-int/lit16" | "div-int/lit16" | "rem-int/lit16" | "and-int/lit16" | "or-int/lit16" |
    "xor-int/lit16" {
        return newToken(INSTRUCTION_FORMAT22s);
    }

    "if-eq" | "if-ne" | "if-lt" | "if-ge" | "if-gt" | "if-le" {
        return newToken(INSTRUCTION_FORMAT22t);
    }

    "move/from16" | "move-wide/from16" | "move-object/from16" {
        return newToken(INSTRUCTION_FORMAT22x);
    }

    "cmpl-float" | "cmpg-float" | "cmpl-double" | "cmpg-double" | "cmp-long" | "aget" | "aget-wide" | "aget-object" |
    "aget-boolean" | "aget-byte" | "aget-char" | "aget-short" | "aput" | "aput-wide" | "aput-object" | "aput-boolean" |
    "aput-byte" | "aput-char" | "aput-short" | "add-int" | "sub-int" | "mul-int" | "div-int" | "rem-int" | "and-int" |
    "or-int" | "xor-int" | "shl-int" | "shr-int" | "ushr-int" | "add-long" | "sub-long" | "mul-long" | "div-long" |
    "rem-long" | "and-long" | "or-long" | "xor-long" | "shl-long" | "shr-long" | "ushr-long" | "add-float" |
    "sub-float" | "mul-float" | "div-float" | "rem-float" | "add-double" | "sub-double" | "mul-double" | "div-double" |
    "rem-double" {
        return newToken(INSTRUCTION_FORMAT23x);
    }

    "goto/32" {
        return newToken(INSTRUCTION_FORMAT30t);
    }

    "const-string/jumbo" {
        return newToken(INSTRUCTION_FORMAT31c);
    }

    "const" {
        return newToken(INSTRUCTION_FORMAT31i_OR_ID);
    }

    "const-wide/32" {
        return newToken(INSTRUCTION_FORMAT31i);
    }

    "fill-array-data" | "packed-switch" | "sparse-switch" {
        return newToken(INSTRUCTION_FORMAT31t);
    }

    "move/16" | "move-wide/16" | "move-object/16" {
        return newToken(INSTRUCTION_FORMAT32x);
    }

    "invoke-virtual" | "invoke-super" | "invoke-direct" | "invoke-static" | "invoke-interface" {
        return newToken(INSTRUCTION_FORMAT35c_METHOD);
    }

    "filled-new-array" {
        return newToken(INSTRUCTION_FORMAT35c_TYPE);
    }

    "invoke-direct-empty" {
        return newToken(INSTRUCTION_FORMAT35s_METHOD);
    }

    "execute-inline" | "invoke-virtual-quick" | "invoke-super-quick" {
        return newToken(INSTRUCTION_FORMAT35ms_METHOD);
    }

    "invoke-virtual/range" | "invoke-super/range" | "invoke-direct/range" | "invoke-static/range" |
    "invoke-interface/range" {
        return newToken(INSTRUCTION_FORMAT3rc_METHOD);
    }

    "filled-new-array/range" {
        return newToken(INSTRUCTION_FORMAT3rc_TYPE);
    }

    "invoke-virtual-quick/range" | "invoke-super-quick/range" {
        return newToken(INSTRUCTION_FORMAT3rms_METHOD);
    }

    "const-wide" {
        return newToken(INSTRUCTION_FORMAT51l);
    }
}

/*Types*/
<YYINITIAL> {
    {PrimitiveType} { return newToken(PRIMITIVE_TYPE); }
    V { return newToken(VOID_TYPE); }
    {ClassDescriptor} { return newToken(CLASS_DESCRIPTOR); }
    {ArrayDescriptor} { return newToken(ARRAY_DESCRIPTOR); }
    {PrimitiveType} {PrimitiveType}+ { return newToken(PARAM_LIST_OR_ID); }
    {Type} {Type}+ { return newToken(PARAM_LIST); }
    {SimpleName} { return newToken(SIMPLE_NAME); }
    "<init>" | "<clinit>" { return newToken(METHOD_NAME); }
}

/*Symbols/Whitespace/EOF*/
<YYINITIAL> {
    ".." { return newToken(DOTDOT); }
    "->" { return newToken(ARROW); }
    "=" { return newToken(EQUAL); }
    ":" { return newToken(COLON); }
    "," { return newToken(COMMA); }
    "{" { return newToken(OPEN_BRACE); }
    "}" { return newToken(CLOSE_BRACE); }
    "(" { return newToken(OPEN_PAREN); }
    ")" { return newToken(CLOSE_PAREN); }
    [\r\n\t ]+ { return newToken(WHITE_SPACE, true); }
    <<EOF>> { return newToken(EOF); }
}

/*catch all*/
<YYINITIAL> {
    "." { return invalidToken("Invalid directive"); }
    "." [a-zA-Z\-_] { return invalidToken("Invalid directive"); }
    "." [a-zA-Z\-_] [a-zA-Z0-9\-_]* { return invalidToken("Invalid directive"); }
    . { return invalidToken("Invalid text"); }
}