smaliLexer.flex revision 35bfbe82f00e0946ca267b8634690b9aeb78ec16
package org.jf.smali;

import org.antlr.runtime.*;

import static org.jf.smali.smaliParser.*;

/**
 * JFlex-generated lexer for smali source that hands ANTLR {@link Token}s to the
 * parser through the {@link TokenSource} interface.
 */
%%

%public
%class smaliFlexLexer
%implements TokenSource
%type Token
%unicode
%line
%column
%char

%{
    /*
     * Accumulates the text of the string/char literal currently being scanned.
     * A StringBuilder is used because the buffer is private to this lexer
     * instance, so StringBuffer's per-call synchronization buys nothing.
     */
    private StringBuilder sb = new StringBuilder();

    /* First error recorded while scanning the current string/char literal, or null if none. */
    private String stringOrCharError = null;

    /* Line, column and character offset captured when the current string/char literal began. */
    private int stringStartLine;
    private int stringStartCol;
    private int stringStartChar;

    /**
     * Returns the next token from the input.
     *
     * @return the token produced by {@code yylex()}, or {@code Token.EOF_TOKEN}
     *         if reading the input throws an {@code IOException}
     */
    public Token nextToken() {
        try {
            return yylex();
        }
        catch (java.io.IOException e) {
            // Report on stderr and end the token stream rather than propagating.
            System.err.println("shouldn't happen: " + e.getMessage());
            return Token.EOF_TOKEN;
        }
    }

    /**
     * Sets the current line number.
     *
     * @param line the line number as reported by {@link #getLine()}; stored in
     *             {@code yyline} as {@code line-1}
     */
    public void setLine(int line) {
        this.yyline = line-1;
    }

    /**
     * Sets the current column.
     *
     * @param column the value to store in {@code yycolumn}
     */
    public void setColumn(int column) {
        this.yycolumn = column;
    }

    /**
     * @return {@code yyline + 1}
     */
    public int getLine() {
        return this.yyline+1;
    }

    /**
     * @return the current value of {@code yycolumn}
     */
    public int getColumn() {
        return this.yycolumn;
    }

    /**
     * @return always the empty string
     */
    public String getSourceName() {
        return "";
    }

    /**
     * Builds a token for the current match.
     *
     * @param type   the token type
     * @param text   the token text
     * @param hidden if true the token is placed on {@code Token.HIDDEN_CHANNEL}
     * @return a {@code CommonToken} whose start/stop indexes span the current
     *         match ({@code yychar} to {@code yychar + yylength() - 1}) and whose
     *         line/column are the lexer's current line/column
     */
    private Token newToken(int type, String text, boolean hidden) {
        CommonToken token = new CommonToken(type, text);
        if (hidden) {
            token.setChannel(Token.HIDDEN_CHANNEL);
        }

        token.setStartIndex(yychar);
        token.setStopIndex(yychar + yylength() - 1);
        token.setLine(getLine());
        token.setCharPositionInLine(getColumn());
        return token;
    }

    /** Builds a non-hidden token of the given type with the given text. */
    private Token newToken(int type, String text) {
        return newToken(type, text, false);
    }

    /** Builds a token of the given type whose text is the current match ({@code yytext()}). */
    private Token newToken(int type, boolean hidden) {
        return newToken(type, yytext(), hidden);
    }

    /** Builds a non-hidden token of the given type whose text is the current match. */
    private Token newToken(int type) {
        return newToken(type, yytext(), false);
    }

    /**
     * Builds an {@code InvalidToken} positioned at the current match.
     *
     * @param message the error message passed to the {@code InvalidToken}
     * @param text    the token text
     * @return the invalid token, with start/stop indexes, line and column set
     *         the same way as in {@code newToken}
     */
    private Token invalidToken(String message, String text) {
        InvalidToken token = new InvalidToken(message, text);

        token.setStartIndex(yychar);
        token.setStopIndex(yychar + yylength() - 1);
        token.setLine(getLine());
        token.setCharPositionInLine(getColumn());

        return token;
    }

    /** Builds an {@code InvalidToken} whose text is the current match ({@code yytext()}). */
    private Token invalidToken(String message) {
        return invalidToken(message, yytext());
    }

/* Helpers for accumulating string and character literals across several matches. */

    /**
     * Starts scanning a string or character literal.
     *
     * Switches the lexer to the given state, empties the accumulation buffer,
     * remembers where the literal started (line, column, character offset) and
     * clears any error left over from a previous literal.
     *
     * @param state the lexical state to enter (STRING or CHAR in the rules below)
     */
    private void beginStringOrChar(int state) {
        yybegin(state);
        sb.setLength(0);
        stringStartLine = getLine();
        stringStartCol = getColumn();
        stringStartChar = yychar;
        stringOrCharError = null;
    }

    /**
     * Finishes the literal being scanned and returns the lexer to YYINITIAL.
     *
     * @param type the token type to produce when no error was recorded
     * @return an invalid token carrying the recorded error if there is one;
     *         otherwise a {@code CommonToken} of the given type whose text is
     *         the accumulated buffer and whose span runs from the recorded
     *         start of the literal to the end of the current match
     */
    private Token endStringOrChar(int type) {
        yybegin(YYINITIAL);

        if (stringOrCharError != null) {
            return invalidStringOrChar(stringOrCharError);
        }

        CommonToken token = new CommonToken(type, sb.toString());
        token.setStartIndex(stringStartChar);
        token.setStopIndex(yychar + yylength() - 1);
        token.setLine(stringStartLine);
        token.setCharPositionInLine(stringStartCol);
        return token;
    }

    /**
     * Records an error for the literal being scanned. Only the first error is
     * kept; later calls are ignored until the next beginStringOrChar.
     *
     * @param message the error message to record
     */
    private void setStringOrCharError(String message) {
        if (stringOrCharError == null) {
            stringOrCharError = message;
        }
    }

    /**
     * Abandons the literal being scanned, returning the lexer to YYINITIAL.
     *
     * @param message the error message for the invalid token
     * @return an {@code InvalidToken} whose text is the accumulated buffer and
     *         whose span runs from the recorded start of the literal to the end
     *         of the current match
     */
    private Token invalidStringOrChar(String message) {
        yybegin(YYINITIAL);

        InvalidToken token = new InvalidToken(message, sb.toString());
        token.setStartIndex(stringStartChar);
        token.setStopIndex(yychar + yylength() - 1);
        token.setLine(stringStartLine);
        token.setCharPositionInLine(stringStartCol);
        return token;
    }

    /**
     * @param e the recognition error being reported
     * @return the source name followed by {@code [line,charPositionInLine]}
     */
    public String getErrorHeader(RecognitionException e) {
        return getSourceName()+"["+ e.line+","+e.charPositionInLine+"]";
    }
%}

HexPrefix = 0 [xX]

HexDigit = [0-9a-fA-F]
HexDigits = [0-9a-fA-F]{4}
FewerHexDigits = [0-9a-fA-F]{0,3}

Integer1 = 0
Integer2 = [1-9] [0-9]*
Integer3 = 0 [0-7]+
Integer4 = {HexPrefix} {HexDigit}+
Integer = {Integer1} | {Integer2} | {Integer3} | {Integer4}

DecimalExponent = [eE] -? [0-9]+

BinaryExponent = [pP] -? [0-9]+

/*This can either be a floating point number or an identifier*/
FloatOrID1 = -? [0-9]+ {DecimalExponent}
FloatOrID2 = -? {HexPrefix} {HexDigit}+ {BinaryExponent}
FloatOrID3 = -? [iI][nN][fF][iI][nN][iI][tT][yY]
FloatOrID4 = [nN][aA][nN]
FloatOrID = {FloatOrID1} | {FloatOrID2} | {FloatOrID3} | {FloatOrID4}


/*This can only be a float and not an identifier, due to the decimal point*/
Float1 = -? [0-9]+ "." [0-9]* {DecimalExponent}?
Float2 = -? "." [0-9]+ {DecimalExponent}?
Float3 = -? {HexPrefix} {HexDigit}+ "." {HexDigit}* {BinaryExponent}
Float4 = -? {HexPrefix} "." {HexDigit}+ {BinaryExponent}
Float = {Float1} | {Float2} | {Float3} | {Float4}

SimpleName = [A-Za-z0-9$\-_\u00a1-\u1fff\u2010-\u2027\u2030-\ud7ff\ue000-\uffef]+

PrimitiveType = [ZBSCIJFD]

ClassDescriptor = L ({SimpleName} "/")* {SimpleName} ;

ArrayDescriptor = "[" + ({PrimitiveType} | {ClassDescriptor})

Type = {PrimitiveType} | {ClassDescriptor} | {ArrayDescriptor}


%state STRING
%state CHAR

%%

/*Directives
  The four fallback rules at the end of this group flag unknown ".end ..." and
  ".restart ..." directives. Their character class is a-zA-Z: the range A-z
  would also admit the punctuation that sits between 'Z' and 'a'
  ('[', '\', ']', '^' and '`').*/
<YYINITIAL>
{
    ".class" { return newToken(CLASS_DIRECTIVE); }
    ".super" { return newToken(SUPER_DIRECTIVE); }
    ".implements" { return newToken(IMPLEMENTS_DIRECTIVE); }
    ".source" { return newToken(SOURCE_DIRECTIVE); }
    ".field" { return newToken(FIELD_DIRECTIVE); }
    ".end field" { return newToken(END_FIELD_DIRECTIVE); }
    ".subannotation" { return newToken(SUBANNOTATION_DIRECTIVE); }
    ".end subannotation" { return newToken(END_SUBANNOTATION_DIRECTIVE); }
    ".annotation" { return newToken(ANNOTATION_DIRECTIVE); }
    ".end annotation" { return newToken(END_ANNOTATION_DIRECTIVE); }
    ".enum" { return newToken(ENUM_DIRECTIVE); }
    ".method" { return newToken(METHOD_DIRECTIVE); }
    ".end method" { return newToken(END_METHOD_DIRECTIVE); }
    ".registers" { return newToken(REGISTERS_DIRECTIVE); }
    ".locals" { return newToken(LOCALS_DIRECTIVE); }
    ".array-data" { return newToken(ARRAY_DATA_DIRECTIVE); }
    ".end array-data" { return newToken(END_ARRAY_DATA_DIRECTIVE); }
    ".packed-switch" { return newToken(PACKED_SWITCH_DIRECTIVE); }
    ".end packed-switch" { return newToken(END_PACKED_SWITCH_DIRECTIVE); }
    ".sparse-switch" { return newToken(SPARSE_SWITCH_DIRECTIVE); }
    ".end sparse-switch" { return newToken(END_SPARSE_SWITCH_DIRECTIVE); }
    ".catch" { return newToken(CATCH_DIRECTIVE); }
    ".catchall" { return newToken(CATCHALL_DIRECTIVE); }
    ".line" { return newToken(LINE_DIRECTIVE); }
    ".parameter" { return newToken(PARAMETER_DIRECTIVE); }
    ".end parameter" { return newToken(END_PARAMETER_DIRECTIVE); }
    ".local" { return newToken(LOCAL_DIRECTIVE); }
    ".end local" { return newToken(END_LOCAL_DIRECTIVE); }
    ".restart local" { return newToken(RESTART_LOCAL_DIRECTIVE); }
    ".prologue" { return newToken(PROLOGUE_DIRECTIVE); }
    ".epilogue" { return newToken(EPILOGUE_DIRECTIVE); }

    ".end" { return invalidToken("Invalid directive"); }
    ".end " [a-zA-Z0-9\-_]+ { return invalidToken("Invalid directive"); }
    ".restart" { return invalidToken("Invalid directive"); }
    ".restart " [a-zA-Z0-9\-_]+ { return invalidToken("Invalid directive"); }
}

/*Literals*/
<YYINITIAL> {
    -? {Integer} { return newToken(INTEGER_LITERAL); }
    -? {Integer} [lL] { return newToken(LONG_LITERAL); }
    -? {Integer} [sS] { return newToken(SHORT_LITERAL); }
    -? {Integer} [tT] { return newToken(BYTE_LITERAL); }

    {FloatOrID} [fF] | -? [0-9]+ [fF] { return newToken(FLOAT_LITERAL_OR_ID); }
    {FloatOrID} [dD]? | -? [0-9]+ [dD] { return newToken(DOUBLE_LITERAL_OR_ID); }
    {Float} [fF] { return newToken(FLOAT_LITERAL); }
    {Float} [dD]? { return newToken(DOUBLE_LITERAL); }

    "true"|"false" { return newToken(BOOL_LITERAL); }
    "null" { return newToken(NULL_LITERAL); }

    "\"" { beginStringOrChar(STRING); sb.append('"'); }

    ' { beginStringOrChar(CHAR); sb.append('\''); }
}

/*String literal body: escapes are decoded into sb; the first malformed escape
  is recorded and reported when the closing quote is reached*/
<STRING> {
    "\"" { sb.append('"'); return endStringOrChar(STRING_LITERAL); }

    [^\r\n\"\\]+ { sb.append(yytext()); }
    "\\b" { sb.append('\b'); }
    "\\t" { sb.append('\t'); }
    "\\n" { sb.append('\n'); }
    "\\f" { sb.append('\f'); }
    "\\r" { sb.append('\r'); }
    "\\'" { sb.append('\''); }
    "\\\"" { sb.append('"'); }
    "\\\\" { sb.append('\\'); }
    "\\u" {HexDigits} { sb.append((char)Integer.parseInt(yytext().substring(2,6), 16)); }

    "\\u" {FewerHexDigits} {
        sb.append(yytext());
        setStringOrCharError("Invalid \\u sequence. \\u must be followed by 4 hex digits");
    }

    "\\" [^btnfr'\"\\u] {
        sb.append(yytext());
        setStringOrCharError("Invalid escape sequence " + yytext());
    }

    [\r\n] { return invalidStringOrChar("Unterminated string literal"); }
    <<EOF>> { return invalidStringOrChar("Unterminated string literal"); }
}

/*Character literal body: same escape handling as STRING, plus a length check
  on the closing quote*/
<CHAR> {
    ' {
        sb.append('\'');
        // sb now holds both quotes, so a length of 2 means nothing in between
        // and anything above 3 means more than one character in between.
        if (sb.length() == 2) {
            return invalidStringOrChar("Empty character literal");
        } else if (sb.length() > 3) {
            return invalidStringOrChar("Character literal with multiple chars");
        }

        return endStringOrChar(CHAR_LITERAL);
    }

    [^\r\n'\\]+ { sb.append(yytext()); }
    "\\b" { sb.append('\b'); }
    "\\t" { sb.append('\t'); }
    "\\n" { sb.append('\n'); }
    "\\f" { sb.append('\f'); }
    "\\r" { sb.append('\r'); }
    "\\'" { sb.append('\''); }
    "\\\"" { sb.append('"'); }
    "\\\\" { sb.append('\\'); }
    "\\u" {HexDigits} { sb.append((char)Integer.parseInt(yytext().substring(2,6), 16)); }

    "\\u" {HexDigit}* {
        sb.append(yytext());
        setStringOrCharError("Invalid \\u sequence. \\u must be followed by exactly 4 hex digits");
    }

    "\\" [^btnfr'\"\\u] {
        sb.append(yytext());
        setStringOrCharError("Invalid escape sequence " + yytext());
    }

    [\r\n] { return invalidStringOrChar("Unterminated character literal"); }
    <<EOF>> { return invalidStringOrChar("Unterminated character literal"); }
}

/*Misc*/
<YYINITIAL> {
    [vp] [0-9]+ { return newToken(REGISTER); }

    "build" | "runtime" | "system" {
        return newToken(ANNOTATION_VISIBILITY);
    }

    "public" | "private" | "protected" | "static" | "final" | "synchronized" | "bridge" | "varargs" | "native" |
    "abstract" | "strictfp" | "synthetic" | "constructor" | "declared-synchronized" | "interface" | "enum" |
    "annotation" | "volatile" | "transient" {
        return newToken(ACCESS_SPEC);
    }

    "vtable@0x" {HexDigit}+ { return newToken(VTABLE_OFFSET); }
    "field@0x" {HexDigit}+ { return newToken(FIELD_OFFSET); }

    "+" {Integer} { return newToken(OFFSET); }

    # [^\r\n]* ("\r\n" | "\r" | "\n")? { return newToken(LINE_COMMENT, true); }
}

/*Instructions*/
<YYINITIAL> {
    "goto" {
        return newToken(INSTRUCTION_FORMAT10t);
    }

    "return-void" | "nop" {
        return newToken(INSTRUCTION_FORMAT10x);
    }

    "const/4" {
        return newToken(INSTRUCTION_FORMAT11n);
    }

    "move-result" | "move-result-wide" | "move-result-object" | "move-exception" | "return" | "return-wide" |
    "return-object" | "monitor-enter" | "monitor-exit" | "throw" {
        return newToken(INSTRUCTION_FORMAT11x);
    }

    "move" | "move-wide" | "move-object" | "array-length" | "neg-int" | "not-int" | "neg-long" | "not-long" |
    "neg-float" | "neg-double" | "int-to-long" | "int-to-float" | "int-to-double" | "long-to-int" | "long-to-float" |
    "long-to-double" | "float-to-int" | "float-to-long" | "float-to-double" | "double-to-int" | "double-to-long" |
    "double-to-float" | "int-to-byte" | "int-to-char" | "int-to-short" {
        return newToken(INSTRUCTION_FORMAT12x_OR_ID);
    }

    "add-int/2addr" | "sub-int/2addr" | "mul-int/2addr" | "div-int/2addr" | "rem-int/2addr" | "and-int/2addr" |
    "or-int/2addr" | "xor-int/2addr" | "shl-int/2addr" | "shr-int/2addr" | "ushr-int/2addr" | "add-long/2addr" |
    "sub-long/2addr" | "mul-long/2addr" | "div-long/2addr" | "rem-long/2addr" | "and-long/2addr" | "or-long/2addr" |
    "xor-long/2addr" | "shl-long/2addr" | "shr-long/2addr" | "ushr-long/2addr" | "add-float/2addr" |
    "sub-float/2addr" | "mul-float/2addr" | "div-float/2addr" | "rem-float/2addr" | "add-double/2addr" |
    "sub-double/2addr" | "mul-double/2addr" | "div-double/2addr" | "rem-double/2addr" {
        return newToken(INSTRUCTION_FORMAT12x);
    }

    "goto/16" {
        return newToken(INSTRUCTION_FORMAT20t);
    }

    "sget" | "sget-wide" | "sget-object" | "sget-boolean" | "sget-byte" | "sget-char" | "sget-short" | "sput" |
    "sput-wide" | "sput-object" | "sput-boolean" | "sput-byte" | "sput-char" | "sput-short" {
        return newToken(INSTRUCTION_FORMAT21c_FIELD);
    }

    "const-string" {
        return newToken(INSTRUCTION_FORMAT21c_STRING);
    }

    "check-cast" | "new-instance" | "const-class" {
        return newToken(INSTRUCTION_FORMAT21c_TYPE);
    }

    "const/high16" | "const-wide/high16" {
        return newToken(INSTRUCTION_FORMAT21h);
    }

    "const/16" | "const-wide/16" {
        return newToken(INSTRUCTION_FORMAT21s);
    }

    "if-eqz" | "if-nez" | "if-ltz" | "if-gez" | "if-gtz" | "if-lez" {
        return newToken(INSTRUCTION_FORMAT21t);
    }

    "add-int/lit8" | "rsub-int/lit8" | "mul-int/lit8" | "div-int/lit8" | "rem-int/lit8" | "and-int/lit8" |
    "or-int/lit8" | "xor-int/lit8" | "shl-int/lit8" | "shr-int/lit8" | "ushr-int/lit8" {
        return newToken(INSTRUCTION_FORMAT22b);
    }

    "iget" | "iget-wide" | "iget-object" | "iget-boolean" | "iget-byte" | "iget-char" | "iget-short" | "iput" |
    "iput-wide" | "iput-object" | "iput-boolean" | "iput-byte" | "iput-char" | "iput-short" {
        return newToken(INSTRUCTION_FORMAT22c_FIELD);
    }

    "instance-of" | "new-array" {
        return newToken(INSTRUCTION_FORMAT22c_TYPE);
    }

    "iget-quick" | "iget-wide-quick" | "iget-object-quick" | "iput-quick" | "iput-wide-quick" | "iput-object-quick" {
        return newToken(INSTRUCTION_FORMAT22cs_FIELD);
    }

    "rsub-int" {
        return newToken(INSTRUCTION_FORMAT22s_OR_ID);
    }

    "add-int/lit16" | "mul-int/lit16" | "div-int/lit16" | "rem-int/lit16" | "and-int/lit16" | "or-int/lit16" |
    "xor-int/lit16" {
        return newToken(INSTRUCTION_FORMAT22s);
    }

    "if-eq" | "if-ne" | "if-lt" | "if-ge" | "if-gt" | "if-le" {
        return newToken(INSTRUCTION_FORMAT22t);
    }

    "move/from16" | "move-wide/from16" | "move-object/from16" {
        return newToken(INSTRUCTION_FORMAT22x);
    }

    "cmpl-float" | "cmpg-float" | "cmpl-double" | "cmpg-double" | "cmp-long" | "aget" | "aget-wide" | "aget-object" |
    "aget-boolean" | "aget-byte" | "aget-char" | "aget-short" | "aput" | "aput-wide" | "aput-object" | "aput-boolean" |
    "aput-byte" | "aput-char" | "aput-short" | "add-int" | "sub-int" | "mul-int" | "div-int" | "rem-int" | "and-int" |
    "or-int" | "xor-int" | "shl-int" | "shr-int" | "ushr-int" | "add-long" | "sub-long" | "mul-long" | "div-long" |
    "rem-long" | "and-long" | "or-long" | "xor-long" | "shl-long" | "shr-long" | "ushr-long" | "add-float" |
    "sub-float" | "mul-float" | "div-float" | "rem-float" | "add-double" | "sub-double" | "mul-double" | "div-double" |
    "rem-double" {
        return newToken(INSTRUCTION_FORMAT23x);
    }

    "goto/32" {
        return newToken(INSTRUCTION_FORMAT30t);
    }

    "const-string/jumbo" {
        return newToken(INSTRUCTION_FORMAT31c);
    }

    "const" {
        return newToken(INSTRUCTION_FORMAT31i_OR_ID);
    }

    "const-wide/32" {
        return newToken(INSTRUCTION_FORMAT31i);
    }

    "fill-array-data" | "packed-switch" | "sparse-switch" {
        return newToken(INSTRUCTION_FORMAT31t);
    }

    "move/16" | "move-wide/16" | "move-object/16" {
        return newToken(INSTRUCTION_FORMAT32x);
    }

    "invoke-virtual" | "invoke-super" | "invoke-direct" | "invoke-static" | "invoke-interface" {
        return newToken(INSTRUCTION_FORMAT35c_METHOD);
    }

    "filled-new-array" {
        return newToken(INSTRUCTION_FORMAT35c_TYPE);
    }

    "invoke-direct-empty" {
        return newToken(INSTRUCTION_FORMAT35s_METHOD);
    }

    "execute-inline" | "invoke-virtual-quick" | "invoke-super-quick" {
        return newToken(INSTRUCTION_FORMAT35ms_METHOD);
    }

    "invoke-virtual/range" | "invoke-super/range" | "invoke-direct/range" | "invoke-static/range" |
    "invoke-interface/range" {
        return newToken(INSTRUCTION_FORMAT3rc_METHOD);
    }

    "filled-new-array/range" {
        return newToken(INSTRUCTION_FORMAT3rc_TYPE);
    }

    "invoke-virtual-quick/range" | "invoke-super-quick/range" {
        return newToken(INSTRUCTION_FORMAT3rms_METHOD);
    }

    "const-wide" {
        return newToken(INSTRUCTION_FORMAT51l);
    }
}

/*Types*/
<YYINITIAL> {
    {PrimitiveType} { return newToken(PRIMITIVE_TYPE); }
    V { return newToken(VOID_TYPE); }
    {ClassDescriptor} { return newToken(CLASS_DESCRIPTOR); }
    {ArrayDescriptor} { return newToken(ARRAY_DESCRIPTOR); }
    {PrimitiveType} {PrimitiveType}+ { return newToken(PARAM_LIST_OR_ID); }
    {Type} {Type}+ { return newToken(PARAM_LIST); }
    {SimpleName} { return newToken(SIMPLE_NAME); }
    "<init>" | "<clinit>" { return newToken(METHOD_NAME); }
}

/*Symbols/Whitespace/EOF*/
<YYINITIAL> {
    ".." { return newToken(DOTDOT); }
    "->" { return newToken(ARROW); }
    "=" { return newToken(EQUAL); }
    ":" { return newToken(COLON); }
    "," { return newToken(COMMA); }
    "{" { return newToken(OPEN_BRACE); }
    "}" { return newToken(CLOSE_BRACE); }
    "(" { return newToken(OPEN_PAREN); }
    ")" { return newToken(CLOSE_PAREN); }
    [\r\n\t ]+ { return newToken(WHITE_SPACE, true); }
    <<EOF>> { return newToken(EOF); }
}

/*catch all
  Directive-like text uses a-zA-Z (not A-z) so that '[', '\', ']', '^' and '`'
  are not treated as part of a directive name.*/
<YYINITIAL> {
    "." { return invalidToken("Invalid directive"); }
    "." [a-zA-Z\-_] { return invalidToken("Invalid directive"); }
    "." [a-zA-Z\-_] [a-zA-Z0-9\-_]* { return invalidToken("Invalid directive"); }
    . { return invalidToken("Invalid text"); }
}