// smaliLexer.flex revision 73d1b5d3cbc845e7123f41acd80c8cca4c1a369d
1package org.jf.smali;
2
3import java.io.*;
4import org.antlr.runtime.*;
5import org.jf.util.*;
6import static org.jf.smali.smaliParser.*;
7
8%%
9
10%public
11%class smaliFlexLexer
12%implements TokenSource
13%implements LexerErrorInterface
14%type Token
15%unicode
16%line
17%column
18%char
19
20%{
21    private StringBuffer sb = new StringBuffer();
22    private String stringOrCharError = null;
23    private int stringStartLine;
24    private int stringStartCol;
25    private int stringStartChar;
26
27    private int lexerErrors = 0;
28
29    private File sourceFile;
30
31    private boolean suppressErrors;
32
33    public Token nextToken() {
34        try {
35            Token token = yylex();
36            if (token instanceof InvalidToken) {
37                InvalidToken invalidToken = (InvalidToken)token;
38                if (!suppressErrors) {
39                    System.err.println(getErrorHeader(invalidToken) + " Error for input '" +
40                        invalidToken.getText() + "': " + invalidToken.getMessage());
41                }
42                lexerErrors++;
43            }
44            return token;
45        }
46        catch (java.io.IOException e) {
47            System.err.println("shouldn't happen: " + e.getMessage());
48            return Token.EOF_TOKEN;
49        }
50    }
51
52    public void setLine(int line) {
53        this.yyline = line-1;
54    }
55
56    public void setColumn(int column) {
57        this.yycolumn = column;
58    }
59
60    public int getLine() {
61        return this.yyline+1;
62    }
63
64    public int getColumn() {
65        return this.yycolumn;
66    }
67
68    public void setSuppressErrors(boolean suppressErrors) {
69        this.suppressErrors = suppressErrors;
70    }
71
72    public void setSourceFile(File sourceFile) {
73        this.sourceFile = sourceFile;
74    }
75
76    public String getSourceName() {
77        try {
78            return  PathUtil.getRelativeFile(new File("."), sourceFile).getPath();
79        } catch (IOException ex) {
80            return sourceFile.getAbsolutePath();
81        }
82    }
83
84    public int getNumberOfSyntaxErrors() {
85        return lexerErrors;
86    }
87
88    private Token newToken(int type, String text, boolean hidden) {
89        CommonToken token = new CommonToken(type, text);
90        if (hidden) {
91            token.setChannel(Token.HIDDEN_CHANNEL);
92        }
93
94        token.setStartIndex(yychar);
95        token.setStopIndex(yychar + yylength() - 1);
96        token.setLine(getLine());
97        token.setCharPositionInLine(getColumn());
98        return token;
99    }
100
101    private Token newToken(int type, String text) {
102        return newToken(type, text, false);
103    }
104
105    private Token newToken(int type, boolean hidden) {
106        return newToken(type, yytext(), hidden);
107    }
108
109    private Token newToken(int type) {
110        return newToken(type, yytext(), false);
111    }
112
113    private Token invalidToken(String message, String text) {
114        InvalidToken token = new InvalidToken(message, text);
115
116        token.setStartIndex(yychar);
117        token.setStopIndex(yychar + yylength() - 1);
118        token.setLine(getLine());
119        token.setCharPositionInLine(getColumn());
120
121        return token;
122    }
123
124    private Token invalidToken(String message) {
125        return invalidToken(message, yytext());
126    }
127
128    private void beginStringOrChar(int state) {
129        yybegin(state);
130        sb.setLength(0);
131        stringStartLine = getLine();
132        stringStartCol = getColumn();
133        stringStartChar = yychar;
134        stringOrCharError = null;
135    }
136
137    private Token endStringOrChar(int type) {
138        yybegin(YYINITIAL);
139
140        if (stringOrCharError != null) {
141            return invalidStringOrChar(stringOrCharError);
142        }
143
144        CommonToken token = new CommonToken(type, sb.toString());
145        token.setStartIndex(stringStartChar);
146        token.setStopIndex(yychar + yylength() - 1);
147        token.setLine(stringStartLine);
148        token.setCharPositionInLine(stringStartCol);
149        return token;
150    }
151
152    private void setStringOrCharError(String message) {
153        if (stringOrCharError == null) {
154            stringOrCharError = message;
155        }
156    }
157
158    private Token invalidStringOrChar(String message) {
159        yybegin(YYINITIAL);
160
161        InvalidToken token = new InvalidToken(message, sb.toString());
162        token.setStartIndex(stringStartChar);
163        token.setStopIndex(yychar + yylength() - 1);
164        token.setLine(stringStartLine);
165        token.setCharPositionInLine(stringStartCol);
166        return token;
167    }
168
169    public String getErrorHeader(InvalidToken token) {
170        return getSourceName()+"["+ token.getLine()+","+token.getCharPositionInLine()+"]";
171    }
172%}
173
/* Hexadecimal building blocks. */
HexPrefix = 0 [xX]

HexDigit = [0-9a-fA-F]
/* Exactly four hex digits, as required after \u in string and char literals. */
HexDigits = {HexDigit}{4}
/* Zero to three hex digits: an incomplete \u escape. */
FewerHexDigits = {HexDigit}{0,3}

/* Integer forms: zero, decimal, leading-zero digits 0-7, and hexadecimal. */
Integer1 = 0
Integer2 = [1-9] [0-9]*
Integer3 = 0 [0-7]+
Integer4 = {HexPrefix} {HexDigit}+
Integer = {Integer1} | {Integer2} | {Integer3} | {Integer4}

/* Exponent suffixes: e/E for the decimal float forms, p/P for the hexadecimal ones. */
DecimalExponent = [eE] -? [0-9]+

BinaryExponent = [pP] -? [0-9]+

/* Text that is either a floating point number or an identifier (no decimal point present). */
FloatOrID1 = -? [0-9]+ {DecimalExponent}
FloatOrID2 = -? {HexPrefix} {HexDigit}+ {BinaryExponent}
FloatOrID3 = -? [iI][nN][fF][iI][nN][iI][tT][yY]
FloatOrID4 = [nN][aA][nN]
FloatOrID =  {FloatOrID1} | {FloatOrID2} | {FloatOrID3} | {FloatOrID4}


/* Text that can only be a float, never an identifier, because it contains a decimal point. */
Float1 = -? [0-9]+ "." [0-9]* {DecimalExponent}?
Float2 = -? "." [0-9]+ {DecimalExponent}?
Float3 = -? {HexPrefix} {HexDigit}+ "." {HexDigit}* {BinaryExponent}
Float4 = -? {HexPrefix} "." {HexDigit}+ {BinaryExponent}
Float =  {Float1} | {Float2} | {Float3} | {Float4}

/* Names and type descriptors. */
SimpleName = [A-Za-z0-9$\-_\u00a1-\u1fff\u2010-\u2027\u2030-\ud7ff\ue000-\uffef]+

PrimitiveType = [ZBSCIJFD]

/* "L", slash-separated simple names, then ";". */
ClassDescriptor = L ({SimpleName} "/")* {SimpleName} ;

/* One or more "[" followed by a primitive type or a class descriptor. */
ArrayDescriptor = "[" + ({PrimitiveType} | {ClassDescriptor})

Type = {PrimitiveType} | {ClassDescriptor} | {ArrayDescriptor}
214
215
216%state STRING
217%state CHAR
218
219%%
220
221/*Directives*/
<YYINITIAL> {
    /* Recognised directives. Each one maps to its own token type. */
    ".class" { return newToken(CLASS_DIRECTIVE); }
    ".super" { return newToken(SUPER_DIRECTIVE); }
    ".implements" { return newToken(IMPLEMENTS_DIRECTIVE); }
    ".source" { return newToken(SOURCE_DIRECTIVE); }
    ".field" { return newToken(FIELD_DIRECTIVE); }
    ".end field" { return newToken(END_FIELD_DIRECTIVE); }
    ".subannotation" { return newToken(SUBANNOTATION_DIRECTIVE); }
    ".end subannotation" { return newToken(END_SUBANNOTATION_DIRECTIVE); }
    ".annotation" { return newToken(ANNOTATION_DIRECTIVE); }
    ".end annotation" { return newToken(END_ANNOTATION_DIRECTIVE); }
    ".enum" { return newToken(ENUM_DIRECTIVE); }
    ".method" { return newToken(METHOD_DIRECTIVE); }
    ".end method" { return newToken(END_METHOD_DIRECTIVE); }
    ".registers" { return newToken(REGISTERS_DIRECTIVE); }
    ".locals" { return newToken(LOCALS_DIRECTIVE); }
    ".array-data" { return newToken(ARRAY_DATA_DIRECTIVE); }
    ".end array-data" { return newToken(END_ARRAY_DATA_DIRECTIVE); }
    ".packed-switch" { return newToken(PACKED_SWITCH_DIRECTIVE); }
    ".end packed-switch" { return newToken(END_PACKED_SWITCH_DIRECTIVE); }
    ".sparse-switch" { return newToken(SPARSE_SWITCH_DIRECTIVE); }
    ".end sparse-switch" { return newToken(END_SPARSE_SWITCH_DIRECTIVE); }
    ".catch" { return newToken(CATCH_DIRECTIVE); }
    ".catchall" { return newToken(CATCHALL_DIRECTIVE); }
    ".line" { return newToken(LINE_DIRECTIVE); }
    ".parameter" { return newToken(PARAMETER_DIRECTIVE); }
    ".end parameter" { return newToken(END_PARAMETER_DIRECTIVE); }
    ".local" { return newToken(LOCAL_DIRECTIVE); }
    ".end local" { return newToken(END_LOCAL_DIRECTIVE); }
    ".restart local" { return newToken(RESTART_LOCAL_DIRECTIVE); }
    ".prologue" { return newToken(PROLOGUE_DIRECTIVE); }
    ".epilogue" { return newToken(EPILOGUE_DIRECTIVE); }

    /* A bare ".end"/".restart", or one followed by an unrecognised word, is reported as a single
       invalid token. The word's character class is letters, digits, '-' and '_'; it previously
       read "A-z", which also swallowed the punctuation between 'Z' and 'a' ([ \ ] ^ `). */
    ".end" { return invalidToken("Invalid directive"); }
    ".end " [a-zA-Z0-9\-_]+ { return invalidToken("Invalid directive"); }
    ".restart" { return invalidToken("Invalid directive"); }
    ".restart " [a-zA-Z0-9\-_]+ { return invalidToken("Invalid directive"); }
}
261
262/*Literals*/
<YYINITIAL> {
    /* Integral literals: the optional trailing letter selects long, short or byte. */
    -? {Integer} {
        return newToken(INTEGER_LITERAL);
    }
    -? {Integer} [lL] {
        return newToken(LONG_LITERAL);
    }
    -? {Integer} [sS] {
        return newToken(SHORT_LITERAL);
    }
    -? {Integer} [tT] {
        return newToken(BYTE_LITERAL);
    }

    /* Floating point forms that could also be identifiers, then the unambiguous ones. */
    {FloatOrID} [fF] | -? [0-9]+ [fF] {
        return newToken(FLOAT_LITERAL_OR_ID);
    }
    {FloatOrID} [dD]? | -? [0-9]+ [dD] {
        return newToken(DOUBLE_LITERAL_OR_ID);
    }
    {Float} [fF] {
        return newToken(FLOAT_LITERAL);
    }
    {Float} [dD]? {
        return newToken(DOUBLE_LITERAL);
    }

    "true"|"false" {
        return newToken(BOOL_LITERAL);
    }
    "null" {
        return newToken(NULL_LITERAL);
    }

    /* An opening quote switches to the STRING or CHAR state; the quote itself is kept in the
       literal buffer. No token is returned until the literal ends. */
    "\"" {
        beginStringOrChar(STRING);
        sb.append('"');
    }

    "'" {
        beginStringOrChar(CHAR);
        sb.append('\'');
    }
}
281
<STRING> {
    /* Closing quote: emit the literal, or an invalid token if an error was recorded on the way. */
    "\"" {
        sb.append('"');
        return endStringOrChar(STRING_LITERAL);
    }

    /* Plain characters are copied through unchanged. */
    [^\r\n\"\\]+ { sb.append(yytext()); }

    /* Recognised escapes are replaced by the character they denote. */
    "\\b" { sb.append('\b'); }
    "\\t" { sb.append('\t'); }
    "\\n" { sb.append('\n'); }
    "\\f" { sb.append('\f'); }
    "\\r" { sb.append('\r'); }
    "\\'" { sb.append('\''); }
    "\\\"" { sb.append('"'); }
    "\\\\" { sb.append('\\'); }
    "\\u" {HexDigits} {
        /* The match is always backslash, 'u' and four hex digits, so everything after the
           first two characters is the code unit value. */
        String hex = yytext().substring(2);
        sb.append((char) Integer.parseInt(hex, 16));
    }

    /* Malformed escapes are kept verbatim and remembered as an error; scanning continues so that
       the whole literal becomes one invalid token. */
    "\\u" {FewerHexDigits} {
        sb.append(yytext());
        setStringOrCharError("Invalid \\u sequence. \\u must be followed by 4 hex digits");
    }

    "\\" [^btnfr'\"\\u] {
        String escape = yytext();
        sb.append(escape);
        setStringOrCharError("Invalid escape sequence " + escape);
    }

    /* A line break or end of input before the closing quote ends the literal as invalid. */
    [\r\n] { return invalidStringOrChar("Unterminated string literal"); }
    <<EOF>> { return invalidStringOrChar("Unterminated string literal"); }
}
309
<CHAR> {
    /* Closing quote: validate the literal and emit it. */
    ' {
        sb.append('\'');

        // Report a malformed escape first. A bad escape is stored verbatim (at least two
        // characters), so the buffer is always longer than 3 in that case and the length check
        // below used to replace the specific error with "multiple chars" every time.
        if (stringOrCharError != null) {
            return invalidStringOrChar(stringOrCharError);
        }

        // The buffer holds both quotes, so a valid literal has exactly 3 characters.
        if (sb.length() == 2) {
            return invalidStringOrChar("Empty character literal");
        } else if (sb.length() > 3) {
            return invalidStringOrChar("Character literal with multiple chars");
        }

        return endStringOrChar(CHAR_LITERAL);
    }

    /* Plain characters are copied through unchanged. */
    [^\r\n'\\]+ { sb.append(yytext()); }

    /* Recognised escapes are replaced by the character they denote. */
    "\\b" { sb.append('\b'); }
    "\\t" { sb.append('\t'); }
    "\\n" { sb.append('\n'); }
    "\\f" { sb.append('\f'); }
    "\\r" { sb.append('\r'); }
    "\\'" { sb.append('\''); }
    "\\\"" { sb.append('"'); }
    "\\\\" { sb.append('\\'); }
    "\\u" {HexDigits} { sb.append((char)Integer.parseInt(yytext().substring(2,6), 16)); }

    /* Any other number of hex digits after \u is an error. With exactly four digits this rule
       and the one above match the same text, and the earlier rule takes precedence. */
    "\\u" {HexDigit}* {
        sb.append(yytext());
        setStringOrCharError("Invalid \\u sequence. \\u must be followed by exactly 4 hex digits");
    }

    "\\" [^btnfr'\"\\u] {
        sb.append(yytext());
        setStringOrCharError("Invalid escape sequence " + yytext());
    }

    /* A line break or end of input before the closing quote ends the literal as invalid. */
    [\r\n] { return invalidStringOrChar("Unterminated character literal"); }
    <<EOF>> { return invalidStringOrChar("Unterminated character literal"); }
}
346
347/*Misc*/
<YYINITIAL> {
    /* Registers: 'v' or 'p' followed by a decimal number. */
    [vp] [0-9]+ {
        return newToken(REGISTER);
    }

    "build" | "runtime" | "system" { return newToken(ANNOTATION_VISIBILITY); }

    "public" | "private" | "protected" | "static" | "final" | "synchronized" | "bridge" | "varargs" | "native" |
    "abstract" | "strictfp" | "synthetic" | "constructor" | "declared-synchronized" | "interface" | "enum" |
    "annotation" | "volatile" | "transient" { return newToken(ACCESS_SPEC); }

    /* Hexadecimal vtable and field offsets. */
    "vtable@0x" {HexDigit}+ {
        return newToken(VTABLE_OFFSET);
    }
    "field@0x" {HexDigit}+ {
        return newToken(FIELD_OFFSET);
    }

    "+" {Integer} {
        return newToken(OFFSET);
    }

    /* '#' starts a comment running to the end of the line; it goes on the hidden channel. */
    "#" [^\r\n]* {
        return newToken(LINE_COMMENT, true);
    }
}
368
369/*Instructions*/
<YYINITIAL> {
    /* Opcode names, grouped by the instruction-format token they produce. */

    /* 10t */
    "goto" { return newToken(INSTRUCTION_FORMAT10t); }

    /* 10x */
    "return-void" | "nop" { return newToken(INSTRUCTION_FORMAT10x); }

    /* 11n */
    "const/4" { return newToken(INSTRUCTION_FORMAT11n); }

    /* 11x */
    "move-result" | "move-result-wide" | "move-result-object" | "move-exception" |
    "return" | "return-wide" | "return-object" |
    "monitor-enter" | "monitor-exit" | "throw" { return newToken(INSTRUCTION_FORMAT11x); }

    /* 12x, names that may also be used as identifiers */
    "move" | "move-wide" | "move-object" | "array-length" |
    "neg-int" | "not-int" | "neg-long" | "not-long" | "neg-float" | "neg-double" |
    "int-to-long" | "int-to-float" | "int-to-double" |
    "long-to-int" | "long-to-float" | "long-to-double" |
    "float-to-int" | "float-to-long" | "float-to-double" |
    "double-to-int" | "double-to-long" | "double-to-float" |
    "int-to-byte" | "int-to-char" | "int-to-short" { return newToken(INSTRUCTION_FORMAT12x_OR_ID); }

    /* 12x */
    "add-int/2addr" | "sub-int/2addr" | "mul-int/2addr" | "div-int/2addr" | "rem-int/2addr" | "and-int/2addr" |
    "or-int/2addr" | "xor-int/2addr" | "shl-int/2addr" | "shr-int/2addr" | "ushr-int/2addr" |
    "add-long/2addr" | "sub-long/2addr" | "mul-long/2addr" | "div-long/2addr" | "rem-long/2addr" |
    "and-long/2addr" | "or-long/2addr" | "xor-long/2addr" | "shl-long/2addr" | "shr-long/2addr" |
    "ushr-long/2addr" |
    "add-float/2addr" | "sub-float/2addr" | "mul-float/2addr" | "div-float/2addr" | "rem-float/2addr" |
    "add-double/2addr" | "sub-double/2addr" | "mul-double/2addr" | "div-double/2addr" |
    "rem-double/2addr" { return newToken(INSTRUCTION_FORMAT12x); }

    /* 20t */
    "goto/16" { return newToken(INSTRUCTION_FORMAT20t); }

    /* 21c, field operand */
    "sget" | "sget-wide" | "sget-object" | "sget-boolean" | "sget-byte" | "sget-char" | "sget-short" |
    "sput" | "sput-wide" | "sput-object" | "sput-boolean" | "sput-byte" | "sput-char" |
    "sput-short" { return newToken(INSTRUCTION_FORMAT21c_FIELD); }

    /* 21c, string operand */
    "const-string" { return newToken(INSTRUCTION_FORMAT21c_STRING); }

    /* 21c, type operand */
    "check-cast" | "new-instance" | "const-class" { return newToken(INSTRUCTION_FORMAT21c_TYPE); }

    /* 21h */
    "const/high16" | "const-wide/high16" { return newToken(INSTRUCTION_FORMAT21h); }

    /* 21s */
    "const/16" | "const-wide/16" { return newToken(INSTRUCTION_FORMAT21s); }

    /* 21t */
    "if-eqz" | "if-nez" | "if-ltz" | "if-gez" | "if-gtz" | "if-lez" { return newToken(INSTRUCTION_FORMAT21t); }

    /* 22b */
    "add-int/lit8" | "rsub-int/lit8" | "mul-int/lit8" | "div-int/lit8" | "rem-int/lit8" | "and-int/lit8" |
    "or-int/lit8" | "xor-int/lit8" | "shl-int/lit8" | "shr-int/lit8" |
    "ushr-int/lit8" { return newToken(INSTRUCTION_FORMAT22b); }

    /* 22c, field operand */
    "iget" | "iget-wide" | "iget-object" | "iget-boolean" | "iget-byte" | "iget-char" | "iget-short" |
    "iput" | "iput-wide" | "iput-object" | "iput-boolean" | "iput-byte" | "iput-char" |
    "iput-short" { return newToken(INSTRUCTION_FORMAT22c_FIELD); }

    /* 22c, type operand */
    "instance-of" | "new-array" { return newToken(INSTRUCTION_FORMAT22c_TYPE); }

    /* 22cs, field operand */
    "iget-quick" | "iget-wide-quick" | "iget-object-quick" |
    "iput-quick" | "iput-wide-quick" | "iput-object-quick" { return newToken(INSTRUCTION_FORMAT22cs_FIELD); }

    /* 22s, name that may also be used as an identifier */
    "rsub-int" { return newToken(INSTRUCTION_FORMAT22s_OR_ID); }

    /* 22s */
    "add-int/lit16" | "mul-int/lit16" | "div-int/lit16" | "rem-int/lit16" | "and-int/lit16" | "or-int/lit16" |
    "xor-int/lit16" { return newToken(INSTRUCTION_FORMAT22s); }

    /* 22t */
    "if-eq" | "if-ne" | "if-lt" | "if-ge" | "if-gt" | "if-le" { return newToken(INSTRUCTION_FORMAT22t); }

    /* 22x */
    "move/from16" | "move-wide/from16" | "move-object/from16" { return newToken(INSTRUCTION_FORMAT22x); }

    /* 23x */
    "cmpl-float" | "cmpg-float" | "cmpl-double" | "cmpg-double" | "cmp-long" |
    "aget" | "aget-wide" | "aget-object" | "aget-boolean" | "aget-byte" | "aget-char" | "aget-short" |
    "aput" | "aput-wide" | "aput-object" | "aput-boolean" | "aput-byte" | "aput-char" | "aput-short" |
    "add-int" | "sub-int" | "mul-int" | "div-int" | "rem-int" | "and-int" | "or-int" | "xor-int" |
    "shl-int" | "shr-int" | "ushr-int" |
    "add-long" | "sub-long" | "mul-long" | "div-long" | "rem-long" | "and-long" | "or-long" | "xor-long" |
    "shl-long" | "shr-long" | "ushr-long" |
    "add-float" | "sub-float" | "mul-float" | "div-float" | "rem-float" |
    "add-double" | "sub-double" | "mul-double" | "div-double" |
    "rem-double" { return newToken(INSTRUCTION_FORMAT23x); }

    /* 30t */
    "goto/32" { return newToken(INSTRUCTION_FORMAT30t); }

    /* 31c */
    "const-string/jumbo" { return newToken(INSTRUCTION_FORMAT31c); }

    /* 31i, name that may also be used as an identifier */
    "const" { return newToken(INSTRUCTION_FORMAT31i_OR_ID); }

    /* 31i */
    "const-wide/32" { return newToken(INSTRUCTION_FORMAT31i); }

    /* 31t */
    "fill-array-data" | "packed-switch" | "sparse-switch" { return newToken(INSTRUCTION_FORMAT31t); }

    /* 32x */
    "move/16" | "move-wide/16" | "move-object/16" { return newToken(INSTRUCTION_FORMAT32x); }

    /* 35c, method operand */
    "invoke-virtual" | "invoke-super" | "invoke-direct" | "invoke-static" |
    "invoke-interface" { return newToken(INSTRUCTION_FORMAT35c_METHOD); }

    /* 35c, type operand */
    "filled-new-array" { return newToken(INSTRUCTION_FORMAT35c_TYPE); }

    /* 35s, method operand */
    "invoke-direct-empty" { return newToken(INSTRUCTION_FORMAT35s_METHOD); }

    /* 35ms, method operand */
    "execute-inline" | "invoke-virtual-quick" | "invoke-super-quick" { return newToken(INSTRUCTION_FORMAT35ms_METHOD); }

    /* 3rc, method operand */
    "invoke-virtual/range" | "invoke-super/range" | "invoke-direct/range" | "invoke-static/range" |
    "invoke-interface/range" { return newToken(INSTRUCTION_FORMAT3rc_METHOD); }

    /* 3rc, type operand */
    "filled-new-array/range" { return newToken(INSTRUCTION_FORMAT3rc_TYPE); }

    /* 3rms, method operand */
    "invoke-virtual-quick/range" | "invoke-super-quick/range" { return newToken(INSTRUCTION_FORMAT3rms_METHOD); }

    /* 51l */
    "const-wide" { return newToken(INSTRUCTION_FORMAT51l); }
}
535
536/*Types*/
<YYINITIAL> {
    /* Keep these rules in this order: several of them can match the same text at the same
       length (for example a single 'I' is both a primitive type and a simple name), and in
       that case the earliest rule is the one that applies. */
    {PrimitiveType} {
        return newToken(PRIMITIVE_TYPE);
    }
    "V" {
        return newToken(VOID_TYPE);
    }
    {ClassDescriptor} {
        return newToken(CLASS_DESCRIPTOR);
    }
    {ArrayDescriptor} {
        return newToken(ARRAY_DESCRIPTOR);
    }

    /* Two or more types in a row form a parameter list; a run made only of primitive type
       letters could equally be an identifier. */
    {PrimitiveType} {PrimitiveType}+ {
        return newToken(PARAM_LIST_OR_ID);
    }
    {Type} {Type}+ {
        return newToken(PARAM_LIST);
    }

    {SimpleName} {
        return newToken(SIMPLE_NAME);
    }
    "<init>" | "<clinit>" {
        return newToken(METHOD_NAME);
    }
}
547
548/*Symbols/Whitespace/EOF*/
<YYINITIAL> {
    /* Punctuation. */
    ".." {
        return newToken(DOTDOT);
    }
    "->" {
        return newToken(ARROW);
    }
    "=" {
        return newToken(EQUAL);
    }
    ":" {
        return newToken(COLON);
    }
    "," {
        return newToken(COMMA);
    }
    "{" {
        return newToken(OPEN_BRACE);
    }
    "}" {
        return newToken(CLOSE_BRACE);
    }
    "(" {
        return newToken(OPEN_PAREN);
    }
    ")" {
        return newToken(CLOSE_PAREN);
    }

    /* Runs of whitespace become a single token on the hidden channel. */
    [\r\n\t ]+ {
        return newToken(WHITE_SPACE, true);
    }

    <<EOF>> {
        return newToken(EOF);
    }
}
562
563/*catch all*/
<YYINITIAL> {
    /* Anything that looks like a directive but was not matched by an earlier rule: a lone '.',
       or '.' followed by a word made of letters, digits, '-' and '_' that does not start with
       a digit. The character classes previously read "A-z", which also matched the punctuation
       between 'Z' and 'a' ([ \ ] ^ `). The former separate rule for a single character after
       the '.' is covered by the second rule here, whose trailing part may be empty. */
    "." { return invalidToken("Invalid directive"); }
    "." [a-zA-Z\-_] [a-zA-Z0-9\-_]* { return invalidToken("Invalid directive"); }

    /* Last resort: any single character no other rule accepts. */
    . { return invalidToken("Invalid text"); }
}
570