opcode-gen.awk revision 3c5df37a2df7368eb274eb097e9cfa2ccc7fffb6
1# Copyright (C) 2007 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15#
16# Awk helper script for opcode-gen.
17#
18
19#
20# Initialization.
21#
22
BEGIN {
    # Limits of the two opcode numbering schemes used by the handlers
    # below: raw opcode values and packed opcode values.
    MAX_PACKED_OPCODE = 511;
    MAX_OPCODE = 65535;

    # Fill in the lookup tables that opcode definitions are validated
    # against while the bytecode file is being read.
    initFlags();
    initIndexTypes();

    # Load the opcode and format definitions; give up on any error.
    if (readBytecodes() != 0) {
        exit 1;
    }

    # Compute everything that is derived from the raw definitions.
    deriveOpcodeChains();
    createPackedTables();

    # No directive is active when input processing starts.
    emission = "";
    consumeUntil = "";
}
34
35#
36# General control (must appear above directive handlers).
37#
38
# While a directive section is being replaced, swallow its old contents.
# Only the line containing the closing END(...) marker is passed through,
# and seeing it ends the swallowing.
consumeUntil != "" {
    if (index($0, consumeUntil) == 0) {
        next;
    }

    consumeUntil = "";
    print;
    next;
}
48
# Recognize a BEGIN(tag) marker. The tag selects which handler below emits
# output, and the matching END(tag) text is remembered so that the old
# contents of the section can be skipped.
/BEGIN\([a-z-]*\)/ {
    # "BEGIN(" is six characters and the closing paren is one more, so the
    # tag is the matched text without those seven characters.
    i = match($0, /BEGIN\([a-z-]*\)/);
    emission = substr($0, i + 6, RLENGTH - 7);
    emissionHandled = 0;
    consumeUntil = "END(" emission ")";
}
56
# Every line that reaches this rule is echoed unchanged. That includes the
# line carrying a directive's BEGIN(...) marker.
{
    print $0;
}
62
63#
64# Handlers for all of the directives.
65#
66
# BEGIN(opcodes): one "public static final int" constant per defined,
# non-optimized opcode, using the hex text as written in the bytecode file.
emission == "opcodes" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i) && !isOptimized(i)) {
            printf("    public static final int %s = 0x%s;\n",
                   constName[i], hex[i]);
        }
    }
}
76
# BEGIN(first-opcodes): a comment line for each defined, non-optimized
# opcode that heads an instruction-fitting chain.
emission == "first-opcodes" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i) || isFirst[i] != "true") {
            continue;
        }
        printf("    //     DalvOps.%s\n", constName[i]);
    }
}
87
# BEGIN(dops): a Dop constant definition for each defined, non-optimized
# opcode, naming its family, the next opcode in its fitting chain (or
# NO_NEXT), its format, whether it has a result, and its name.
emission == "dops" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;

        # -1 marks the end of a chain.
        if (nextOpcode[i] == -1) {
            nextOp = "NO_NEXT";
        } else {
            nextOp = constName[nextOpcode[i]];
        }

        printf("    public static final Dop %s =\n" \
               "        new Dop(DalvOps.%s, DalvOps.%s,\n" \
               "            DalvOps.%s, Form%s.THE_ONE, %s,\n" \
               "            \"%s\");\n\n",
               constName[i], constName[i], family[i], nextOp, format[i],
               hasResult[i], name[i]);
    }
}
105
# BEGIN(opcode-info-defs): an Info constant definition for each defined,
# non-optimized opcode.
emission == "opcode-info-defs" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;

        # "none" and "unknown" are emitted as null; anything else becomes
        # an IndexType constant (upper-cased, dashes turned to underscores).
        itype = indexType[i];
        if ((itype != "none") && (itype != "unknown")) {
            itype = "IndexType." toupper(itype);
            gsub(/-/, "_", itype);
        } else {
            itype = "null";
        }

        printf("    public static final Info %s =\n" \
               "        new Info(DalvOps.%s,\n" \
               "            InstructionCodec.FORMAT_%s, %s);\n\n", \
                constName[i], constName[i], toupper(format[i]), itype);
    }
}
127
# BEGIN(dops-init) and BEGIN(opcode-info-init): a set(...) call for each
# defined, non-optimized opcode constant.
emission == "dops-init" || emission == "opcode-info-init" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i) && !isOptimized(i)) {
            printf("        set(%s);\n", constName[i]);
        }
    }
}
136
# BEGIN(libcore-opcodes): an "int OP_..." constant, with a four-digit hex
# value, for each defined, non-optimized opcode.
emission == "libcore-opcodes" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i) && !isOptimized(i)) {
            printf("    int OP_%-28s = 0x%04x;\n", constName[i], i);
        }
    }
}
145
# BEGIN(libcore-maximum-values): assignments of the largest raw and the
# largest packed opcode values, in decimal.
emission == "libcore-maximum-values" {
    emissionHandled = 1;

    printf "        MAXIMUM_VALUE = %d;\n", MAX_OPCODE;
    printf "        MAXIMUM_PACKED_VALUE = %d;\n", MAX_PACKED_OPCODE;
}
152
# BEGIN(libdex-maximum-values): #defines for the largest raw opcode value
# and for the number of packed opcodes (largest packed value plus one).
emission == "libdex-maximum-values" {
    emissionHandled = 1;

    printf "#define kMaxOpcodeValue 0x%x\n", MAX_OPCODE;
    printf "#define kNumPackedOpcodes 0x%x\n", MAX_PACKED_OPCODE + 1;
}
159
# BEGIN(libdex-opcode-enum): one enumerator per packed opcode slot,
# including the placeholder names of unused slots.
emission == "libdex-opcode-enum" {
    emissionHandled = 1;

    i = 0;
    while (i <= MAX_PACKED_OPCODE) {
        printf("    OP_%-28s = 0x%02x,\n", packedConstName[i], i);
        i++;
    }
}
167
# BEGIN(libdex-goto-table): one H(OP_...) entry per packed opcode slot.
# Each entry is padded to 78 columns and ends in a backslash.
emission == "libdex-goto-table" {
    emissionHandled = 1;

    i = 0;
    while (i <= MAX_PACKED_OPCODE) {
        content = sprintf("        H(OP_%s),", packedConstName[i]);
        printf("%-78s\\\n", content);
        i++;
    }
}
176
# BEGIN(libdex-opcode-names): the quoted name of every packed opcode slot,
# one per line.
emission == "libdex-opcode-names" {
    emissionHandled = 1;

    i = 0;
    while (i <= MAX_PACKED_OPCODE) {
        printf("    \"%s\",\n", packedName[i]);
        i++;
    }
}
184
# BEGIN(libdex-widths): the width of every packed opcode slot, laid out
# sixteen entries per line.
emission == "libdex-widths" {
    emissionHandled = 1;

    col = 1;
    i = 0;
    while (i <= MAX_PACKED_OPCODE) {
        value = sprintf("%d,", packedWidth[i]);
        col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 16, 2, "    ");
        i++;
    }
}
194
# BEGIN(libdex-flags): the flags of every packed opcode slot, translated
# to a C expression, one per line.
emission == "libdex-flags" {
    emissionHandled = 1;

    i = 0;
    while (i <= MAX_PACKED_OPCODE) {
        value = flagsToC(packedFlags[i]);
        printf("    %s,\n", value);
        i++;
    }
}
203
# BEGIN(libdex-formats): the kFmt... constant of every packed opcode slot,
# laid out seven entries per line.
emission == "libdex-formats" {
    emissionHandled = 1;

    col = 1;
    i = 0;
    while (i <= MAX_PACKED_OPCODE) {
        value = sprintf("kFmt%s,", packedFormat[i]);
        col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 7, 9, "    ");
        i++;
    }
}
213
# BEGIN(libdex-index-types): the kIndex... constant of every packed opcode
# slot, laid out three entries per line.
emission == "libdex-index-types" {
    emissionHandled = 1;

    col = 1;
    i = 0;
    while (i <= MAX_PACKED_OPCODE) {
        value = sprintf("%s,", indexTypeValues[packedIndexType[i]]);
        col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 3, 19, "    ");
        i++;
    }
}
223
# Finish directive processing. This rule must come after all of the
# directive handlers. If none of them claimed the tag, warn on stderr and
# cancel the pending skip. In every case the tag is cleared so that the
# handlers do not run again on the following line.
emission != "" {
    if (emissionHandled == 0) {
        printf("WARNING: unknown tag \"%s\"\n", emission) >"/dev/stderr";
        consumeUntil = "";
    }

    emission = "";
}
234
235#
236# Helper functions.
237#
238
# Print one element of a multi-column table.
#
#   value      - text to print
#   isLast     - true if this is the final element of the whole table
#   col        - one-based column the element goes in
#   numCols    - number of columns per line
#   colWidth   - width that non-final elements are left-justified to
#   linePrefix - text printed before the first column of each line
#
# Elements after the first column are preceded by a single space. The
# line is ended after the last column and after the final element.
# Returns the one-based column for the next element.
function colPrint(value, isLast, col, numCols, colWidth, linePrefix,
                  lead, width, tail) {
    # locals: lead, width, tail
    lead = (col == 1) ? linePrefix : " ";

    if (isLast || (col == numCols)) {
        # End of line: no padding, just the newline.
        width = 1;
        tail = "\n";
    } else {
        width = colWidth;
        tail = "";
    }

    printf("%s%-*s%s", lead, width, value, tail);

    return (col % numCols) + 1;
}
251
# Read the bytecode description file named by the global bytecodeFile,
# handing each "op" and "format" line to defineOpcode() or defineFormat().
# Blank lines and "#" comments are skipped.
#
# Returns 0 on success and 1 on failure. All diagnostics go to stderr,
# because stdout carries the generated output.
function readBytecodes(i, parts, line, cmd, status, count, cleaned) {
    # locals: parts, line, cmd, status, count, cleaned
    # (i is unused; it is kept so the parameter list stays compatible)
    for (;;) {
        # Read a line.
        status = getline line <bytecodeFile;
        if (status == 0) break;
        if (status < 0) {
            print "trouble reading bytecode file" >"/dev/stderr";
            return 1;
        }

        # Clean up the line: collapse runs of spaces, drop any trailing
        # comment, and trim a leading or trailing space.
        gsub(/  */, " ", line);
        sub(/ *#.*$/, "", line);
        sub(/ $/, "", line);
        sub(/^ /, "", line);
        count = split(line, parts);
        if (count == 0) continue; # Blank or comment line.

        # Extract the command. Keep the full cleaned line for error
        # messages, since the command is stripped from "line" below.
        cmd = parts[1];
        cleaned = line;
        sub(/^[a-z][a-z]* */, "", line); # Remove the command from line.

        if (cmd == "op") {
            status = defineOpcode(line);
        } else if (cmd == "format") {
            status = defineFormat(line);
        } else {
            status = -1; # Unknown command.
        }

        if (status != 0) {
            printf("syntax error on line: %s\n", cleaned) >"/dev/stderr";
            close(bytecodeFile);
            return 1;
        }
    }

    close(bytecodeFile);
    return 0;
}
290
# Define an opcode from the remainder of an "op" line, which must consist
# of six fields: hex value, name, format, has-result flag ("n" means no
# result), index type, and flags.
#
# Fills in the per-opcode tables (hex, name, format, hasResult, indexType,
# flags, constName, family, width) and the familyFormat association.
# Returns 0 on success, -1 on a malformed line, and 1 when a field holds
# an unknown or out-of-range value (after reporting it on stderr).
function defineOpcode(line, count, parts, idx) {
    # locals: count, parts, idx
    count = split(line, parts);
    if (count != 6)  return -1;
    idx = parseHex(parts[1]);
    if (idx < 0) return -1;

    # Every consumer of these tables iterates over 0..MAX_OPCODE, so a
    # larger value would be silently ignored; reject it instead.
    if (idx > MAX_OPCODE) {
        printf("opcode out of range: %s\n", parts[1]) >"/dev/stderr";
        return 1;
    }

    # Extract directly specified values from the line.
    hex[idx] = parts[1];
    name[idx] = parts[2];
    format[idx] = parts[3];
    hasResult[idx] = (parts[4] == "n") ? "false" : "true";
    indexType[idx] = parts[5];
    flags[idx] = parts[6];

    # Calculate derived values.

    constName[idx] = toupper(name[idx]);
    gsub("[/-]", "_", constName[idx]);   # Dash and slash become underscore.
    gsub("[+^]", "", constName[idx]);    # Plus and caret are removed.

    # The family is the part of the name before the first slash.
    split(name[idx], parts, "/");
    family[idx] = toupper(parts[1]);
    gsub("-", "_", family[idx]);         # Dash becomes underscore.
    gsub("[+^]", "", family[idx]);       # Plus and caret are removed.

    # Width is the first format char. substr() is used rather than
    # splitting on "", whose behavior is not specified by POSIX.
    width[idx] = substr(format[idx], 1, 1);

    # This association is used when computing "next" opcodes.
    familyFormat[family[idx],format[idx]] = idx;

    # Verify values.

    if (nextFormat[format[idx]] == "") {
        printf("unknown format: %s\n", format[idx]) >"/dev/stderr";
        return 1;
    }

    if (indexTypeValues[indexType[idx]] == "") {
        printf("unknown index type: %s\n", indexType[idx]) >"/dev/stderr";
        return 1;
    }

    if (flagsToC(flags[idx]) == "") {
        printf("bogus flags: %s\n", flags[idx]) >"/dev/stderr";
        return 1;
    }

    return 0;
}
343
# Define a format family from the remainder of a "format" line: a list of
# format names. Each format is linked to the one that follows it via
# nextFormat, and the last one is linked to "none". The whole line is also
# recorded in formats under the first format name.
# Returns 0 on success or -1 if the line names no formats.
function defineFormat(line, count, parts, i) {
    # locals: count, parts, i
    count = split(line, parts);
    if (count < 1) {
        return -1;
    }

    formats[parts[1]] = line;

    for (i = 1; i < count; i++) {
        nextFormat[parts[i]] = parts[i + 1];
    }
    nextFormat[parts[count]] = "none"; # End of the chain.

    return 0;
}
358
# Fill in the nextOpcode and isFirst arrays for every defined opcode.
# nextOpcode[i] is the opcode to try after i during instruction fitting
# (-1 if there is none). isFirst[i] is "true" exactly when no other opcode
# names i as its successor, that is, when i heads a fitting chain.
function deriveOpcodeChains(i, op) {
    # locals: i, op

    # First pass: assume every defined opcode is a chain head. This has to
    # finish before the second pass, which may clear the mark of an opcode
    # with a higher index than the one being examined.
    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i)) {
            isFirst[i] = "true";
        }
    }

    # Second pass: record successors and unmark each of them.
    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i)) {
            op = findNextOpcode(i);
            nextOpcode[i] = op;
            if (op != -1) {
                isFirst[op] = "false";
            }
        }
    }
}
380
# Given an opcode by index, find the next opcode of the same family (same
# base name) to try when matching instructions to opcodes. Returns that
# opcode's index, or -1 if the family has no opcode in any later format.
function findNextOpcode(idx, fam, fmt, result) {
    # locals: fam, fmt, result
    fam = family[idx];

    # Walk the nextFormat chain, starting after this opcode's own format.
    # Not every family has an opcode in every format, so keep going until
    # one is found or the chain ends at "none".
    fmt = nextFormat[format[idx]];
    while (fmt != "none") {
        result = familyFormat[fam,fmt];
        if (result != "") {
            return result;
        }
        fmt = nextFormat[fmt];
    }

    return -1;
}
403
# Build the packed* tables, which are indexed by packed opcode. Packed
# opcode values run from 0 to MAX_PACKED_OPCODE, whereas the raw opcodes
# they stand for sparsely span the range 0-0xffff.
function createPackedTables(i, op) {
    # locals: i, op
    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        op = unpackOpcode(i);

        if ((i != 255) && !isUnused(op)) {
            # A defined opcode: copy its attributes across.
            packedName[i]      = name[op];
            packedConstName[i] = constName[op];
            packedFormat[i]    = format[op];
            packedFlags[i]     = flags[op];
            packedWidth[i]     = width[op];
            packedIndexType[i] = indexType[op];
            continue;
        }

        # A placeholder slot. Only the names differ between the two kinds.
        if (i == 255) {
            # Special case: This is the low-opcode slot for a would-be
            # extended opcode dispatch implementation.
            packedName[i]      = "dispatch-ff";
            packedConstName[i] = "DISPATCH_FF";
        } else {
            packedName[i]      = unusedName(op);
            packedConstName[i] = unusedConstName(op);
        }
        packedFormat[i]    = "00x";
        packedFlags[i]     = 0;
        packedWidth[i]     = 0;
        packedIndexType[i] = "unknown";
    }
}
437
# Given a packed opcode, returns the raw (unpacked) opcode value. Packed
# values up to 255 are their own raw value. Packed value 256 + n maps to
# raw value (n * 256) + 255.
#
# Note: This must be the inverse of the corresponding code in
# libdex/DexOpcodes.h.
function unpackOpcode(idx) {
    return (idx <= 255) ? idx : ((idx - 256) * 256) + 255;
}
449
# Returns the "unused" name of the given opcode (by index), that is, the
# human-oriented name to use for an opcode that has no definition. The
# index is shown as two hex digits up to 255 and as four above that.
function unusedName(idx) {
    return sprintf((idx <= 255) ? "unused-%02x" : "unused-%04x", idx);
}
461
# Returns the "unused" constant name of the given opcode (by index), that
# is, the name to use for a constant definition when the opcode has no
# definition. The index is shown as two upper-case hex digits up to 255
# and as four above that.
function unusedConstName(idx) {
    return toupper(sprintf((idx <= 255) ? "UNUSED_%02x" : "UNUSED_%04x", idx));
}
472
# Convert a string of hex digits (either case, no "0x" prefix) to an int.
# Returns the value, or -1 after printing a message on stderr if the
# string is empty or contains a character that is not a hex digit.
function parseHex(hex, result, chars, count, c, i) {
    # locals: result, count, c, i
    # (chars is unused; it is kept so the parameter list stays compatible)
    hex = tolower(hex);
    count = length(hex);

    # An empty string is not a number; do not quietly read it as zero.
    if (count == 0) {
        printf("bogus hex value: %s\n", hex) >"/dev/stderr";
        return -1;
    }

    # Characters are picked out with substr() rather than by splitting on
    # "", whose behavior is not specified by POSIX.
    result = 0;
    for (i = 1; i <= count; i++) {
        # index() gives the one-based digit position, or 0 if not a digit.
        c = index("0123456789abcdef", substr(hex, i, 1));
        if (c == 0) {
            printf("bogus hex value: %s\n", hex) >"/dev/stderr";
            return -1;
        }
        result = (result * 16) + c - 1;
    }
    return result;
}
489
# Initialize indexTypeValues. Its keys are the index type names that an
# opcode definition may use, and its values are the identifiers emitted
# for them by the libdex-index-types directive.
function initIndexTypes() {
    # Types that carry no specific reference.
    indexTypeValues["none"]          = "kIndexNone";
    indexTypeValues["unknown"]       = "kIndexUnknown";
    indexTypeValues["varies"]        = "kIndexVaries";

    # Names ending in "-ref".
    indexTypeValues["field-ref"]     = "kIndexFieldRef";
    indexTypeValues["method-ref"]    = "kIndexMethodRef";
    indexTypeValues["string-ref"]    = "kIndexStringRef";
    indexTypeValues["type-ref"]      = "kIndexTypeRef";

    # The remaining types.
    indexTypeValues["field-offset"]  = "kIndexFieldOffset";
    indexTypeValues["inline-method"] = "kIndexInlineMethod";
    indexTypeValues["vtable-offset"] = "kIndexVtableOffset";
}
503
# Initialize flagValues. Its keys are the flag names that an opcode
# definition may use, and its values are the C identifiers that flagsToC()
# emits for them. A value of "0" means the flag adds nothing to the C
# expression.
function initFlags() {
    # Flags with no C counterpart.
    flagValues["0"]             = "0";
    flagValues["optimized"]     = "0"; # Not represented in C output

    # Flags that map onto kInstr* constants.
    flagValues["branch"]        = "kInstrCanBranch";
    flagValues["continue"]      = "kInstrCanContinue";
    flagValues["invoke"]        = "kInstrInvoke";
    flagValues["return"]        = "kInstrCanReturn";
    flagValues["switch"]        = "kInstrCanSwitch";
    flagValues["throw"]         = "kInstrCanThrow";
}
515
# Translate the given flags (flag names separated by pipe characters) into
# the equivalent C expression: the matching flagValues entries joined with
# "|", or "0" if none of the flags has a C representation. Returns "" on
# error, after naming the unrecognized flag on stderr.
function flagsToC(f, parts, result, i, count, value) {
    # locals: parts, result, i, count, value
    count = split(f, parts, /\|/); # Split input at pipe characters.
    result = "0";

    for (i = 1; i <= count; i++) {
        value = flagValues[parts[i]];
        if (value == "") {
            # Report the name as written; the lookup result is empty.
            printf("bogus flag: %s\n", parts[i]) >"/dev/stderr";
            return ""; # Bogus flag name.
        } else if (value == "0") {
            # Nothing to append for this case.
        } else if (result == "0") {
            result = value;
        } else {
            result = result "|" value;
        }
    }

    return result;
}
539
# Returns true (1) if the flags of the given opcode (by index) include
# "optimized", and false (0) otherwise.
function isOptimized(idx, parts, f, n) {
    # locals: parts, f, n
    n = split(flags[idx], parts, /\|/); # Split flags[idx] at pipes.
    for (f = 1; f <= n; f++) {
        if (parts[f] == "optimized") {
            return 1;
        }
    }
    return 0;
}
549
# Returns true (1) if there is no definition for the given opcode (by
# index), that is, if no name was recorded for it; otherwise false (0).
function isUnused(idx) {
    if (name[idx] != "") {
        return 0;
    }
    return 1;
}
554