opcode-gen.awk revision a277f14c3702a474e18a9981f23845d7d7521163
1# Copyright (C) 2007 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15#
16# Awk helper script for opcode-gen.
17#
18
19#
20# Initialization.
21#
22
BEGIN {
    # No directive section is active when processing starts.
    emission = "";
    consumeUntil = "";

    # Largest raw (unpacked) opcode value.
    MAX_OPCODE = 65535;

    # Largest packed opcode value. This is meant to grow to 511 later.
    MAX_PACKED_OPCODE = 255; # TODO: Not for long!

    # Set up the lookup tables that opcode validation consults.
    initFlags();
    initIndexTypes();

    # Load the bytecode description; give up on any error.
    if (readBytecodes() != 0) {
        exit 1;
    }

    # Compute everything that is derived from the opcode definitions.
    deriveOpcodeChains();
    createPackedTables();
}
35
36#
37# General control (must appear above directive handlers).
38#
39
# While a directive section is open, drop its stale contents. Only the
# line carrying the matching END(...) marker is passed through, and it
# also closes the section.
consumeUntil != "" {
    if (index($0, consumeUntil) == 0) {
        next;
    }

    consumeUntil = "";
    print;
    next;
}
49
# Spot a BEGIN(tag) marker. The tag is remembered in "emission" so that
# the handler rules below can react to it, and everything up to the
# corresponding END(tag) marker is scheduled for replacement.
/BEGIN\([a-z-]*\)/ {
    i = match($0, /BEGIN\([a-z-]*\)/);

    # Strip the leading "BEGIN(" and the trailing ")" from the match.
    emission = substr($0, i + length("BEGIN("), RLENGTH - length("BEGIN()"));

    emissionHandled = 0;
    consumeUntil = "END(" emission ")";
    print;
}
58
59#
60# Handlers for all of the directives.
61#
62
# Java int constants, one per defined and non-optimized opcode.
emission == "opcodes" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i) && !isOptimized(i)) {
            printf("    public static final int %s = 0x%s;\n",
                   constName[i], hex[i]);
        }
    }
}
72
# Comment lines listing the opcodes that head an instruction fitting
# chain (skipping undefined and optimized opcodes).
emission == "first-opcodes" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i) && !isOptimized(i) && isFirst[i] == "true") {
            printf("    //     DalvOps.%s\n", constName[i]);
        }
    }
}
83
# Dop instance declarations, one per defined and non-optimized opcode.
emission == "dops" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;

        # Name of the opcode to try next, or the NO_NEXT marker at the
        # end of a chain.
        nextOp = nextOpcode[i];
        if (nextOp == -1) {
            nextOp = "NO_NEXT";
        } else {
            nextOp = constName[nextOp];
        }

        printf("    public static final Dop %s =\n", constName[i]);
        printf("        new Dop(DalvOps.%s, DalvOps.%s,\n",
               constName[i], family[i]);
        printf("            DalvOps.%s, Form%s.THE_ONE, %s,\n",
               nextOp, format[i], hasResult[i]);
        printf("            \"%s\");\n\n", name[i]);
    }
}
101
# Registration calls, one per defined and non-optimized opcode.
emission == "dops-init" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i) && !isOptimized(i)) {
            printf("        set(%s);\n", constName[i]);
        }
    }
}
110
# libcore-style OP_* constants, one per defined and non-optimized opcode.
emission == "libcore-opcodes" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i) && !isOptimized(i)) {
            printf("    int OP_%-28s = 0x%04x;\n", constName[i], i);
        }
    }
}
119
# The largest raw and packed opcode values.
emission == "libcore-maximum-values" {
    emissionHandled = 1;

    printf("        MAXIMUM_VALUE = %d;\n" \
           "        MAXIMUM_PACKED_VALUE = %d;\n",
           MAX_OPCODE, MAX_PACKED_OPCODE);
}
126
# C enum members, one per packed opcode slot.
emission == "libdex-opcode-enum" {
    emissionHandled = 1;

    i = 0;
    while (i <= MAX_PACKED_OPCODE) {
        printf("    OP_%-28s = 0x%02x,\n", packedConstName[i], i);
        i++;
    }
}
134
# Entries of the goto table macro, one per packed opcode slot. Each
# line is padded so that the trailing backslashes line up.
emission == "libdex-goto-table" {
    emissionHandled = 1;

    i = 0;
    while (i <= MAX_PACKED_OPCODE) {
        content = "        H(OP_" packedConstName[i] "),";
        printf("%-78s\\\n", content);
        i++;
    }
}
143
# Quoted human-oriented opcode names, one per packed opcode slot.
emission == "libdex-opcode-names" {
    emissionHandled = 1;

    i = 0;
    while (i <= MAX_PACKED_OPCODE) {
        printf("    \"%s\",\n", packedName[i]);
        i++;
    }
}
151
# Instruction widths, laid out 16 to a line.
emission == "libdex-widths" {
    emissionHandled = 1;

    col = 1;
    i = 0;
    while (i <= MAX_PACKED_OPCODE) {
        value = sprintf("%d,", packedWidth[i]);
        col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 16, 2, "    ");
        i++;
    }
}
161
# C flag expressions, one per packed opcode slot.
emission == "libdex-flags" {
    emissionHandled = 1;

    i = 0;
    while (i <= MAX_PACKED_OPCODE) {
        value = flagsToC(packedFlags[i]);
        printf("    %s,\n", value);
        i++;
    }
}
170
# kFmt* instruction format constants, laid out 7 to a line.
emission == "libdex-formats" {
    emissionHandled = 1;

    col = 1;
    i = 0;
    while (i <= MAX_PACKED_OPCODE) {
        value = "kFmt" packedFormat[i] ",";
        col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 7, 9, "    ");
        i++;
    }
}
180
# kIndex* index type constants, laid out 3 to a line.
emission == "libdex-index-types" {
    emissionHandled = 1;

    col = 1;
    i = 0;
    while (i <= MAX_PACKED_OPCODE) {
        value = indexTypeValues[packedIndexType[i]] ",";
        col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 3, 19, "    ");
        i++;
    }
}
190
191#
192# General control (must appear after the directives).
193#
194
# Finish off the line that opened a directive. If none of the handlers
# above claimed the tag, warn about it and leave the section's existing
# contents in place instead of consuming them.
emission != "" {
    if (emissionHandled == 0) {
        consumeUntil = "";
        printf("WARNING: unknown tag \"%s\"\n", emission) >"/dev/stderr";
    }

    emission = "";
    next;
}

# Any line that reaches this point is copied to the output unchanged.
{
    print;
}
208
209#
210# Helper functions.
211#
212
# Print one element of a multi-column listing and return the (one-based)
# column that the following element belongs in.
#
#   value      - text of the element
#   isLast     - true if this is the final element of the whole listing
#   col        - one-based column this element is printed in
#   numCols    - number of columns per line
#   colWidth   - width that non-final elements on a line are padded to
#   linePrefix - text emitted in front of the first element on a line
function colPrint(value, isLast, col, numCols, colWidth, linePrefix) {
    # Elements after the first are separated by a single space.
    if (col != 1) {
        linePrefix = " ";
    }

    if (isLast || (col == numCols)) {
        # End of a line: no column padding, just terminate the line.
        printf("%s%-1s\n", linePrefix, value);
    } else {
        printf("%s%-*s", linePrefix, colWidth, value);
    }

    return (col % numCols) + 1;
}
225
# Read the bytecode description file named by the global "bytecodeFile".
# Each non-blank, non-comment line starts with a command word ("op" or
# "format") that is dispatched to the matching define*() function.
#
# Returns 0 on success and 1 on any failure (unreadable file, unknown
# command, or a line rejected by its handler). All diagnostics go to
# stderr so that they never get mixed into the generated output.
function readBytecodes(i, parts, line, cmd, status, count, rawLine) {
    # locals: parts, line, cmd, status, count, rawLine
    for (;;) {
        # Read a line.
        status = getline line <bytecodeFile;
        if (status == 0) break;
        if (status < 0) {
            printf("trouble reading bytecode file: %s\n", bytecodeFile) \
                >"/dev/stderr";
            return 1;
        }

        # Keep the untouched text around for error reporting.
        rawLine = line;

        # Clean up the line and extract the command.
        gsub(/  */, " ", line);
        sub(/ *#.*$/, "", line);
        sub(/ $/, "", line);
        sub(/^ /, "", line);
        count = split(line, parts);
        if (count == 0) continue; # Blank or comment line.
        cmd = parts[1];
        sub(/^[a-z][a-z]* */, "", line); # Remove the command from line.

        if (cmd == "op") {
            status = defineOpcode(line);
        } else if (cmd == "format") {
            status = defineFormat(line);
        } else {
            status = -1;
        }

        if (status != 0) {
            # Report the line as written, including its command word.
            printf("syntax error on line: %s\n", rawLine) >"/dev/stderr";
            close(bytecodeFile);
            return 1;
        }
    }

    close(bytecodeFile);
    return 0;
}
264
# Define an opcode from the remainder of an "op" line, which must hold
# exactly six fields: hex value, name, format, has-result marker ("n"
# means no result), index type, and flags.
#
# Returns 0 on success, -1 if the line is malformed, and 1 if a field
# holds a value that fails validation (a diagnostic is printed to
# stderr in that case).
function defineOpcode(line, count, parts, idx) {
    # locals: count, parts, idx
    count = split(line, parts);
    if (count != 6)  return -1;
    idx = parseHex(parts[1]);
    if (idx < 0) return -1;

    # Opcodes beyond MAX_OPCODE would never be visited by any of the
    # emission loops, so reject them instead of silently dropping them.
    if (idx > MAX_OPCODE) {
        printf("opcode out of range: %s\n", parts[1]) >"/dev/stderr";
        return 1;
    }

    # Extract directly specified values from the line.
    hex[idx] = parts[1];
    name[idx] = parts[2];
    format[idx] = parts[3];
    hasResult[idx] = (parts[4] == "n") ? "false" : "true";
    indexType[idx] = parts[5];
    flags[idx] = parts[6];

    # Calculate derived values.

    constName[idx] = toupper(name[idx]);
    gsub("[-/]", "_", constName[idx]);   # Dash and slash become underscore.
    gsub("[+^]", "", constName[idx]);    # Plus and caret are removed.

    # The family is the part of the name in front of the first slash.
    split(name[idx], parts, "/");
    family[idx] = toupper(parts[1]);
    gsub("-", "_", family[idx]);         # Dash becomes underscore.
    gsub("[+^]", "", family[idx]);       # Plus and caret are removed.

    # Width is the first format char.
    width[idx] = substr(format[idx], 1, 1);

    # This association is used when computing "next" opcodes.
    familyFormat[family[idx],format[idx]] = idx;

    # Verify values.

    if (nextFormat[format[idx]] == "") {
        printf("unknown format: %s\n", format[idx]) >"/dev/stderr";
        return 1;
    }

    if (indexTypeValues[indexType[idx]] == "") {
        printf("unknown index type: %s\n", indexType[idx]) >"/dev/stderr";
        return 1;
    }

    if (flagsToC(flags[idx]) == "") {
        printf("bogus flags: %s\n", flags[idx]) >"/dev/stderr";
        return 1;
    }

    return 0;
}
317
# Define a format family from the remainder of a "format" line. The
# line lists formats in the order they should be tried; each one is
# linked to its successor in nextFormat, and the last one is linked to
# "none". Returns 0 on success or -1 if the line names no formats.
function defineFormat(line, count, parts, i) {
    # locals: count, parts, i
    count = split(line, parts);
    if (count < 1) {
        return -1;
    }

    # The family is recorded under the name of its first format.
    formats[parts[1]] = line;

    i = 1;
    while (i <= count) {
        nextFormat[parts[i]] = (i < count) ? parts[i + 1] : "none";
        i++;
    }

    return 0;
}
332
# Fill in the nextOpcode and isFirst arrays. nextOpcode says, for each
# defined opcode, which opcode to try next during instruction fitting
# (-1 if there is none). isFirst is "true" for exactly those opcodes
# that no other opcode names as its successor, that is, the heads of
# the fitting chains.
function deriveOpcodeChains(i, op) {
    # locals: i, op

    # Start out by assuming that every defined opcode heads a chain.
    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i)) {
            isFirst[i] = "true";
        }
    }

    # Then link the chains, demoting every opcode that is a successor.
    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i)) {
            op = findNextOpcode(i);
            nextOpcode[i] = op;
            if (op != -1) {
                isFirst[op] = "false";
            }
        }
    }
}
354
# Given an opcode by index, find the next opcode in the same family
# (that is, with the same base name) to try when matching instructions
# to opcodes. This walks the nextFormat chain, starting after the
# opcode's own format, looking for a format the family has an opcode
# for. Returns the index of the matching opcode or -1 if there is none.
function findNextOpcode(idx, fam, fmt, result) {
    # locals: fam, fmt, result
    fam = family[idx];

    # Not every opcode has a version with every possible format, so
    # we have to iterate down the chain until we find one or run out of
    # formats to try. The walk also stops on an empty link, which is
    # what an undefined format yields; otherwise it would never end.
    fmt = nextFormat[format[idx]];
    while (fmt != "" && fmt != "none") {
        # Test membership with "in" so that probing does not create
        # empty familyFormat entries.
        if ((fam, fmt) in familyFormat) {
            result = familyFormat[fam,fmt];
            return result;
        }
        fmt = nextFormat[fmt];
    }

    return -1;
}
377
# Build the packed* tables, which hold per-opcode info indexed by packed
# opcode (0 through MAX_PACKED_OPCODE) rather than by raw opcode. Slots
# without a defined opcode get placeholder entries.
function createPackedTables(i, op) {
    # locals: i, op
    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        op = unpackOpcode(i);

        if (i != 255 && !isUnused(op)) {
            # A defined opcode: copy its info over as-is.
            packedName[i]      = name[op];
            packedConstName[i] = constName[op];
            packedFormat[i]    = format[op];
            packedFlags[i]     = flags[op];
            packedWidth[i]     = width[op];
            packedIndexType[i] = indexType[op];
            continue;
        }

        # A placeholder slot. Only the names depend on which kind it is.
        if (i == 255) {
            # Special case: This is the low-opcode slot for a would-be
            # extended opcode dispatch implementation.
            packedName[i]      = "dispatch-ff";
            packedConstName[i] = "DISPATCH_FF";
        } else {
            packedName[i]      = unusedName(op);
            packedConstName[i] = unusedConstName(op);
        }

        packedFormat[i]    = "00x";
        packedFlags[i]     = 0;
        packedWidth[i]     = 0;
        packedIndexType[i] = "unknown";
    }
}
411
# Map a packed opcode to its raw (unpacked) opcode value. Packed values
# up to 255 map to themselves; a packed value 256 + k maps to the raw
# value (k * 256) + 255.
#
# Note: This must be the inverse of the corresponding code in
# libdex/DexOpcodes.h.
function unpackOpcode(idx) {
    return (idx <= 255) ? idx : (((idx - 256) * 256) + 255);
}
423
# Returns the human-oriented placeholder name for an opcode (by index)
# that has no definition: "unused-" followed by the opcode in lowercase
# hex, two digits wide for values up to 255 and four digits otherwise.
function unusedName(idx) {
    return sprintf((idx <= 255) ? "unused-%02x" : "unused-%04x", idx);
}
435
# Returns the placeholder constant name for an opcode (by index) that
# has no definition: "UNUSED_" followed by the opcode in uppercase hex,
# two digits wide for values up to 255 and four digits otherwise.
function unusedConstName(idx) {
    return sprintf((idx <= 255) ? "UNUSED_%02X" : "UNUSED_%04X", idx);
}
446
# Convert a string of hex digits (either case, no "0x" prefix) to an
# int. Returns -1, after printing a diagnostic to stderr, if the string
# is empty or contains anything other than hex digits.
function parseHex(hex, result, chars, count, c, i) {
    # locals: result, chars, count, c, i
    hex = tolower(hex);
    chars = "0123456789abcdef";   # A digit's value is its position - 1.
    count = length(hex);

    # An empty string is not a number; don't quietly treat it as zero.
    if (count == 0) {
        printf("bogus hex value: %s\n", hex) >"/dev/stderr";
        return -1;
    }

    result = 0;
    for (i = 1; i <= count; i++) {
        # Pick characters off with substr(); splitting on an empty
        # separator is not portable across awk implementations.
        c = index(chars, substr(hex, i, 1));
        if (c == 0) {
            printf("bogus hex value: %s\n", hex) >"/dev/stderr";
            return -1;
        }
        result = (result * 16) + c - 1;
    }
    return result;
}
463
# Populate indexTypeValues, which maps each index type name accepted in
# the bytecode description file to the name of its C constant.
function initIndexTypes() {
    # Types that do not identify a particular kind of reference.
    indexTypeValues["none"] = "kIndexNone";
    indexTypeValues["unknown"] = "kIndexUnknown";
    indexTypeValues["varies"] = "kIndexVaries";

    # Reference types.
    indexTypeValues["field-ref"] = "kIndexFieldRef";
    indexTypeValues["method-ref"] = "kIndexMethodRef";
    indexTypeValues["string-ref"] = "kIndexStringRef";
    indexTypeValues["type-ref"] = "kIndexTypeRef";

    # Remaining types.
    indexTypeValues["field-offset"] = "kIndexFieldOffset";
    indexTypeValues["inline-method"] = "kIndexInlineMethod";
    indexTypeValues["vtable-offset"] = "kIndexVtableOffset";
}
477
# Populate flagValues, which maps each flag name accepted in the
# bytecode description file to its C expression. A value of "0" marks a
# flag that contributes nothing to the C output.
function initFlags() {
    # Flags with no C-level representation.
    flagValues["0"] = "0";
    flagValues["optimized"] = "0"; # Not represented in C output

    # Flags that map onto a C constant.
    flagValues["branch"] = "kInstrCanBranch";
    flagValues["continue"] = "kInstrCanContinue";
    flagValues["invoke"] = "kInstrInvoke";
    flagValues["return"] = "kInstrCanReturn";
    flagValues["switch"] = "kInstrCanSwitch";
    flagValues["throw"] = "kInstrCanThrow";
}
489
# Translate the given pipe-separated flag names into the equivalent C
# expression, using the flagValues table. Flags whose C value is "0"
# contribute nothing; if no flag contributes, the result is "0".
# Returns "" (after printing a diagnostic to stderr) if any of the
# names is not a known flag.
function flagsToC(f, parts, result, i, count) {
    # locals: parts, result, i, count
    count = split(f, parts, /\|/); # Split input at pipe characters.
    result = "0";

    for (i = 1; i <= count; i++) {
        f = flagValues[parts[i]];
        if (f == "") {
            # Report the offending name; f is empty at this point.
            printf("bogus flag: %s\n", parts[i]) >"/dev/stderr";
            return ""; # Bogus flag name.
        } else if (f == "0") {
            # Nothing to append for this case.
        } else if (result == "0") {
            result = f;
        } else {
            result = result "|" f;
        }
    }

    return result;
}
513
# Returns true (1) if the flags of the given opcode (by index) include
# "optimized", and false (0) otherwise.
function isOptimized(idx, parts, f) {
    # locals: parts, f
    f = split(flags[idx], parts, /\|/); # Number of pipe-separated flags.
    while (f >= 1) {
        if (parts[f] == "optimized") {
            return 1;
        }
        f--;
    }
    return 0;
}
523
# Returns true if there is no definition for the given opcode (by index),
# that is, if no non-empty name has been recorded for it. Membership is
# tested with "in" first so that probing an undefined opcode does not
# create an empty name[] element; the callers scan the whole opcode
# range and would otherwise fill the array with placeholder entries.
function isUnused(idx) {
    return (!(idx in name) || (name[idx] == ""));
}
527}
528