opcode-gen revision 11a4a79b09a2be2bd7a7141ce112de3ad0432e53
1#!/bin/bash
2#
3# Copyright (C) 2007 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
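# opcode-gen <file>
#
# This script uses the file bytecode.txt (in the same directory as this
# script) to generate code inside the given <file>. Each directive is a
# region of <file> delimited by lines containing BEGIN(name) and
# END(name); the text between the two markers is replaced. The
# directives are:
#
#     opcodes:            static final ints for each opcode (no optimized ops)
#     dops:               static final objects for each opcode (no optimized
#                         ops)
#     dops-init:          initialization code for the "dops" (no optimized ops)
#     first-opcodes:      a comment indicating which opcodes are at the head
#                         position of instruction fitting chains (no optimized
#                         ops)
#     libdex-opcode-enum: OP_* enum entries for every single-byte opcode value
#     libdex-goto-table:  H(OP_*) table entries for every single-byte opcode
#                         value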
28
file="$1"

# Validate the argument before it is used for anything else.
if [ -z "$file" ]; then
    echo "must specify a file" >&2
    exit 1
fi

if [ ! -f "$file" ]; then
    echo "no such file: $file" >&2
    exit 1
fi

echo "processing $(basename -- "$file")"

# Stage the generated output in a private, unpredictably named temporary
# file; a fixed name under /tmp could be pre-created by another user.
tmpfile=$(mktemp "${TMPDIR:-/tmp}/opcode-gen.XXXXXX") || {
    echo "could not create a temporary file" >&2
    exit 1
}

# Remove the temporary file on every exit path, including failures.
trap 'rm -f "$tmpfile"' EXIT
38
# Work out where this script really lives: prog becomes the path of the
# script with symlinks followed, and progdir the fully-qualified pathname
# of the directory that contains it.
prog="$0"
while [ -h "${prog}" ]; do
    # The link target is whatever follows " -> " in the long listing.
    linkTarget=$(/bin/ls -ld "${prog}")
    linkTarget=$(expr "${linkTarget}" : ".* -> \(.*\)$")
    case "${linkTarget}" in
        /*)
            # Absolute target: use it as is.
            prog="${linkTarget}"
            ;;
        *)
            # Relative target: resolve it against the directory of the link.
            progdir=$(dirname "${prog}")
            prog="${progdir}/${linkTarget}"
            ;;
    esac
done

# Visit the directory briefly so that pwd can report its absolute name,
# then return to where we started.
oldwd=$(pwd)
progdir=$(dirname "${prog}")
cd "${progdir}"
progdir=$(pwd)
prog="${progdir}/$(basename "${prog}")"
cd "${oldwd}"

# The opcode description file sits next to this script.
bytecodeFile="${progdir}/bytecode.txt"
60
61awk -v "bytecodeFile=$bytecodeFile" '
62
BEGIN {
    # Upper bounds (inclusive) of the opcode values the rules iterate over.
    MAX_OPCODE = 65535;
    MAX_LIBDEX_OPCODE = 255; # TODO: Will not be true for long!

    # No generated region is being skipped when processing starts.
    consumeUntil = "";

    # Fill the lookup tables that the bytecode reader validates against.
    initFlags();
    initIndexTypes();

    # Load the opcode and format definitions; give up if they are bad.
    if (readBytecodes()) {
        exit 1;
    }

    deriveOpcodeChains();
}
72
# While a generated region is being replaced, drop its old contents.
# The line that contains the end marker stops the skipping and is itself
# passed on to the rules below.
consumeUntil != "" {
    if (index($0, consumeUntil) == 0) next;
    consumeUntil = "";
}
80
# opcodes: one "public static final int" per defined, non-optimized
# opcode, in increasing opcode order.
/BEGIN\(opcodes\)/ {
    # Keep the marker line, then arrange for the old region to be dropped.
    print;
    consumeUntil = "END(opcodes)";

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i) && !isOptimized(i)) {
            printf("    public static final int %s = 0x%s;\n",
                   uppername[i], hex[i]);
        }
    }

    next;
}
93
# first-opcodes: a comment line for each defined, non-optimized opcode
# that is marked as the head of an instruction fitting chain.
/BEGIN\(first-opcodes\)/ {
    # Keep the marker line, then arrange for the old region to be dropped.
    print;
    consumeUntil = "END(first-opcodes)";

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i) || isFirst[i] != "true") continue;
        printf("    //     DalvOps.%s\n", uppername[i]);
    }

    next;
}
107
# dops: one Dop constant declaration per defined, non-optimized opcode.
/BEGIN\(dops\)/ {
    # Keep the marker line, then arrange for the old region to be dropped.
    print;
    consumeUntil = "END(dops)";

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;

        # Name of the next opcode in the fitting chain, or NO_NEXT when
        # the chain ends here.
        if (nextOpcode[i] == -1) {
            nextOp = "NO_NEXT";
        } else {
            nextOp = uppername[nextOpcode[i]];
        }

        # The declaration spans four lines and is followed by a blank one.
        printf("    public static final Dop %s =\n", uppername[i]);
        printf("        new Dop(DalvOps.%s, DalvOps.%s,\n",
               uppername[i], family[i]);
        printf("            DalvOps.%s, Form%s.THE_ONE, %s,\n",
               nextOp, format[i], hasResult[i]);
        printf("            \"%s\");\n\n", name[i]);
    }

    next;
}
128
# dops-init: one set(...) call per defined, non-optimized opcode.
/BEGIN\(dops-init\)/ {
    # Keep the marker line, then arrange for the old region to be dropped.
    print;
    consumeUntil = "END(dops-init)";

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i) && !isOptimized(i)) {
            printf("        set(%s);\n", uppername[i]);
        }
    }

    next;
}
140
# libdex-opcode-enum: one enum entry for every value from 0 through
# MAX_LIBDEX_OPCODE, whether or not an opcode is defined there.
/BEGIN\(libdex-opcode-enum\)/ {
    # Keep the marker line, then arrange for the old region to be dropped.
    print;
    consumeUntil = "END(libdex-opcode-enum)";

    i = 0;
    while (i <= MAX_LIBDEX_OPCODE) {
        printf("    OP_%-28s = 0x%02x,\n", uppernameOrUnusedByte(i), i);
        i++;
    }

    next;
}
151
# libdex-goto-table: one H(OP_...) entry for every value from 0 through
# MAX_LIBDEX_OPCODE. Each entry is padded to 78 columns and ends with a
# backslash.
/BEGIN\(libdex-goto-table\)/ {
    # Keep the marker line, then arrange for the old region to be dropped.
    print;
    consumeUntil = "END(libdex-goto-table)";

    i = 0;
    while (i <= MAX_LIBDEX_OPCODE) {
        printf("%-78s\\\n",
               sprintf("        H(OP_%s),", uppernameOrUnusedByte(i)));
        i++;
    }

    next;
}
163
# Any line that reaches this point is copied through unchanged.
{ print $0; }
165
# Read the bytecode description file named by the global bytecodeFile.
# Blank lines and "#" comments are skipped; every other line must start
# with the command "op" or "format" and is handed to defineOpcode() or
# defineFormat() respectively. Returns 0 on success and 1 on failure.
# Diagnostics go to stderr because standard output is the generated file.
function readBytecodes(parts, line, rawLine, cmd, status, count) {
    # locals: parts, line, rawLine, cmd, status, count
    for (;;) {
        # Read a line.
        status = (getline line < bytecodeFile);
        if (status == 0) break;
        if (status < 0) {
            printf("trouble reading bytecode file: %s\n", bytecodeFile) \
                > "/dev/stderr";
            return 1;
        }

        # Remember the line as read so that it can be reported in full.
        rawLine = line;

        # Clean up the line and extract the command.
        gsub(/  */, " ", line);
        sub(/ *#.*$/, "", line);
        sub(/ $/, "", line);
        sub(/^ /, "", line);
        count = split(line, parts);
        if (count == 0) continue; # Blank or comment line.
        cmd = parts[1];
        sub(/^[a-z][a-z]* */, "", line); # Remove the command from line.

        if (cmd == "op") {
            status = defineOpcode(line);
        } else if (cmd == "format") {
            status = defineFormat(line);
        } else {
            status = -1;
        }

        if (status != 0) {
            printf("syntax error on line: %s\n", rawLine) > "/dev/stderr";
            close(bytecodeFile);
            return 1;
        }
    }

    close(bytecodeFile);
    return 0;
}
204
# Define an opcode from the remainder of an "op" line, which must hold
# exactly six fields: hex value, name, format, result marker, index type
# and flags. The values are stored in global arrays keyed by the numeric
# opcode value. Returns 0 on success and nonzero on failure. Validation
# failures are reported on stderr because standard output is the
# generated file.
function defineOpcode(line, count, parts, idx) {
    # locals: count, parts, idx
    count = split(line, parts);
    if (count != 6)  return -1;
    idx = parseHex(parts[1]);
    if (idx < 0) return -1;

    # Extract directly specified values from the line.
    hex[idx] = parts[1];
    name[idx] = parts[2];
    format[idx] = parts[3];
    hasResult[idx] = (parts[4] == "n") ? "false" : "true";
    indexType[idx] = parts[5];
    flags[idx] = parts[6];

    # Calculate derived values. The uppercase name has every character
    # matched by the bracket expression (which includes dash and slash)
    # turned into an underscore. The family is the part of the name
    # before the first slash, uppercased, with dashes as underscores.
    uppername[idx] = toupper(name[idx]);
    gsub("[---/]", "_", uppername[idx]);
    split(name[idx], parts, "/");
    family[idx] = toupper(parts[1]);
    gsub("-", "_", family[idx]);

    # This association is used when computing "next" opcodes.
    familyFormat[family[idx],format[idx]] = idx;

    # Verify values.

    if (nextFormat[format[idx]] == "") {
        printf("unknown format: %s\n", format[idx]) > "/dev/stderr";
        return 1;
    }

    if (indexTypeValues[indexType[idx]] == "") {
        printf("unknown index type: %s\n", indexType[idx]) > "/dev/stderr";
        return 1;
    }

    if (flagsToC(flags[idx]) == "") {
        printf("bogus flags: %s\n", flags[idx]) > "/dev/stderr";
        return 1;
    }

    return 0;
}
250
# Define a format family from the remainder of a "format" line: a
# whitespace-separated list of format names. The whole line is recorded
# in formats[] under the first name, and nextFormat[] maps each name to
# the one that follows it, with the last name mapping to "none".
# Returns 0 on success and -1 when the line holds no names.
function defineFormat(line, count, parts, i) {
    # locals: count, parts, i
    count = split(line, parts);
    if (count < 1) {
        return -1;
    }

    formats[parts[1]] = line;

    # Link every name but the last to its successor, then end the chain.
    for (i = 1; i < count; i++) {
        nextFormat[parts[i]] = parts[i + 1];
    }
    nextFormat[parts[count]] = "none";

    return 0;
}
265
# Fill in the nextOpcode and isFirst arrays. nextOpcode[i] is the opcode
# to try after opcode i during instruction fitting (or -1). isFirst[i]
# is "true" for opcodes that no other opcode names as its successor,
# that is, for the heads of the fitting chains.
function deriveOpcodeChains(i, op) {
    # locals: i, op

    # First pass: assume every used opcode heads a chain.
    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i)) {
            isFirst[i] = "true";
        }
    }

    # Second pass: record successors and demote each of them. This has
    # to follow the first pass completely, or a later "true" could
    # overwrite an earlier "false".
    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i)) continue;
        nextOpcode[i] = op = findNextOpcode(i);
        if (op != -1) {
            isFirst[op] = "false";
        }
    }
}
287
# Given an opcode by index, return the index of the next opcode of the
# same family (same base name) to try when matching instructions to
# opcodes, or -1 if there is none. The search follows the nextFormat
# chain starting after the format of the given opcode.
function findNextOpcode(idx, fam, fmt, result) {
    # locals: fam, fmt, result
    fam = family[idx];
    fmt = nextFormat[format[idx]];

    # A family need not have an opcode for every format, so keep moving
    # down the chain until one turns up or the chain ends.
    while (fmt != "none") {
        result = familyFormat[fam,fmt];
        if (result != "") {
            return result;
        }
        fmt = nextFormat[fmt];
    }

    return -1;
}
310
# Convert a string of hex digits (either case, no prefix) to an int.
# Returns -1 after printing a diagnostic to stderr if the string is
# empty or contains anything other than hex digits.
function parseHex(hex, result, count, c, i) {
    # locals: result, count, c, i
    hex = tolower(hex);
    count = length(hex);
    if (count == 0) {
        printf("bogus hex value: %s\n", hex) > "/dev/stderr";
        return -1;
    }

    result = 0;
    for (i = 1; i <= count; i++) {
        # Take one character at a time with substr(); splitting on an
        # empty separator is not portable across awk implementations.
        c = index("0123456789abcdef", substr(hex, i, 1));
        if (c == 0) {
            printf("bogus hex value: %s\n", hex) > "/dev/stderr";
            return -1;
        }
        # index() is 1-based, so the digit value is c - 1.
        result = (result * 16) + c - 1;
    }
    return result;
}
327
# Initialize the indexTypes data: indexTypeValues maps each index type
# name accepted in the bytecode file to the matching kIndex* identifier.
function initIndexTypes(table, entries, pair, total, k) {
    # locals: table, entries, pair, total, k
    table = "unknown=kIndexUnknown " \
            "none=kIndexNone " \
            "varies=kIndexVaries " \
            "type-ref=kIndexTypeRef " \
            "string-ref=kIndexStringRef " \
            "method-ref=kIndexMethodRef " \
            "field-ref=kIndexFieldRef " \
            "inline-method=kIndexInlineMethod " \
            "vtable-offset=kIndexVtableOffset " \
            "field-offset=kIndexFieldOffset";

    # Each entry has the form name=value.
    total = split(table, entries, " ");
    for (k = 1; k <= total; k++) {
        split(entries[k], pair, "=");
        indexTypeValues[pair[1]] = pair[2] "";
    }
}
341
# Initialize the flags data: flagValues maps each flag name accepted in
# the bytecode file to the matching C identifier. "optimized" maps to
# "0" because it is not represented in C output, and the name "0" maps
# to itself.
function initFlags(table, entries, pair, total, k) {
    # locals: table, entries, pair, total, k
    table = "branch=kInstrCanBranch " \
            "continue=kInstrCanContinue " \
            "switch=kInstrCanSwitch " \
            "throw=kInstrCanThrow " \
            "return=kInstrCanReturn " \
            "invoke=kInstrInvoke " \
            "optimized=0 " \
            "0=0";

    # Each entry has the form name=value. The trailing "" keeps every
    # stored value a plain string.
    total = split(table, entries, " ");
    for (k = 1; k <= total; k++) {
        split(entries[k], pair, "=");
        flagValues[pair[1]] = pair[2] "";
    }
}
353
# Translate the given flags (names separated by pipe characters) into
# the equivalent C expression, using the global flagValues table. Names
# whose value is "0" contribute nothing; if nothing contributes, the
# result is "0". Returns "" on error, after naming the unknown flag on
# stderr.
function flagsToC(f, parts, result, i, count, value) {
    # locals: parts, result, i, count, value
    count = split(f, parts, /\|/); # Split input at pipe characters.
    result = "0";

    for (i = 1; i <= count; i++) {
        if (!(parts[i] in flagValues)) {
            # Report the name as written, not the failed lookup result.
            printf("bogus flag: %s\n", parts[i]) > "/dev/stderr";
            return ""; # Bogus flag name.
        }

        value = flagValues[parts[i]];
        if (value == "0") {
            continue; # Nothing to append for this case.
        }

        result = (result == "0") ? value : (result "|" value);
    }

    return result;
}
377
# Returns 1 if one of the pipe-separated flags of the given opcode (by
# index) is exactly "optimized", and 0 otherwise.
function isOptimized(idx, parts, f, total) {
    # locals: parts, f, total
    total = split(flags[idx], parts, /\|/); # Split flags[idx] at pipes.
    for (f = 1; f <= total; f++) {
        if (parts[f] == "optimized") {
            return 1;
        }
    }
    return 0;
}
387
# Returns 1 if the given opcode (by index) is unused and 0 otherwise.
# An opcode counts as unused when it has no definition at all or when
# the name in its definition contains the string "unused".
function isUnused(idx, n) {
    # locals: n
    n = name[idx];
    if (n == "") {
        return 1;
    }
    if (index(n, "unused") > 0) {
        return 1;
    }
    return 0;
}
396
# Returns the uppercase name of the given single-byte opcode (by
# index) or the string "UNUSED_XX" (where XX is the index in hex) if
# the opcode is unused. The odd case for this function is 255, which
# is the first extended (two-byte) opcode. For the purposes of this
# function, it is considered unused. (This is meant as a stop-gap
# measure for code that is not yet prepared to deal with extended
# opcodes.)
function uppernameOrUnusedByte(idx, n) {
    # locals: n
    n = uppername[idx];

    # Test the parameter itself, not whatever loop variable the caller
    # happens to be using.
    if ((n == "") || (idx == 255)) {
        return sprintf("UNUSED_%02X", idx);
    }
    return n;
}
411' "$file" > "$tmpfile"
412
# Capture the exit status of the awk run above. This has to stay the
# first command after it.
awkStatus=$?

if [ "$awkStatus" -ne 0 ]; then
    # Leave the target untouched. Show whatever awk wrote to the
    # temporary file, since that may include its diagnostics.
    echo "opcode-gen: failed to process $file; leaving it unchanged" >&2
    cat "$tmpfile" >&2
    rm -f "$tmpfile"
    exit 1
fi

# Copy rather than move so that the target keeps its own permissions.
if ! cp "$tmpfile" "$file"; then
    rm -f "$tmpfile"
    exit 1
fi
rm -f "$tmpfile"
415