opcode-gen revision 11a4a79b09a2be2bd7a7141ce112de3ad0432e53
#!/bin/bash
#
# Copyright (C) 2007 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# opcode-gen <file>
#
# This script uses the file bytecode.txt (in this directory) to
# generate code inside the given <file>, based on the directives found
# in that file. A directive is a line containing BEGIN(<name>); everything
# after it, up to the next line containing END(<name>), is replaced with
# freshly generated text. The BEGIN and END lines themselves are kept.
#
#     opcodes:            static final ints for each opcode (no optimized ops)
#     dops:               static final objects for each opcode (no optimized
#                         ops)
#     dops-init:          initialization code for the "dops" (no optimized
#                         ops)
#     first-opcodes:      a comment indicating which opcodes are at the head
#                         position of instruction fitting chains (no
#                         optimized ops)
#     libdex-opcode-enum: one "OP_<NAME> = 0x<hex>," entry per single-byte
#                         opcode value, with UNUSED_XX standing in for
#                         unused values
#     libdex-goto-table:  one "H(OP_<NAME>)," entry per single-byte opcode
#                         value, each ending in a line-continuation backslash
#
# The file is rewritten in place, but only if generation succeeds. On any
# error a diagnostic is written to stderr, the file is left untouched and
# the script exits with a non-zero status.

file="$1"

# Validate the argument before it is used for anything.
if [ -z "$file" ]; then
    echo "must specify a file" >&2
    exit 1
fi

echo "processing $(basename "$file")"

# Use an unpredictable temp file name and always clean it up on exit.
tmpfile=$(mktemp "${TMPDIR:-/tmp}/opcode-gen.XXXXXX") || exit 1
trap 'rm -f "$tmpfile"' EXIT

# Set up prog to be the path of this script, including following symlinks,
# and set up progdir to be the fully-qualified pathname of its directory.
prog="$0"
while [ -h "${prog}" ]; do
    newProg=$(/bin/ls -ld "${prog}")
    newProg=$(expr "${newProg}" : ".* -> \(.*\)$")
    if expr "x${newProg}" : 'x/' >/dev/null; then
        prog="${newProg}"
    else
        progdir=$(dirname "${prog}")
        prog="${progdir}/${newProg}"
    fi
done
oldwd=$(pwd)
progdir=$(dirname "${prog}")
cd "${progdir}"
progdir=$(pwd)
prog="${progdir}/$(basename "${prog}")"
cd "${oldwd}"

bytecodeFile="$progdir/bytecode.txt"

# NOTE: the awk program below is wrapped in single quotes, so it must not
# contain any single-quote characters, not even inside comments.
awk -v "bytecodeFile=$bytecodeFile" '

# NOTE(review): the leading whitespace inside the printf templates below is
# reproduced as found; confirm the intended indentation against the
# generated target files.

BEGIN {
    MAX_OPCODE = 65535;
    MAX_LIBDEX_OPCODE = 255; # TODO: Will not be true for long!
    initIndexTypes();
    initFlags();
    if (readBytecodes()) exit 1;
    deriveOpcodeChains();
    consumeUntil = "";
}

# While inside a generated region, drop the old contents. The line that
# contains the END marker resets the state and falls through to the
# rules below, so it gets printed.
consumeUntil != "" {
    if (index($0, consumeUntil) != 0) {
        consumeUntil = "";
    } else {
        next;
    }
}

/BEGIN\(opcodes\)/ {
    consumeUntil = "END(opcodes)";
    print;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;
        printf(" public static final int %s = 0x%s;\n",
               uppername[i], hex[i]);
    }

    next;
}

/BEGIN\(first-opcodes\)/ {
    consumeUntil = "END(first-opcodes)";
    print;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;
        if (isFirst[i] == "true") {
            printf(" // DalvOps.%s\n", uppername[i]);
        }
    }

    next;
}

/BEGIN\(dops\)/ {
    consumeUntil = "END(dops)";
    print;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;

        nextOp = nextOpcode[i];
        nextOp = (nextOp == -1) ? "NO_NEXT" : uppername[nextOp];

        printf(" public static final Dop %s =\n" \
               " new Dop(DalvOps.%s, DalvOps.%s,\n" \
               " DalvOps.%s, Form%s.THE_ONE, %s,\n" \
               " \"%s\");\n\n",
               uppername[i], uppername[i], family[i], nextOp, format[i],
               hasResult[i], name[i]);
    }

    next;
}

/BEGIN\(dops-init\)/ {
    consumeUntil = "END(dops-init)";
    print;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;
        printf(" set(%s);\n", uppername[i]);
    }

    next;
}

/BEGIN\(libdex-opcode-enum\)/ {
    consumeUntil = "END(libdex-opcode-enum)";
    print;

    for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
        printf(" OP_%-28s = 0x%02x,\n", uppernameOrUnusedByte(i), i);
    }

    next;
}

/BEGIN\(libdex-goto-table\)/ {
    consumeUntil = "END(libdex-goto-table)";
    print;

    for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
        content = sprintf(" H(OP_%s),", uppernameOrUnusedByte(i));
        printf("%-78s\\\n", content);
    }

    next;
}

# Every line outside of a generated region is copied through unchanged.
{ print; }

# Read the bytecode description file named by the bytecodeFile variable.
# Each non-blank, non-comment line is either an "op" or a "format"
# definition. Returns 0 on success and 1 on failure, after writing a
# diagnostic to stderr.
function readBytecodes(parts, line, raw, cmd, status, count) {
    # locals: parts, line, raw, cmd, status, count
    for (;;) {
        # Read a line.
        status = getline line <bytecodeFile;
        if (status == 0) break;
        if (status < 0) {
            printf("trouble reading bytecode file: %s\n",
                   bytecodeFile) > "/dev/stderr";
            return 1;
        }

        # Clean up the line and extract the command. Runs of spaces are
        # collapsed first so the single-space patterns below suffice.
        gsub(/ +/, " ", line);
        sub(/ *#.*$/, "", line);
        sub(/ $/, "", line);
        sub(/^ /, "", line);
        count = split(line, parts);
        if (count == 0) continue; # Blank or comment line.
        cmd = parts[1];
        raw = line; # Kept whole for error reporting.
        sub(/^[a-z][a-z]* */, "", line); # Remove the command from line.

        if (cmd == "op") {
            status = defineOpcode(line);
        } else if (cmd == "format") {
            status = defineFormat(line);
        } else {
            status = -1;
        }

        if (status != 0) {
            printf("syntax error on line: %s\n", raw) > "/dev/stderr";
            close(bytecodeFile);
            return 1;
        }
    }

    close(bytecodeFile);
    return 0;
}

# Define an opcode from the six fields of an "op" line: hex value, name,
# format, has-result marker ("n" means no result), index type and flags.
# Returns 0 on success and non-zero on error. The format must already
# have been declared by a "format" line.
function defineOpcode(line, count, parts, idx) {
    # locals: count, parts, idx
    count = split(line, parts);
    if (count != 6) return -1;
    idx = parseHex(parts[1]);
    if (idx < 0) return -1;

    # Extract directly specified values from the line.
    hex[idx] = parts[1];
    name[idx] = parts[2];
    format[idx] = parts[3];
    hasResult[idx] = (parts[4] == "n") ? "false" : "true";
    indexType[idx] = parts[5];
    flags[idx] = parts[6];

    # Calculate derived values: the uppercase identifier form of the name
    # and the family, which is the part of the name before any slash.
    uppername[idx] = toupper(name[idx]);
    gsub("[-/]", "_", uppername[idx]);
    split(name[idx], parts, "/");
    family[idx] = toupper(parts[1]);
    gsub("-", "_", family[idx]);

    # This association is used when computing "next" opcodes.
    familyFormat[family[idx],format[idx]] = idx;

    # Verify values.

    if (nextFormat[format[idx]] == "") {
        printf("unknown format: %s\n", format[idx]) > "/dev/stderr";
        return 1;
    }

    if (indexTypeValues[indexType[idx]] == "") {
        printf("unknown index type: %s\n", indexType[idx]) > "/dev/stderr";
        return 1;
    }

    if (flagsToC(flags[idx]) == "") {
        printf("bogus flags: %s\n", flags[idx]) > "/dev/stderr";
        return 1;
    }

    return 0;
}

# Define a format family. The line lists formats in the order they
# should be tried; each one is chained to its successor via nextFormat,
# and the last one is chained to the sentinel "none".
function defineFormat(line, count, parts, i) {
    # locals: count, parts, i
    count = split(line, parts);
    if (count < 1) return -1;
    formats[parts[1]] = line;

    parts[count + 1] = "none";
    for (i = 1; i <= count; i++) {
        nextFormat[parts[i]] = parts[i + 1];
    }

    return 0;
}

# Produce the nextOpcode and isFirst arrays. The former indicates, for
# each opcode, which one should be tried next when doing instruction
# fitting. The latter indicates which opcodes are at the head of an
# instruction fitting chain.
function deriveOpcodeChains(i, op) {
    # locals: i, op

    # Start by assuming every opcode heads a chain ...
    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i)) continue;
        isFirst[i] = "true";
    }

    # ... then demote each opcode that is the successor of another.
    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i)) continue;
        op = findNextOpcode(i);
        nextOpcode[i] = op;
        if (op != -1) {
            isFirst[op] = "false";
        }
    }
}

# Given an opcode by index, find the next opcode in the same family
# (that is, with the same base name) to try when matching instructions
# to opcodes. This simply walks the nextFormat chain looking for a
# match. This returns the index of the matching opcode or -1 if there
# is none.
function findNextOpcode(idx, fam, fmt, result) {
    # locals: fam, fmt, result
    fam = family[idx];

    # Not every opcode has a version with every possible format, so
    # we have to iterate down the chain until we find one or run out of
    # formats to try.
    for (fmt = nextFormat[format[idx]]; fmt != "none"; fmt = nextFormat[fmt]) {
        result = familyFormat[fam,fmt];
        if (result != "") {
            return result;
        }
    }

    return -1;
}

# Convert a hex string to an int. Returns -1, after writing a diagnostic
# to stderr, if the string contains a character that is not a hex digit.
function parseHex(text, result, count, c, i) {
    # locals: result, count, c, i
    text = tolower(text);
    count = length(text);
    result = 0;
    for (i = 1; i <= count; i++) {
        # index() is 1-based, so a digit with value v is found at v + 1.
        c = index("0123456789abcdef", substr(text, i, 1));
        if (c == 0) {
            printf("bogus hex value: %s\n", text) > "/dev/stderr";
            return -1;
        }
        result = (result * 16) + c - 1;
    }
    return result;
}

# Initialize the indexTypes data.
function initIndexTypes() {
    indexTypeValues["unknown"] = "kIndexUnknown";
    indexTypeValues["none"] = "kIndexNone";
    indexTypeValues["varies"] = "kIndexVaries";
    indexTypeValues["type-ref"] = "kIndexTypeRef";
    indexTypeValues["string-ref"] = "kIndexStringRef";
    indexTypeValues["method-ref"] = "kIndexMethodRef";
    indexTypeValues["field-ref"] = "kIndexFieldRef";
    indexTypeValues["inline-method"] = "kIndexInlineMethod";
    indexTypeValues["vtable-offset"] = "kIndexVtableOffset";
    indexTypeValues["field-offset"] = "kIndexFieldOffset";
}

# Initialize the flags data.
function initFlags() {
    flagValues["branch"] = "kInstrCanBranch";
    flagValues["continue"] = "kInstrCanContinue";
    flagValues["switch"] = "kInstrCanSwitch";
    flagValues["throw"] = "kInstrCanThrow";
    flagValues["return"] = "kInstrCanReturn";
    flagValues["invoke"] = "kInstrInvoke";
    flagValues["optimized"] = "0"; # Not represented in C output
    flagValues["0"] = "0";
}

# Translate the given pipe-separated flags into the equivalent C
# expression. Returns "" on error, after naming the offending flag on
# stderr.
function flagsToC(f, parts, result, i, count) {
    # locals: parts, result, i, count
    count = split(f, parts, /\|/); # Split input at pipe characters.
    result = "0";

    for (i = 1; i <= count; i++) {
        f = flagValues[parts[i]];
        if (f == "") {
            # Report the name that failed the lookup, not the (empty)
            # lookup result.
            printf("bogus flag: %s\n", parts[i]) > "/dev/stderr";
            return ""; # Bogus flag name.
        } else if (f == "0") {
            # Nothing to append for this case.
        } else if (result == "0") {
            result = f;
        } else {
            result = result "|" f;
        }
    }

    return result;
}

# Returns true if the given opcode (by index) is an "optimized" opcode.
function isOptimized(idx, parts, f) {
    # locals: parts, f
    split(flags[idx], parts, /\|/); # Split flags[idx] at pipes.
    for (f in parts) {
        if (parts[f] == "optimized") return 1;
    }
    return 0;
}

# Returns true if the given opcode (by index) is unused. This is true either
# if there is no definition at all for the opcode or if there is a definition
# and the name contains the string "unused".
function isUnused(idx, n) {
    # locals: n
    n = name[idx];
    return (n == "") || (index(n, "unused") != 0);
}

# Returns the uppercase name of the given single-byte opcode (by
# index) or the string "UNUSED_XX" (where XX is the index in hex) if
# the opcode is unused. The odd case for this function is 255, which
# is the first extended (two-byte) opcode. For the purposes of this
# function, it is considered unused. (This is meant as a stop-gap
# measure for code that is not yet prepared to deal with extended
# opcodes.)
function uppernameOrUnusedByte(idx, n) {
    # locals: n
    n = uppername[idx];
    if ((n == "") || (idx == 255)) {
        return toupper(sprintf("UNUSED_%02x", idx));
    }
    return n;
}
' "$file" > "$tmpfile"
status=$?

# Never overwrite the target with partial or failed output.
if [ "$status" -ne 0 ]; then
    echo "opcode-gen: generation failed; leaving $file unchanged" >&2
    exit "$status"
fi

cp "$tmpfile" "$file"