# opcode-gen.awk revision a277f14c3702a474e18a9981f23845d7d7521163
# Copyright (C) 2007 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#
# Awk helper script for opcode-gen.
#

#
# Initialization.
#

# Set the opcode limits, build the lookup tables, load the bytecode
# description and derive everything the directive handlers will print.
BEGIN {
    MAX_OPCODE = 65535;
    MAX_PACKED_OPCODE = 511;
    MAX_PACKED_OPCODE = 255; # TODO: Not for long!

    initIndexTypes();
    initFlags();

    # A non-zero result means the bytecode description could not be parsed.
    if (readBytecodes() != 0) {
        exit 1;
    }

    deriveOpcodeChains();
    createPackedTables();

    # No directive is active and nothing is being swallowed yet.
    emission = "";
    consumeUntil = "";
}

#
# General control (must appear above directive handlers).
#

# While inside a directive section, drop the stale generated text. Only
# the line carrying the closing marker is echoed, and it ends the section.
consumeUntil != "" {
    if (index($0, consumeUntil) > 0) {
        print;
        consumeUntil = "";
    }

    next;
}

# Detect directives. The tag is the text between "BEGIN(" and ")"; it
# selects a handler below and determines the matching "END(tag)" marker.
match($0, /BEGIN\([a-z-]*\)/) {
    # Skip the six characters of "BEGIN(" and leave off the trailing ")".
    emission = substr($0, RSTART + 6, RLENGTH - 7);
    consumeUntil = "END(" emission ")";
    emissionHandled = 0;
    print;
}

#
# Handlers for all of the directives.
#
# NOTE(review): the indentation inside the emitted string literals below
# appears as a single leading space in this copy of the script -- confirm
# against the templates that consume the output.

# Emit one Java int constant per defined, non-optimized opcode.
emission == "opcodes" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;
        printf(" public static final int %s = 0x%s;\n",
               constName[i], hex[i]);
    }
}

# Emit a comment line for each opcode that heads a fitting chain.
emission == "first-opcodes" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;
        if (isFirst[i] == "true") {
            printf(" // DalvOps.%s\n", constName[i]);
        }
    }
}

# Emit a Dop definition per opcode, linking it to the next opcode to try.
emission == "dops" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;

        nextOp = nextOpcode[i];
        nextOp = (nextOp == -1) ? "NO_NEXT" : constName[nextOp];

        printf(" public static final Dop %s =\n" \
               " new Dop(DalvOps.%s, DalvOps.%s,\n" \
               " DalvOps.%s, Form%s.THE_ONE, %s,\n" \
               " \"%s\");\n\n",
               constName[i], constName[i], family[i], nextOp, format[i],
               hasResult[i], name[i]);
    }
}

# Emit a set() call per defined, non-optimized opcode.
emission == "dops-init" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;
        printf(" set(%s);\n", constName[i]);
    }
}

# Emit the OP_* int declarations (four hex digits each).
emission == "libcore-opcodes" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;
        printf(" int OP_%-28s = 0x%04x;\n", constName[i], i);
    }
}

# Emit the two opcode range limits.
emission == "libcore-maximum-values" {
    emissionHandled = 1;

    printf(" MAXIMUM_VALUE = %d;\n", MAX_OPCODE);
    printf(" MAXIMUM_PACKED_VALUE = %d;\n", MAX_PACKED_OPCODE);
}

# Emit one enum entry per packed opcode slot (unused slots included).
emission == "libdex-opcode-enum" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        printf(" OP_%-28s = 0x%02x,\n", packedConstName[i], i);
    }
}

# Emit one H(OP_x) entry per packed slot, each line padded to 78 columns
# and terminated with a backslash.
emission == "libdex-goto-table" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        content = sprintf(" H(OP_%s),", packedConstName[i]);
        printf("%-78s\\\n", content);
    }
}

# Emit the quoted human-readable name of every packed slot.
emission == "libdex-opcode-names" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        printf(" \"%s\",\n", packedName[i]);
    }
}

# Emit the instruction widths, 16 per row.
emission == "libdex-widths" {
    emissionHandled = 1;

    col = 1;
    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        value = sprintf("%d,", packedWidth[i]);
        col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 16, 2, " ");
    }
}

# Emit the flags of every packed slot as a C expression.
emission == "libdex-flags" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        value = flagsToC(packedFlags[i]);
        printf(" %s,\n", value);
    }
}

# Emit the kFmt* format constants, 7 per row.
emission == "libdex-formats" {
    emissionHandled = 1;

    col = 1;
    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        value = sprintf("kFmt%s,", packedFormat[i]);
        col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 7, 9, " ");
    }
}

# Emit the index-type constants, 3 per row.
emission == "libdex-index-types" {
    emissionHandled = 1;

    col = 1;
    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        value = sprintf("%s,", indexTypeValues[packedIndexType[i]]);
        col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 3, 19, " ");
    }
}

#
# General control (must appear after the directives).
#

# Handle the end of directive processing. An unrecognized tag is reported
# and its section is left untouched (nothing is swallowed).
emission != "" {
    if (!emissionHandled) {
        printf("WARNING: unknown tag \"%s\"\n", emission) >"/dev/stderr";
        consumeUntil = "";
    }

    emission = "";
    next;
}

# Most lines just get copied from the source as-is.
{ print; }

#
# Helper functions.
#

# Helper to print out an element in a multi-column fashion. It returns
# the (one-based) column number that the next element will be printed
# in.
#
#   value      - text of the element to print
#   isLast     - true if this is the final element overall
#   col        - one-based column this element goes in
#   numCols    - number of columns per row
#   colWidth   - width each non-final element is padded to
#   linePrefix - text printed before the first column of a row
function colPrint(value, isLast, col, numCols, colWidth, linePrefix) {
    # The last element of a row is treated like the last element overall:
    # it is not padded and is followed by a newline.
    isLast = (isLast || (col == numCols));
    printf("%s%-*s%s",
           (col == 1) ? linePrefix : " ",
           isLast ? 1 : colWidth, value,
           isLast ? "\n" : "");

    return (col % numCols) + 1;
}

# Read the bytecode description file named by the variable bytecodeFile.
# Returns 0 on success and 1 if a line could not be understood; exits
# outright if the file cannot be read.
function readBytecodes(i, parts, line, cmd, status, count, whole) {
    # locals: parts, line, cmd, status, count, whole
    for (;;) {
        # Read a line.
        status = getline line <bytecodeFile;
        if (status == 0) break;
        if (status < 0) {
            # Diagnostics go to stderr so they never end up in the
            # generated output.
            print "trouble reading bytecode file" >"/dev/stderr";
            exit 1;
        }

        # Clean up the line and extract the command. The whitespace
        # pattern must require at least one character: a pattern that can
        # match the empty string would insert a space between every
        # character of the line.
        gsub(/[ \t]+/, " ", line);
        sub(/ *#.*$/, "", line);
        sub(/ $/, "", line);
        sub(/^ /, "", line);
        count = split(line, parts);
        if (count == 0) continue; # Blank or comment line.
        cmd = parts[1];
        whole = line; # Kept intact for error reporting.
        sub(/^[a-z][a-z]* */, "", line); # Remove the command from line.

        if (cmd == "op") {
            status = defineOpcode(line);
        } else if (cmd == "format") {
            status = defineFormat(line);
        } else {
            status = -1;
        }

        if (status != 0) {
            printf("syntax error on line: %s\n", whole) >"/dev/stderr";
            return 1;
        }
    }

    close(bytecodeFile);
    return 0;
}

# Define an opcode from the six fields of an "op" line: hex value, name,
# format, has-result marker, index type and flags. Returns 0 on success
# and non-zero if the line is malformed or refers to unknown values.
function defineOpcode(line, count, parts, idx) {
    # locals: count, parts, idx
    count = split(line, parts);
    if (count != 6) return -1;
    idx = parseHex(parts[1]);
    if (idx < 0) return -1;
    if (idx > MAX_OPCODE) {
        # Every emission loop stops at MAX_OPCODE, so a larger opcode
        # would otherwise be dropped without any notice.
        printf("opcode out of range: %s\n", parts[1]) >"/dev/stderr";
        return -1;
    }

    # Extract directly specified values from the line.
    hex[idx] = parts[1];
    name[idx] = parts[2];
    format[idx] = parts[3];
    hasResult[idx] = (parts[4] == "n") ? "false" : "true";
    indexType[idx] = parts[5];
    flags[idx] = parts[6];

    # Calculate derived values.

    constName[idx] = toupper(name[idx]);
    gsub("[---/]", "_", constName[idx]); # Dash and slash become underscore.
    gsub("[+^]", "", constName[idx]); # Plus and caret are removed.
    split(name[idx], parts, "/");

    family[idx] = toupper(parts[1]);
    gsub("-", "_", family[idx]); # Dash becomes underscore.
    gsub("[+^]", "", family[idx]); # Plus and caret are removed.

    # Width is the first format char. substr() is used rather than
    # splitting on an empty separator, whose effect POSIX leaves
    # unspecified.
    width[idx] = substr(format[idx], 1, 1);

    # This association is used when computing "next" opcodes.
    familyFormat[family[idx],format[idx]] = idx;

    # Verify values.

    if (nextFormat[format[idx]] == "") {
        printf("unknown format: %s\n", format[idx]) >"/dev/stderr";
        return 1;
    }

    if (indexTypeValues[indexType[idx]] == "") {
        printf("unknown index type: %s\n", indexType[idx]) >"/dev/stderr";
        return 1;
    }

    if (flagsToC(flags[idx]) == "") {
        printf("bogus flags: %s\n", flags[idx]) >"/dev/stderr";
        return 1;
    }

    return 0;
}

# Define a format family. Each format named on the line is chained to the
# one following it, and the last one is chained to "none". Returns 0 on
# success and -1 if the line names no formats.
function defineFormat(line, count, parts, i) {
    # locals: count, parts, i
    count = split(line, parts);
    if (count < 1) return -1;
    formats[parts[1]] = line;

    parts[count + 1] = "none"; # Sentinel terminating the chain.
    for (i = 1; i <= count; i++) {
        nextFormat[parts[i]] = parts[i + 1];
    }

    return 0;
}

# Produce the nextOpcode and isFirst arrays. The former indicates, for
# each opcode, which one should be tried next when doing instruction
# fitting. The latter indicates which opcodes are at the head of an
# instruction fitting chain.
function deriveOpcodeChains(i, op) {
    # locals: i, op

    # Start by assuming every defined opcode heads a chain...
    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i)) continue;
        isFirst[i] = "true";
    }

    # ...then demote every opcode that is some other opcode's successor.
    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i)) continue;
        op = findNextOpcode(i);
        nextOpcode[i] = op;
        if (op != -1) {
            isFirst[op] = "false";
        }
    }
}

# Given an opcode by index, find the next opcode in the same family
# (that is, with the same base name) to try when matching instructions
# to opcodes. This simply walks the nextFormat chain looking for a
# match. This returns the index of the matching opcode or -1 if there
# is none.
function findNextOpcode(idx, fam, fmt, result) {
    # locals: fam, fmt, result
    fam = family[idx];

    # A family seldom has a variant for every format, so keep stepping
    # along the format chain -- starting just past this opcode's own
    # format -- until a variant turns up or the chain hits "none".
    fmt = nextFormat[format[idx]];
    while (fmt != "none") {
        result = familyFormat[fam,fmt];
        if (result != "") {
            return result;
        }
        fmt = nextFormat[fmt];
    }

    return -1;
}

# Fill in the packed* arrays, which hold per-opcode info indexed by
# packed opcode (0 through MAX_PACKED_OPCODE) rather than by the sparse
# raw opcode value.
function createPackedTables(i, op) {
    # locals: i, op
    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        op = unpackOpcode(i);

        if (i != 255 && !isUnused(op)) {
            # A real opcode lives here: copy its attributes across.
            packedName[i] = name[op];
            packedConstName[i] = constName[op];
            packedFormat[i] = format[op];
            packedFlags[i] = flags[op];
            packedWidth[i] = width[op];
            packedIndexType[i] = indexType[op];
            continue;
        }

        # Placeholder slot. Only the names differ between the two kinds.
        if (i == 255) {
            # Special case: This is the low-opcode slot for a would-be
            # extended opcode dispatch implementation.
            packedName[i] = "dispatch-ff";
            packedConstName[i] = "DISPATCH_FF";
        } else {
            packedName[i] = unusedName(op);
            packedConstName[i] = unusedConstName(op);
        }
        packedFormat[i] = "00x";
        packedFlags[i] = 0;
        packedWidth[i] = 0;
        packedIndexType[i] = "unknown";
    }
}

# Map a packed opcode back to its raw (unpacked) opcode value. Packed
# values up to 255 are already raw; each later one maps to a raw value
# whose low byte is 255.
function unpackOpcode(idx) {
    # Note: This must be the inverse of the corresponding code in
    # libdex/DexOpcodes.h.
    if (idx > 255) {
        return ((idx - 256) * 256) + 255;
    }
    return idx;
}

# Returns the "unused" name of the given opcode (by index).
# That is, this is the human-oriented name to use for an opcode
# definition in cases where the opcode isn't used. Indexes up to 255 get
# two hex digits, larger ones get four.
function unusedName(idx) {
    return sprintf((idx <= 255) ? "unused-%02x" : "unused-%04x", idx);
}

# Returns the "unused" constant name of the given opcode (by index).
# That is, this is the name to use for a constant definition in cases
# where the opcode isn't used. The whole name, hex digits included, is
# upper-cased.
function unusedConstName(idx) {
    return toupper(sprintf((idx <= 255) ? "UNUSED_%02x" : "UNUSED_%04x", idx));
}

# Convert a hex value to an int. Upper- and lower-case digits are both
# accepted. Returns -1 (after reporting on stderr) if the string is empty
# or contains anything other than hex digits.
function parseHex(hex, result, chars, count, c, i) {
    # locals: result, count, c, i (chars is kept only so the parameter
    # list stays as it was; it is no longer used)
    hex = tolower(hex);
    count = length(hex);
    if (count == 0) {
        # An empty string is not a number; don't quietly call it zero.
        printf("bogus hex value: %s\n", hex) >"/dev/stderr";
        return -1;
    }

    result = 0;
    for (i = 1; i <= count; i++) {
        # Pick characters out with substr() rather than splitting on an
        # empty separator, whose effect POSIX leaves unspecified.
        # index() is one-based, so a digit's value is its position - 1.
        c = index("0123456789abcdef", substr(hex, i, 1));
        if (c == 0) {
            printf("bogus hex value: %s\n", hex) >"/dev/stderr";
            return -1;
        }
        result = (result * 16) + c - 1;
    }
    return result;
}

# Initialize the indexTypes data: a map from the index type names used in
# the bytecode description to the constant names emitted in C output.
function initIndexTypes() {
    indexTypeValues["unknown"] = "kIndexUnknown";
    indexTypeValues["none"] = "kIndexNone";
    indexTypeValues["varies"] = "kIndexVaries";
    indexTypeValues["type-ref"] = "kIndexTypeRef";
    indexTypeValues["string-ref"] = "kIndexStringRef";
    indexTypeValues["method-ref"] = "kIndexMethodRef";
    indexTypeValues["field-ref"] = "kIndexFieldRef";
    indexTypeValues["inline-method"] = "kIndexInlineMethod";
    indexTypeValues["vtable-offset"] = "kIndexVtableOffset";
    indexTypeValues["field-offset"] = "kIndexFieldOffset";
}

# Initialize the flags data.
function initFlags() {
    flagValues["branch"] = "kInstrCanBranch";
    flagValues["continue"] = "kInstrCanContinue";
    flagValues["switch"] = "kInstrCanSwitch";
    flagValues["throw"] = "kInstrCanThrow";
    flagValues["return"] = "kInstrCanReturn";
    flagValues["invoke"] = "kInstrInvoke";
    flagValues["optimized"] = "0"; # Not represented in C output
    flagValues["0"] = "0";
}

# Translate the given flags into the equivalent C expression. Returns
# "" on error.
#
# The input is a pipe-separated list of flag names; the result is the
# pipe-separated list of their C constants, or "0" if none of the flags
# has a C representation. An unknown flag name is reported on stderr.
function flagsToC(f, parts, result, i, count, value) {
    # locals: parts, result, i, count, value
    count = split(f, parts, /\|/); # Split input at pipe characters.
    result = "0";

    for (i = 1; i <= count; i++) {
        # Look the name up into its own variable so the name itself is
        # still available for the error message.
        value = flagValues[parts[i]];
        if (value == "") {
            printf("bogus flag: %s\n", parts[i]) >"/dev/stderr";
            return ""; # Bogus flag name.
        } else if (value == "0") {
            # Nothing to append for this case.
        } else if (result == "0") {
            result = value;
        } else {
            result = result "|" value;
        }
    }

    return result;
}

# Returns true if the given opcode (by index) is an "optimized" opcode,
# that is, if "optimized" is one of its pipe-separated flags.
function isOptimized(idx, parts, f) {
    # locals: parts, f
    split(flags[idx], parts, /\|/); # Split flags[idx] at pipes.
    for (f in parts) {
        if (parts[f] == "optimized") return 1;
    }
    return 0;
}

# Returns true if there is no definition for the given opcode (by index).
function isUnused(idx) {
    return (name[idx] == "");
}