opcode-gen.awk revision 3c5df37a2df7368eb274eb097e9cfa2ccc7fffb6
# Copyright (C) 2007 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#
# Awk helper script for opcode-gen.
#

#
# Initialization.
#

# Set up the lookup tables, load the bytecode description, and derive the
# per-opcode tables before any input line is processed. The script exits
# with status 1 if the bytecode description cannot be loaded.
BEGIN {
    # No directive section is open when processing starts.
    consumeUntil = "";
    emission = "";

    MAX_OPCODE = 65535;
    MAX_PACKED_OPCODE = 511;

    initIndexTypes();
    initFlags();

    if (readBytecodes() != 0) {
        exit 1;
    }

    deriveOpcodeChains();
    createPackedTables();
}

#
# General control (must appear above directive handlers).
#

# While inside a directive section, drop the old generated content. Only
# the line carrying the matching END(...) marker is passed through, and it
# closes the section.
consumeUntil != "" {
    if (index($0, consumeUntil) > 0) {
        print $0;
        consumeUntil = "";
    }

    next;
}

# Detect directives. The tag is the text between "BEGIN(" and ")"; the
# handler rules further down key off of `emission`.
match($0, /BEGIN\([a-z-]*\)/) {
    # Skip the six characters of "BEGIN(" and leave off the closing paren.
    emission = substr($0, RSTART + 6, RLENGTH - 7);
    consumeUntil = "END(" emission ")";
    emissionHandled = 0;
}

# Most lines just get copied from the source as-is, including the start
# comment for directives.
{
    print $0;
}

#
# Handlers for all of the directives.
#

# NOTE(review): the whitespace inside the format strings below is kept
# exactly as found in this copy of the script. Confirm that the leading
# indentation matches what the generated files are expected to contain.

# Emit one Java int constant per defined, non-optimized opcode.
emission == "opcodes" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;
        printf(" public static final int %s = 0x%s;\n",
               constName[i], hex[i]);
    }
}

# Emit a comment line naming every opcode that heads a fitting chain.
emission == "first-opcodes" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;
        if (isFirst[i] == "true") {
            printf(" // DalvOps.%s\n", constName[i]);
        }
    }
}

# Emit a Dop definition per defined, non-optimized opcode, including the
# family, the next opcode in the fitting chain (or NO_NEXT), the format,
# the has-result flag, and the human-oriented name.
emission == "dops" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;

        nextOp = nextOpcode[i];
        nextOp = (nextOp == -1) ? "NO_NEXT" : constName[nextOp];

        printf(" public static final Dop %s =\n" \
               " new Dop(DalvOps.%s, DalvOps.%s,\n" \
               " DalvOps.%s, Form%s.THE_ONE, %s,\n" \
               " \"%s\");\n\n",
               constName[i], constName[i], family[i], nextOp, format[i],
               hasResult[i], name[i]);
    }
}

# Emit an Info definition per defined, non-optimized opcode. Index types
# "none" and "unknown" become null; any other becomes an IndexType
# constant (upper-cased, dashes turned into underscores).
emission == "opcode-info-defs" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;

        itype = indexType[i];
        if ((itype == "none") || (itype == "unknown")) {
            itype = "null";
        } else {
            itype = toupper(itype);
            gsub(/-/, "_", itype);
            itype = "IndexType." itype;
        }

        printf(" public static final Info %s =\n" \
               " new Info(DalvOps.%s,\n" \
               " InstructionCodec.FORMAT_%s, %s);\n\n", \
               constName[i], constName[i], toupper(format[i]), itype);
    }
}

# Emit a set(...) call per defined, non-optimized opcode.
emission == "dops-init" || emission == "opcode-info-init" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;
        printf(" set(%s);\n", constName[i]);
    }
}

# Emit an OP_* int constant per defined, non-optimized opcode.
emission == "libcore-opcodes" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;
        printf(" int OP_%-28s = 0x%04x;\n", constName[i], i);
    }
}

# Emit the maximum raw and packed opcode values.
emission == "libcore-maximum-values" {
    emissionHandled = 1;

    printf(" MAXIMUM_VALUE = %d;\n", MAX_OPCODE);
    printf(" MAXIMUM_PACKED_VALUE = %d;\n", MAX_PACKED_OPCODE);
}

# Emit the C #defines for the maximum opcode value and the number of
# packed opcodes (one more than the maximum packed value).
emission == "libdex-maximum-values" {
    emissionHandled = 1;

    printf("#define kMaxOpcodeValue 0x%x\n", MAX_OPCODE);
    printf("#define kNumPackedOpcodes 0x%x\n", MAX_PACKED_OPCODE + 1);
}

# Emit one enum entry per packed opcode slot, used or not.
emission == "libdex-opcode-enum" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        printf(" OP_%-28s = 0x%02x,\n", packedConstName[i], i);
    }
}

# Emit one H(OP_...) entry per packed opcode slot, padded to 78 columns
# and followed by a backslash so the lines can sit inside a C macro.
emission == "libdex-goto-table" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        content = sprintf(" H(OP_%s),", packedConstName[i]);
        printf("%-78s\\\n", content);
    }
}

# Emit the quoted human-oriented name of every packed opcode slot.
emission == "libdex-opcode-names" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        printf(" \"%s\",\n", packedName[i]);
    }
}

# Emit the instruction widths, 16 per row.
emission == "libdex-widths" {
    emissionHandled = 1;

    col = 1;
    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        value = sprintf("%d,", packedWidth[i]);
        col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 16, 2, " ");
    }
}

# Emit the C flags expression of every packed opcode slot, one per line.
emission == "libdex-flags" {
    emissionHandled = 1;

    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        value = flagsToC(packedFlags[i]);
        printf(" %s,\n", value);
    }
}

# Emit the kFmt* format constants, 7 per row.
emission == "libdex-formats" {
    emissionHandled = 1;

    col = 1;
    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        value = sprintf("kFmt%s,", packedFormat[i]);
        col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 7, 9, " ");
    }
}

# Emit the kIndex* index type constants, 3 per row.
emission == "libdex-index-types" {
    emissionHandled = 1;

    col = 1;
    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        value = sprintf("%s,", indexTypeValues[packedIndexType[i]]);
        col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 3, 19, " ");
    }
}

# Handle the end of directive processing (must appear after the directive
# clauses). An unrecognized tag is reported on stderr and its section is
# left untouched (nothing is consumed).
emission != "" {
    if (!emissionHandled) {
        printf("WARNING: unknown tag \"%s\"\n", emission) >"/dev/stderr";
        consumeUntil = "";
    }

    emission = "";
}

#
# Helper functions.
#

# Helper to print out an element in a multi-column fashion. It returns
# the (one-based) column number that the next element will be printed
# in.
#
#   value      - text of the element to print
#   isLast     - true if this is the final element overall
#   col        - one-based column the element goes in
#   numCols    - number of columns per row
#   colWidth   - width each non-final element is left-justified to
#   linePrefix - text printed before the first element of a row
function colPrint(value, isLast, col, numCols, colWidth, linePrefix) {
    # The last element of a row is treated like the last element overall:
    # it is not padded and is followed by a newline.
    isLast = (isLast || (col == numCols));
    printf("%s%-*s%s",
        (col == 1) ? linePrefix : " ",
        isLast ? 1 : colWidth, value,
        isLast ? "\n" : "");

    return (col % numCols) + 1;
}

# Read the bytecode description file named by the variable bytecodeFile.
# Each non-blank, non-comment line is either an "op" line (handed to
# defineOpcode) or a "format" line (handed to defineFormat).
#
# Returns 0 on success and 1 on any read or syntax error; all diagnostics
# go to stderr so they never end up in the generated output.
function readBytecodes(i, parts, line, cmd, status, count, rawLine) {
    # locals: parts, line, cmd, status, count, rawLine
    for (;;) {
        # Read a line.
        status = getline line < bytecodeFile;
        if (status == 0) break;
        if (status < 0) {
            print "trouble reading bytecode file" >"/dev/stderr";
            return 1;
        }

        # Keep the untouched text around for error reporting.
        rawLine = line;

        # Clean up the line and extract the command. The whitespace
        # pattern must require at least one character: a pattern that
        # can match the empty string would insert a space between every
        # pair of characters.
        gsub(/[ \t]+/, " ", line);
        sub(/ *#.*$/, "", line);
        sub(/ $/, "", line);
        sub(/^ /, "", line);
        count = split(line, parts);
        if (count == 0) continue; # Blank or comment line.
        cmd = parts[1];
        sub(/^[a-z][a-z]* */, "", line); # Remove the command from line.

        if (cmd == "op") {
            status = defineOpcode(line);
        } else if (cmd == "format") {
            status = defineFormat(line);
        } else {
            status = -1;
        }

        if (status != 0) {
            printf("syntax error on line: %s\n", rawLine) >"/dev/stderr";
            return 1;
        }
    }

    close(bytecodeFile);
    return 0;
}

# Define an opcode. The line (with the leading "op" already removed) must
# hold exactly six fields: the opcode value in hex, the name, the format,
# the has-result marker ("n" means no result), the index type, and the
# flags. Returns 0 on success and nonzero on error.
function defineOpcode(line, count, parts, idx) {
    # locals: count, parts, idx
    count = split(line, parts);
    if (count != 6) return -1;
    idx = parseHex(parts[1]);
    if (idx < 0) return -1;

    # Extract directly specified values from the line.
    hex[idx] = parts[1];
    name[idx] = parts[2];
    format[idx] = parts[3];
    hasResult[idx] = (parts[4] == "n") ? "false" : "true";
    indexType[idx] = parts[5];
    flags[idx] = parts[6];

    # Calculate derived values.

    constName[idx] = toupper(name[idx]);
    gsub("[/-]", "_", constName[idx]); # Dash and slash become underscore.
    gsub("[+^]", "", constName[idx]);  # Plus and caret are removed.

    # The family is the part of the name before the first slash.
    split(name[idx], parts, "/");

    family[idx] = toupper(parts[1]);
    gsub("-", "_", family[idx]);       # Dash becomes underscore.
    gsub("[+^]", "", family[idx]);     # Plus and caret are removed.

    # Width is the first format char.
    width[idx] = substr(format[idx], 1, 1);

    # This association is used when computing "next" opcodes.
    familyFormat[family[idx],format[idx]] = idx;

    # Verify values.

    if (nextFormat[format[idx]] == "") {
        printf("unknown format: %s\n", format[idx]) >"/dev/stderr";
        return 1;
    }

    if (indexTypeValues[indexType[idx]] == "") {
        printf("unknown index type: %s\n", indexType[idx]) >"/dev/stderr";
        return 1;
    }

    if (flagsToC(flags[idx]) == "") {
        printf("bogus flags: %s\n", flags[idx]) >"/dev/stderr";
        return 1;
    }

    return 0;
}

# Define a format family.
# The line lists one or more format names in fitting order. The whole line
# is remembered under its first format, and every format is linked to the
# one that follows it ("none" for the last). Returns 0 on success or -1 if
# the line names no format at all.
function defineFormat(line, total, names, pos) {
    # locals: total, names, pos
    total = split(line, names);
    if (total < 1) {
        return -1;
    }

    formats[names[1]] = line;

    # Link each format to its successor...
    for (pos = 1; pos < total; pos++) {
        nextFormat[names[pos]] = names[pos + 1];
    }

    # ...and terminate the chain.
    nextFormat[names[total]] = "none";

    return 0;
}

# Fill in the nextOpcode and isFirst arrays. nextOpcode says, per opcode,
# which opcode to try next during instruction fitting (-1 if none).
# isFirst is "true" for opcodes that no other opcode chains to, that is,
# the heads of the fitting chains, and "false" for the rest.
function deriveOpcodeChains(cur, successor) {
    # locals: cur, successor

    # First pass: assume every defined opcode heads a chain.
    cur = 0;
    while (cur <= MAX_OPCODE) {
        if (!isUnused(cur)) {
            isFirst[cur] = "true";
        }
        cur++;
    }

    # Second pass: record successors, and demote anything that turns out
    # to be somebody's successor.
    cur = 0;
    while (cur <= MAX_OPCODE) {
        if (!isUnused(cur)) {
            successor = findNextOpcode(cur);
            nextOpcode[cur] = successor;
            if (successor != -1) {
                isFirst[successor] = "false";
            }
        }
        cur++;
    }
}

# Given an opcode by index, look for the next opcode of the same family
# to try when matching instructions to opcodes. The nextFormat chain is
# followed from the opcode's own format, and the first format for which
# the family has an opcode wins. Returns that opcode's index, or -1 if the
# chain runs out.
function findNextOpcode(idx, fam, candidate, hit) {
    # locals: fam, candidate, hit
    fam = family[idx];

    # Not every family has an opcode in every format, so keep walking
    # until something matches or the chain ends.
    candidate = nextFormat[format[idx]];
    while (candidate != "none") {
        hit = familyFormat[fam,candidate];
        if (hit != "") {
            return hit;
        }
        candidate = nextFormat[candidate];
    }

    return -1;
}

# Construct the tables of info indexed by packed opcode. The packed opcode
# values are in the range 0-0x1ff, whereas the unpacked opcodes sparsely
# span the range 0-0xffff.
# Every packed slot gets an entry in each packed* array: real opcodes copy
# their definition, everything else gets placeholder values.
function createPackedTables(slot, raw) {
    # locals: slot, raw
    for (slot = 0; slot <= MAX_PACKED_OPCODE; slot++) {
        raw = unpackOpcode(slot);

        if ((slot != 255) && !isUnused(raw)) {
            # A defined opcode: copy its attributes over.
            packedName[slot] = name[raw];
            packedConstName[slot] = constName[raw];
            packedFormat[slot] = format[raw];
            packedFlags[slot] = flags[raw];
            packedWidth[slot] = width[raw];
            packedIndexType[slot] = indexType[raw];
            continue;
        }

        if (slot == 255) {
            # Special case: This is the low-opcode slot for a would-be
            # extended opcode dispatch implementation.
            packedName[slot] = "dispatch-ff";
            packedConstName[slot] = "DISPATCH_FF";
        } else {
            packedName[slot] = unusedName(raw);
            packedConstName[slot] = unusedConstName(raw);
        }

        # Placeholder attributes shared by the dispatch and unused slots.
        packedFormat[slot] = "00x";
        packedFlags[slot] = 0;
        packedWidth[slot] = 0;
        packedIndexType[slot] = "unknown";
    }
}

# Given a packed opcode, returns the raw (unpacked) opcode value. Packed
# values up to 255 map to themselves; each one above that maps to a raw
# value whose low byte is 0xff.
function unpackOpcode(idx) {
    # Note: This must be the inverse of the corresponding code in
    # libdex/DexOpcodes.h.
    return (idx <= 255) ? idx : (((idx - 256) * 256) + 255);
}

# Returns the "unused" name of the given opcode (by index). That is, this
# is the human-oriented name to use for an opcode definition in cases
# where the opcode isn't used. Two hex digits are used for values up to
# 255 and four for larger ones.
function unusedName(idx) {
    return sprintf((idx <= 255) ? "unused-%02x" : "unused-%04x", idx);
}

# Returns the "unused" constant name of the given opcode (by index). That
# is, this is the name to use for a constant definition in cases where
# the opcode isn't used. It is upper-cased throughout, hex digits included.
function unusedConstName(idx) {
    return toupper(sprintf((idx <= 255) ? "UNUSED_%02x" : "UNUSED_%04x", idx));
}

# Convert a hex value to an int.
# The digits may be upper or lower case. Returns the value, or -1 (after
# reporting on stderr) if the text is empty or holds a non-hex character.
function parseHex(hex, result, count, c, i) {
    # locals: result, count, c, i
    hex = tolower(hex);
    count = length(hex);
    if (count == 0) {
        printf("bogus hex value: %s\n", hex) >"/dev/stderr";
        return -1;
    }

    result = 0;
    for (i = 1; i <= count; i++) {
        # index() is one-based, so a hit is the digit value plus one.
        # substr() is used to pick characters because splitting on an
        # empty separator is not portable across awk implementations.
        c = index("0123456789abcdef", substr(hex, i, 1));
        if (c == 0) {
            printf("bogus hex value: %s\n", hex) >"/dev/stderr";
            return -1;
        }
        result = (result * 16) + c - 1;
    }
    return result;
}

# Initialize the indexTypes data: maps each index type name accepted in
# the bytecode description to its C constant.
function initIndexTypes() {
    indexTypeValues["unknown"] = "kIndexUnknown";
    indexTypeValues["none"] = "kIndexNone";
    indexTypeValues["varies"] = "kIndexVaries";
    indexTypeValues["type-ref"] = "kIndexTypeRef";
    indexTypeValues["string-ref"] = "kIndexStringRef";
    indexTypeValues["method-ref"] = "kIndexMethodRef";
    indexTypeValues["field-ref"] = "kIndexFieldRef";
    indexTypeValues["inline-method"] = "kIndexInlineMethod";
    indexTypeValues["vtable-offset"] = "kIndexVtableOffset";
    indexTypeValues["field-offset"] = "kIndexFieldOffset";
}

# Initialize the flags data: maps each flag name accepted in the bytecode
# description to its C constant. A value of "0" means the flag adds
# nothing to the C expression.
function initFlags() {
    flagValues["branch"] = "kInstrCanBranch";
    flagValues["continue"] = "kInstrCanContinue";
    flagValues["switch"] = "kInstrCanSwitch";
    flagValues["throw"] = "kInstrCanThrow";
    flagValues["return"] = "kInstrCanReturn";
    flagValues["invoke"] = "kInstrInvoke";
    flagValues["optimized"] = "0"; # Not represented in C output
    flagValues["0"] = "0";
}

# Translate the given flags into the equivalent C expression. The input is
# a pipe-separated list of flag names; the result is the pipe-separated
# list of the corresponding C constants, or "0" if none of them contribute
# anything. Returns "" on error, after naming the offending flag on
# stderr.
function flagsToC(f, parts, result, i, count, flagName) {
    # locals: parts, result, i, count, flagName
    count = split(f, parts, /\|/); # Split input at pipe characters.
    result = "0";

    for (i = 1; i <= count; i++) {
        flagName = parts[i];
        if (!(flagName in flagValues)) {
            # Report the name as written, not the (empty) lookup result.
            printf("bogus flag: %s\n", flagName) >"/dev/stderr";
            return ""; # Bogus flag name.
        }

        f = flagValues[flagName];
        if (f == "0") {
            # Nothing to append for this case.
        } else if (result == "0") {
            result = f;
        } else {
            result = result "|" f;
        }
    }

    return result;
}

# Returns true if the given opcode (by index) is an "optimized" opcode,
# that is, if "optimized" is one of its pipe-separated flags.
function isOptimized(idx, parts, f) {
    # locals: parts, f
    split(flags[idx], parts, /\|/); # Split flags[idx] at pipes.
    for (f in parts) {
        if (parts[f] == "optimized") return 1;
    }
    return 0;
}

# Returns true if there is no definition for the given opcode (by index).
function isUnused(idx) {
    return (name[idx] == "");
}