/* X86DisassemblerDecoder.c revision 41e59c7c344c7f854f8f29774d660c0e62b90af8 */
1/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==* 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is distributed under the University of Illinois Open Source 6 * License. See LICENSE.TXT for details. 7 * 8 *===----------------------------------------------------------------------===* 9 * 10 * This file is part of the X86 Disassembler. 11 * It contains the implementation of the instruction decoder. 12 * Documentation for the disassembler can be found in X86Disassembler.h. 13 * 14 *===----------------------------------------------------------------------===*/ 15 16#include <stdarg.h> /* for va_*() */ 17#include <stdio.h> /* for vsnprintf() */ 18#include <stdlib.h> /* for exit() */ 19#include <string.h> /* for memset() */ 20 21#include "X86DisassemblerDecoder.h" 22 23#include "X86GenDisassemblerTables.inc" 24 25#define TRUE 1 26#define FALSE 0 27 28typedef int8_t bool; 29 30#ifndef NDEBUG 31#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) 32#else 33#define debug(s) do { } while (0) 34#endif 35 36 37/* 38 * contextForAttrs - Client for the instruction context table. Takes a set of 39 * attributes and returns the appropriate decode context. 40 * 41 * @param attrMask - Attributes, from the enumeration attributeBits. 42 * @return - The InstructionContext to use when looking up an 43 * an instruction with these attributes. 44 */ 45static InstructionContext contextForAttrs(uint8_t attrMask) { 46 return CONTEXTS_SYM[attrMask]; 47} 48 49/* 50 * modRMRequired - Reads the appropriate instruction table to determine whether 51 * the ModR/M byte is required to decode a particular instruction. 52 * 53 * @param type - The opcode type (i.e., how many bytes it has). 54 * @param insnContext - The context for the instruction, as returned by 55 * contextForAttrs. 56 * @param opcode - The last byte of the instruction's opcode, not counting 57 * ModR/M extensions and escapes. 
58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise. 59 */ 60static int modRMRequired(OpcodeType type, 61 InstructionContext insnContext, 62 uint8_t opcode) { 63 const struct ContextDecision* decision = 0; 64 65 switch (type) { 66 case ONEBYTE: 67 decision = &ONEBYTE_SYM; 68 break; 69 case TWOBYTE: 70 decision = &TWOBYTE_SYM; 71 break; 72 case THREEBYTE_38: 73 decision = &THREEBYTE38_SYM; 74 break; 75 case THREEBYTE_3A: 76 decision = &THREEBYTE3A_SYM; 77 break; 78 case THREEBYTE_A6: 79 decision = &THREEBYTEA6_SYM; 80 break; 81 case THREEBYTE_A7: 82 decision = &THREEBYTEA7_SYM; 83 break; 84 } 85 86 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 87 modrm_type != MODRM_ONEENTRY; 88 89 return 0; 90} 91 92/* 93 * decode - Reads the appropriate instruction table to obtain the unique ID of 94 * an instruction. 95 * 96 * @param type - See modRMRequired(). 97 * @param insnContext - See modRMRequired(). 98 * @param opcode - See modRMRequired(). 99 * @param modRM - The ModR/M byte if required, or any value if not. 100 * @return - The UID of the instruction, or 0 on failure. 
101 */ 102static InstrUID decode(OpcodeType type, 103 InstructionContext insnContext, 104 uint8_t opcode, 105 uint8_t modRM) { 106 const struct ModRMDecision* dec; 107 108 switch (type) { 109 default: 110 debug("Unknown opcode type"); 111 return 0; 112 case ONEBYTE: 113 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 114 break; 115 case TWOBYTE: 116 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 117 break; 118 case THREEBYTE_38: 119 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 120 break; 121 case THREEBYTE_3A: 122 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 123 break; 124 case THREEBYTE_A6: 125 dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 126 break; 127 case THREEBYTE_A7: 128 dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 129 break; 130 } 131 132 switch (dec->modrm_type) { 133 default: 134 debug("Corrupt table! Unknown modrm_type"); 135 return 0; 136 case MODRM_ONEENTRY: 137 return dec->instructionIDs[0]; 138 case MODRM_SPLITRM: 139 if (modFromModRM(modRM) == 0x3) 140 return dec->instructionIDs[1]; 141 else 142 return dec->instructionIDs[0]; 143 case MODRM_FULL: 144 return dec->instructionIDs[modRM]; 145 } 146} 147 148/* 149 * specifierForUID - Given a UID, returns the name and operand specification for 150 * that instruction. 151 * 152 * @param uid - The unique ID for the instruction. This should be returned by 153 * decode(); specifierForUID will not check bounds. 154 * @return - A pointer to the specification for that instruction. 155 */ 156static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { 157 return &INSTRUCTIONS_SYM[uid]; 158} 159 160/* 161 * consumeByte - Uses the reader function provided by the user to consume one 162 * byte from the instruction's memory and advance the cursor. 163 * 164 * @param insn - The instruction with the reader function to use. 
The cursor 165 * for this instruction is advanced. 166 * @param byte - A pointer to a pre-allocated memory buffer to be populated 167 * with the data read. 168 * @return - 0 if the read was successful; nonzero otherwise. 169 */ 170static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { 171 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 172 173 if (!ret) 174 ++(insn->readerCursor); 175 176 return ret; 177} 178 179/* 180 * lookAtByte - Like consumeByte, but does not advance the cursor. 181 * 182 * @param insn - See consumeByte(). 183 * @param byte - See consumeByte(). 184 * @return - See consumeByte(). 185 */ 186static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { 187 return insn->reader(insn->readerArg, byte, insn->readerCursor); 188} 189 190static void unconsumeByte(struct InternalInstruction* insn) { 191 insn->readerCursor--; 192} 193 194#define CONSUME_FUNC(name, type) \ 195 static int name(struct InternalInstruction* insn, type* ptr) { \ 196 type combined = 0; \ 197 unsigned offset; \ 198 for (offset = 0; offset < sizeof(type); ++offset) { \ 199 uint8_t byte; \ 200 int ret = insn->reader(insn->readerArg, \ 201 &byte, \ 202 insn->readerCursor + offset); \ 203 if (ret) \ 204 return ret; \ 205 combined = combined | ((type)byte << ((type)offset * 8)); \ 206 } \ 207 *ptr = combined; \ 208 insn->readerCursor += sizeof(type); \ 209 return 0; \ 210 } 211 212/* 213 * consume* - Use the reader function provided by the user to consume data 214 * values of various sizes from the instruction's memory and advance the 215 * cursor appropriately. These readers perform endian conversion. 216 * 217 * @param insn - See consumeByte(). 218 * @param ptr - A pointer to a pre-allocated memory of appropriate size to 219 * be populated with the data read. 220 * @return - See consumeByte(). 
221 */ 222CONSUME_FUNC(consumeInt8, int8_t) 223CONSUME_FUNC(consumeInt16, int16_t) 224CONSUME_FUNC(consumeInt32, int32_t) 225CONSUME_FUNC(consumeUInt16, uint16_t) 226CONSUME_FUNC(consumeUInt32, uint32_t) 227CONSUME_FUNC(consumeUInt64, uint64_t) 228 229/* 230 * dbgprintf - Uses the logging function provided by the user to log a single 231 * message, typically without a carriage-return. 232 * 233 * @param insn - The instruction containing the logging function. 234 * @param format - See printf(). 235 * @param ... - See printf(). 236 */ 237static void dbgprintf(struct InternalInstruction* insn, 238 const char* format, 239 ...) { 240 char buffer[256]; 241 va_list ap; 242 243 if (!insn->dlog) 244 return; 245 246 va_start(ap, format); 247 (void)vsnprintf(buffer, sizeof(buffer), format, ap); 248 va_end(ap); 249 250 insn->dlog(insn->dlogArg, buffer); 251 252 return; 253} 254 255/* 256 * setPrefixPresent - Marks that a particular prefix is present at a particular 257 * location. 258 * 259 * @param insn - The instruction to be marked as having the prefix. 260 * @param prefix - The prefix that is present. 261 * @param location - The location where the prefix is located (in the address 262 * space of the instruction's reader). 263 */ 264static void setPrefixPresent(struct InternalInstruction* insn, 265 uint8_t prefix, 266 uint64_t location) 267{ 268 insn->prefixPresent[prefix] = 1; 269 insn->prefixLocations[prefix] = location; 270} 271 272/* 273 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is 274 * present at a given location. 275 * 276 * @param insn - The instruction to be queried. 277 * @param prefix - The prefix. 278 * @param location - The location to query. 279 * @return - Whether the prefix is at that location. 
280 */ 281static BOOL isPrefixAtLocation(struct InternalInstruction* insn, 282 uint8_t prefix, 283 uint64_t location) 284{ 285 if (insn->prefixPresent[prefix] == 1 && 286 insn->prefixLocations[prefix] == location) 287 return TRUE; 288 else 289 return FALSE; 290} 291 292/* 293 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 294 * instruction as having them. Also sets the instruction's default operand, 295 * address, and other relevant data sizes to report operands correctly. 296 * 297 * @param insn - The instruction whose prefixes are to be read. 298 * @return - 0 if the instruction could be read until the end of the prefix 299 * bytes, and no prefixes conflicted; nonzero otherwise. 300 */ 301static int readPrefixes(struct InternalInstruction* insn) { 302 BOOL isPrefix = TRUE; 303 BOOL prefixGroups[4] = { FALSE }; 304 uint64_t prefixLocation; 305 uint8_t byte = 0; 306 307 BOOL hasAdSize = FALSE; 308 BOOL hasOpSize = FALSE; 309 310 dbgprintf(insn, "readPrefixes()"); 311 312 while (isPrefix) { 313 prefixLocation = insn->readerCursor; 314 315 if (consumeByte(insn, &byte)) 316 return -1; 317 318 switch (byte) { 319 case 0xf0: /* LOCK */ 320 case 0xf2: /* REPNE/REPNZ */ 321 case 0xf3: /* REP or REPE/REPZ */ 322 if (prefixGroups[0]) 323 dbgprintf(insn, "Redundant Group 1 prefix"); 324 prefixGroups[0] = TRUE; 325 setPrefixPresent(insn, byte, prefixLocation); 326 break; 327 case 0x2e: /* CS segment override -OR- Branch not taken */ 328 case 0x36: /* SS segment override -OR- Branch taken */ 329 case 0x3e: /* DS segment override */ 330 case 0x26: /* ES segment override */ 331 case 0x64: /* FS segment override */ 332 case 0x65: /* GS segment override */ 333 switch (byte) { 334 case 0x2e: 335 insn->segmentOverride = SEG_OVERRIDE_CS; 336 break; 337 case 0x36: 338 insn->segmentOverride = SEG_OVERRIDE_SS; 339 break; 340 case 0x3e: 341 insn->segmentOverride = SEG_OVERRIDE_DS; 342 break; 343 case 0x26: 344 insn->segmentOverride = SEG_OVERRIDE_ES; 345 
break; 346 case 0x64: 347 insn->segmentOverride = SEG_OVERRIDE_FS; 348 break; 349 case 0x65: 350 insn->segmentOverride = SEG_OVERRIDE_GS; 351 break; 352 default: 353 debug("Unhandled override"); 354 return -1; 355 } 356 if (prefixGroups[1]) 357 dbgprintf(insn, "Redundant Group 2 prefix"); 358 prefixGroups[1] = TRUE; 359 setPrefixPresent(insn, byte, prefixLocation); 360 break; 361 case 0x66: /* Operand-size override */ 362 if (prefixGroups[2]) 363 dbgprintf(insn, "Redundant Group 3 prefix"); 364 prefixGroups[2] = TRUE; 365 hasOpSize = TRUE; 366 setPrefixPresent(insn, byte, prefixLocation); 367 break; 368 case 0x67: /* Address-size override */ 369 if (prefixGroups[3]) 370 dbgprintf(insn, "Redundant Group 4 prefix"); 371 prefixGroups[3] = TRUE; 372 hasAdSize = TRUE; 373 setPrefixPresent(insn, byte, prefixLocation); 374 break; 375 default: /* Not a prefix byte */ 376 isPrefix = FALSE; 377 break; 378 } 379 380 if (isPrefix) 381 dbgprintf(insn, "Found prefix 0x%hhx", byte); 382 } 383 384 insn->vexSize = 0; 385 386 if (byte == 0xc4) { 387 uint8_t byte1; 388 389 if (lookAtByte(insn, &byte1)) { 390 dbgprintf(insn, "Couldn't read second byte of VEX"); 391 return -1; 392 } 393 394 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 395 insn->vexSize = 3; 396 insn->necessaryPrefixLocation = insn->readerCursor - 1; 397 } 398 else { 399 unconsumeByte(insn); 400 insn->necessaryPrefixLocation = insn->readerCursor - 1; 401 } 402 403 if (insn->vexSize == 3) { 404 insn->vexPrefix[0] = byte; 405 consumeByte(insn, &insn->vexPrefix[1]); 406 consumeByte(insn, &insn->vexPrefix[2]); 407 408 /* We simulate the REX prefix for simplicity's sake */ 409 410 if (insn->mode == MODE_64BIT) { 411 insn->rexPrefix = 0x40 412 | (wFromVEX3of3(insn->vexPrefix[2]) << 3) 413 | (rFromVEX2of3(insn->vexPrefix[1]) << 2) 414 | (xFromVEX2of3(insn->vexPrefix[1]) << 1) 415 | (bFromVEX2of3(insn->vexPrefix[1]) << 0); 416 } 417 418 switch (ppFromVEX3of3(insn->vexPrefix[2])) 419 { 420 default: 421 break; 422 
case VEX_PREFIX_66: 423 hasOpSize = TRUE; 424 break; 425 } 426 427 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); 428 } 429 } 430 else if (byte == 0xc5) { 431 uint8_t byte1; 432 433 if (lookAtByte(insn, &byte1)) { 434 dbgprintf(insn, "Couldn't read second byte of VEX"); 435 return -1; 436 } 437 438 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 439 insn->vexSize = 2; 440 } 441 else { 442 unconsumeByte(insn); 443 } 444 445 if (insn->vexSize == 2) { 446 insn->vexPrefix[0] = byte; 447 consumeByte(insn, &insn->vexPrefix[1]); 448 449 if (insn->mode == MODE_64BIT) { 450 insn->rexPrefix = 0x40 451 | (rFromVEX2of2(insn->vexPrefix[1]) << 2); 452 } 453 454 switch (ppFromVEX2of2(insn->vexPrefix[1])) 455 { 456 default: 457 break; 458 case VEX_PREFIX_66: 459 hasOpSize = TRUE; 460 break; 461 } 462 463 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); 464 } 465 } 466 else { 467 if (insn->mode == MODE_64BIT) { 468 if ((byte & 0xf0) == 0x40) { 469 uint8_t opcodeByte; 470 471 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { 472 dbgprintf(insn, "Redundant REX prefix"); 473 return -1; 474 } 475 476 insn->rexPrefix = byte; 477 insn->necessaryPrefixLocation = insn->readerCursor - 2; 478 479 dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 480 } else { 481 unconsumeByte(insn); 482 insn->necessaryPrefixLocation = insn->readerCursor - 1; 483 } 484 } else { 485 unconsumeByte(insn); 486 insn->necessaryPrefixLocation = insn->readerCursor - 1; 487 } 488 } 489 490 if (insn->mode == MODE_16BIT) { 491 insn->registerSize = (hasOpSize ? 4 : 2); 492 insn->addressSize = (hasAdSize ? 4 : 2); 493 insn->displacementSize = (hasAdSize ? 4 : 2); 494 insn->immediateSize = (hasOpSize ? 4 : 2); 495 } else if (insn->mode == MODE_32BIT) { 496 insn->registerSize = (hasOpSize ? 2 : 4); 497 insn->addressSize = (hasAdSize ? 2 : 4); 498 insn->displacementSize = (hasAdSize ? 
2 : 4); 499 insn->immediateSize = (hasOpSize ? 2 : 4); 500 } else if (insn->mode == MODE_64BIT) { 501 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 502 insn->registerSize = 8; 503 insn->addressSize = (hasAdSize ? 4 : 8); 504 insn->displacementSize = 4; 505 insn->immediateSize = 4; 506 } else if (insn->rexPrefix) { 507 insn->registerSize = (hasOpSize ? 2 : 4); 508 insn->addressSize = (hasAdSize ? 4 : 8); 509 insn->displacementSize = (hasOpSize ? 2 : 4); 510 insn->immediateSize = (hasOpSize ? 2 : 4); 511 } else { 512 insn->registerSize = (hasOpSize ? 2 : 4); 513 insn->addressSize = (hasAdSize ? 4 : 8); 514 insn->displacementSize = (hasOpSize ? 2 : 4); 515 insn->immediateSize = (hasOpSize ? 2 : 4); 516 } 517 } 518 519 return 0; 520} 521 522/* 523 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 524 * extended or escape opcodes). 525 * 526 * @param insn - The instruction whose opcode is to be read. 527 * @return - 0 if the opcode could be read successfully; nonzero otherwise. 
528 */ 529static int readOpcode(struct InternalInstruction* insn) { 530 /* Determine the length of the primary opcode */ 531 532 uint8_t current; 533 534 dbgprintf(insn, "readOpcode()"); 535 536 insn->opcodeType = ONEBYTE; 537 538 if (insn->vexSize == 3) 539 { 540 switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) 541 { 542 default: 543 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); 544 return -1; 545 case 0: 546 break; 547 case VEX_LOB_0F: 548 insn->twoByteEscape = 0x0f; 549 insn->opcodeType = TWOBYTE; 550 return consumeByte(insn, &insn->opcode); 551 case VEX_LOB_0F38: 552 insn->twoByteEscape = 0x0f; 553 insn->threeByteEscape = 0x38; 554 insn->opcodeType = THREEBYTE_38; 555 return consumeByte(insn, &insn->opcode); 556 case VEX_LOB_0F3A: 557 insn->twoByteEscape = 0x0f; 558 insn->threeByteEscape = 0x3a; 559 insn->opcodeType = THREEBYTE_3A; 560 return consumeByte(insn, &insn->opcode); 561 } 562 } 563 else if (insn->vexSize == 2) 564 { 565 insn->twoByteEscape = 0x0f; 566 insn->opcodeType = TWOBYTE; 567 return consumeByte(insn, &insn->opcode); 568 } 569 570 if (consumeByte(insn, ¤t)) 571 return -1; 572 573 if (current == 0x0f) { 574 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 575 576 insn->twoByteEscape = current; 577 578 if (consumeByte(insn, ¤t)) 579 return -1; 580 581 if (current == 0x38) { 582 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 583 584 insn->threeByteEscape = current; 585 586 if (consumeByte(insn, ¤t)) 587 return -1; 588 589 insn->opcodeType = THREEBYTE_38; 590 } else if (current == 0x3a) { 591 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 592 593 insn->threeByteEscape = current; 594 595 if (consumeByte(insn, ¤t)) 596 return -1; 597 598 insn->opcodeType = THREEBYTE_3A; 599 } else if (current == 0xa6) { 600 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 601 602 insn->threeByteEscape = current; 603 604 if 
(consumeByte(insn, ¤t)) 605 return -1; 606 607 insn->opcodeType = THREEBYTE_A6; 608 } else if (current == 0xa7) { 609 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 610 611 insn->threeByteEscape = current; 612 613 if (consumeByte(insn, ¤t)) 614 return -1; 615 616 insn->opcodeType = THREEBYTE_A7; 617 } else { 618 dbgprintf(insn, "Didn't find a three-byte escape prefix"); 619 620 insn->opcodeType = TWOBYTE; 621 } 622 } 623 624 /* 625 * At this point we have consumed the full opcode. 626 * Anything we consume from here on must be unconsumed. 627 */ 628 629 insn->opcode = current; 630 631 return 0; 632} 633 634static int readModRM(struct InternalInstruction* insn); 635 636/* 637 * getIDWithAttrMask - Determines the ID of an instruction, consuming 638 * the ModR/M byte as appropriate for extended and escape opcodes, 639 * and using a supplied attribute mask. 640 * 641 * @param instructionID - A pointer whose target is filled in with the ID of the 642 * instruction. 643 * @param insn - The instruction whose ID is to be determined. 644 * @param attrMask - The attribute mask to search. 645 * @return - 0 if the ModR/M could be read when needed or was not 646 * needed; nonzero otherwise. 
647 */ 648static int getIDWithAttrMask(uint16_t* instructionID, 649 struct InternalInstruction* insn, 650 uint8_t attrMask) { 651 BOOL hasModRMExtension; 652 653 uint8_t instructionClass; 654 655 instructionClass = contextForAttrs(attrMask); 656 657 hasModRMExtension = modRMRequired(insn->opcodeType, 658 instructionClass, 659 insn->opcode); 660 661 if (hasModRMExtension) { 662 if (readModRM(insn)) 663 return -1; 664 665 *instructionID = decode(insn->opcodeType, 666 instructionClass, 667 insn->opcode, 668 insn->modRM); 669 } else { 670 *instructionID = decode(insn->opcodeType, 671 instructionClass, 672 insn->opcode, 673 0); 674 } 675 676 return 0; 677} 678 679/* 680 * is16BitEquivalent - Determines whether two instruction names refer to 681 * equivalent instructions but one is 16-bit whereas the other is not. 682 * 683 * @param orig - The instruction that is not 16-bit 684 * @param equiv - The instruction that is 16-bit 685 */ 686static BOOL is16BitEquvalent(const char* orig, const char* equiv) { 687 off_t i; 688 689 for (i = 0;; i++) { 690 if (orig[i] == '\0' && equiv[i] == '\0') 691 return TRUE; 692 if (orig[i] == '\0' || equiv[i] == '\0') 693 return FALSE; 694 if (orig[i] != equiv[i]) { 695 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 696 continue; 697 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 698 continue; 699 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 700 continue; 701 return FALSE; 702 } 703 } 704} 705 706/* 707 * is64BitEquivalent - Determines whether two instruction names refer to 708 * equivalent instructions but one is 64-bit whereas the other is not. 
709 * 710 * @param orig - The instruction that is not 64-bit 711 * @param equiv - The instruction that is 64-bit 712 */ 713static BOOL is64BitEquivalent(const char* orig, const char* equiv) { 714 off_t i; 715 716 for (i = 0;; i++) { 717 if (orig[i] == '\0' && equiv[i] == '\0') 718 return TRUE; 719 if (orig[i] == '\0' || equiv[i] == '\0') 720 return FALSE; 721 if (orig[i] != equiv[i]) { 722 if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q') 723 continue; 724 if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6') 725 continue; 726 if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4') 727 continue; 728 return FALSE; 729 } 730 } 731} 732 733 734/* 735 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 736 * appropriate for extended and escape opcodes. Determines the attributes and 737 * context for the instruction before doing so. 738 * 739 * @param insn - The instruction whose ID is to be determined. 740 * @return - 0 if the ModR/M could be read when needed or was not needed; 741 * nonzero otherwise. 
742 */ 743static int getID(struct InternalInstruction* insn) { 744 uint8_t attrMask; 745 uint16_t instructionID; 746 747 dbgprintf(insn, "getID()"); 748 749 attrMask = ATTR_NONE; 750 751 if (insn->mode == MODE_64BIT) 752 attrMask |= ATTR_64BIT; 753 754 if (insn->vexSize) { 755 attrMask |= ATTR_VEX; 756 757 if (insn->vexSize == 3) { 758 switch (ppFromVEX3of3(insn->vexPrefix[2])) { 759 case VEX_PREFIX_66: 760 attrMask |= ATTR_OPSIZE; 761 break; 762 case VEX_PREFIX_F3: 763 attrMask |= ATTR_XS; 764 break; 765 case VEX_PREFIX_F2: 766 attrMask |= ATTR_XD; 767 break; 768 } 769 770 if (lFromVEX3of3(insn->vexPrefix[2])) 771 attrMask |= ATTR_VEXL; 772 } 773 else if (insn->vexSize == 2) { 774 switch (ppFromVEX2of2(insn->vexPrefix[1])) { 775 case VEX_PREFIX_66: 776 attrMask |= ATTR_OPSIZE; 777 break; 778 case VEX_PREFIX_F3: 779 attrMask |= ATTR_XS; 780 break; 781 case VEX_PREFIX_F2: 782 attrMask |= ATTR_XD; 783 break; 784 } 785 786 if (lFromVEX2of2(insn->vexPrefix[1])) 787 attrMask |= ATTR_VEXL; 788 } 789 else { 790 return -1; 791 } 792 } 793 else { 794 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) 795 attrMask |= ATTR_OPSIZE; 796 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) 797 attrMask |= ATTR_XS; 798 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) 799 attrMask |= ATTR_XD; 800 } 801 802 if (insn->rexPrefix & 0x08) 803 attrMask |= ATTR_REXW; 804 805 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 806 return -1; 807 808 /* The following clauses compensate for limitations of the tables. */ 809 810 if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) { 811 /* 812 * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit 813 * has precedence since there are no L-bit with W-bit entries in the tables. 814 * So if the L-bit isn't significant we should use the W-bit instead. 
815 */ 816 817 const struct InstructionSpecifier *spec; 818 uint16_t instructionIDWithWBit; 819 const struct InstructionSpecifier *specWithWBit; 820 821 spec = specifierForUID(instructionID); 822 823 if (getIDWithAttrMask(&instructionIDWithWBit, 824 insn, 825 (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) { 826 insn->instructionID = instructionID; 827 insn->spec = spec; 828 return 0; 829 } 830 831 specWithWBit = specifierForUID(instructionIDWithWBit); 832 833 if (instructionID != instructionIDWithWBit) { 834 insn->instructionID = instructionIDWithWBit; 835 insn->spec = specWithWBit; 836 } else { 837 insn->instructionID = instructionID; 838 insn->spec = spec; 839 } 840 return 0; 841 } 842 843 if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) { 844 /* 845 * Although for SSE instructions it is usually necessary to treat REX.W+F2 846 * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is 847 * an occasional instruction where F2 is incidental and REX.W is the more 848 * significant. If the decoded instruction is 32-bit and adding REX.W 849 * instead of F2 changes a 32 to a 64, we adopt the new encoding. 850 */ 851 852 const struct InstructionSpecifier *spec; 853 uint16_t instructionIDWithREXw; 854 const struct InstructionSpecifier *specWithREXw; 855 856 spec = specifierForUID(instructionID); 857 858 if (getIDWithAttrMask(&instructionIDWithREXw, 859 insn, 860 attrMask & (~ATTR_XD))) { 861 /* 862 * Decoding with REX.w would yield nothing; give up and return original 863 * decode. 
864 */ 865 866 insn->instructionID = instructionID; 867 insn->spec = spec; 868 return 0; 869 } 870 871 specWithREXw = specifierForUID(instructionIDWithREXw); 872 873 if (is64BitEquivalent(spec->name, specWithREXw->name)) { 874 insn->instructionID = instructionIDWithREXw; 875 insn->spec = specWithREXw; 876 } else { 877 insn->instructionID = instructionID; 878 insn->spec = spec; 879 } 880 return 0; 881 } 882 883 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { 884 /* 885 * The instruction tables make no distinction between instructions that 886 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 887 * particular spot (i.e., many MMX operations). In general we're 888 * conservative, but in the specific case where OpSize is present but not 889 * in the right place we check if there's a 16-bit operation. 890 */ 891 892 const struct InstructionSpecifier *spec; 893 uint16_t instructionIDWithOpsize; 894 const struct InstructionSpecifier *specWithOpsize; 895 896 spec = specifierForUID(instructionID); 897 898 if (getIDWithAttrMask(&instructionIDWithOpsize, 899 insn, 900 attrMask | ATTR_OPSIZE)) { 901 /* 902 * ModRM required with OpSize but not present; give up and return version 903 * without OpSize set 904 */ 905 906 insn->instructionID = instructionID; 907 insn->spec = spec; 908 return 0; 909 } 910 911 specWithOpsize = specifierForUID(instructionIDWithOpsize); 912 913 if (is16BitEquvalent(spec->name, specWithOpsize->name)) { 914 insn->instructionID = instructionIDWithOpsize; 915 insn->spec = specWithOpsize; 916 } else { 917 insn->instructionID = instructionID; 918 insn->spec = spec; 919 } 920 return 0; 921 } 922 923 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 924 insn->rexPrefix & 0x01) { 925 /* 926 * NOOP shouldn't decode as NOOP if REX.b is set. Instead 927 * it should decode as XCHG %r8, %eax. 
928 */ 929 930 const struct InstructionSpecifier *spec; 931 uint16_t instructionIDWithNewOpcode; 932 const struct InstructionSpecifier *specWithNewOpcode; 933 934 spec = specifierForUID(instructionID); 935 936 /* Borrow opcode from one of the other XCHGar opcodes */ 937 insn->opcode = 0x91; 938 939 if (getIDWithAttrMask(&instructionIDWithNewOpcode, 940 insn, 941 attrMask)) { 942 insn->opcode = 0x90; 943 944 insn->instructionID = instructionID; 945 insn->spec = spec; 946 return 0; 947 } 948 949 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); 950 951 /* Change back */ 952 insn->opcode = 0x90; 953 954 insn->instructionID = instructionIDWithNewOpcode; 955 insn->spec = specWithNewOpcode; 956 957 return 0; 958 } 959 960 insn->instructionID = instructionID; 961 insn->spec = specifierForUID(insn->instructionID); 962 963 return 0; 964} 965 966/* 967 * readSIB - Consumes the SIB byte to determine addressing information for an 968 * instruction. 969 * 970 * @param insn - The instruction whose SIB byte is to be read. 971 * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 
972 */ 973static int readSIB(struct InternalInstruction* insn) { 974 SIBIndex sibIndexBase = 0; 975 SIBBase sibBaseBase = 0; 976 uint8_t index, base; 977 978 dbgprintf(insn, "readSIB()"); 979 980 if (insn->consumedSIB) 981 return 0; 982 983 insn->consumedSIB = TRUE; 984 985 switch (insn->addressSize) { 986 case 2: 987 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 988 return -1; 989 break; 990 case 4: 991 sibIndexBase = SIB_INDEX_EAX; 992 sibBaseBase = SIB_BASE_EAX; 993 break; 994 case 8: 995 sibIndexBase = SIB_INDEX_RAX; 996 sibBaseBase = SIB_BASE_RAX; 997 break; 998 } 999 1000 if (consumeByte(insn, &insn->sib)) 1001 return -1; 1002 1003 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 1004 1005 switch (index) { 1006 case 0x4: 1007 insn->sibIndex = SIB_INDEX_NONE; 1008 break; 1009 default: 1010 insn->sibIndex = (SIBIndex)(sibIndexBase + index); 1011 if (insn->sibIndex == SIB_INDEX_sib || 1012 insn->sibIndex == SIB_INDEX_sib64) 1013 insn->sibIndex = SIB_INDEX_NONE; 1014 break; 1015 } 1016 1017 switch (scaleFromSIB(insn->sib)) { 1018 case 0: 1019 insn->sibScale = 1; 1020 break; 1021 case 1: 1022 insn->sibScale = 2; 1023 break; 1024 case 2: 1025 insn->sibScale = 4; 1026 break; 1027 case 3: 1028 insn->sibScale = 8; 1029 break; 1030 } 1031 1032 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 1033 1034 switch (base) { 1035 case 0x5: 1036 switch (modFromModRM(insn->modRM)) { 1037 case 0x0: 1038 insn->eaDisplacement = EA_DISP_32; 1039 insn->sibBase = SIB_BASE_NONE; 1040 break; 1041 case 0x1: 1042 insn->eaDisplacement = EA_DISP_8; 1043 insn->sibBase = (insn->addressSize == 4 ? 1044 SIB_BASE_EBP : SIB_BASE_RBP); 1045 break; 1046 case 0x2: 1047 insn->eaDisplacement = EA_DISP_32; 1048 insn->sibBase = (insn->addressSize == 4 ? 
1049 SIB_BASE_EBP : SIB_BASE_RBP); 1050 break; 1051 case 0x3: 1052 debug("Cannot have Mod = 0b11 and a SIB byte"); 1053 return -1; 1054 } 1055 break; 1056 default: 1057 insn->sibBase = (SIBBase)(sibBaseBase + base); 1058 break; 1059 } 1060 1061 return 0; 1062} 1063 1064/* 1065 * readDisplacement - Consumes the displacement of an instruction. 1066 * 1067 * @param insn - The instruction whose displacement is to be read. 1068 * @return - 0 if the displacement byte was successfully read; nonzero 1069 * otherwise. 1070 */ 1071static int readDisplacement(struct InternalInstruction* insn) { 1072 int8_t d8; 1073 int16_t d16; 1074 int32_t d32; 1075 1076 dbgprintf(insn, "readDisplacement()"); 1077 1078 if (insn->consumedDisplacement) 1079 return 0; 1080 1081 insn->consumedDisplacement = TRUE; 1082 1083 switch (insn->eaDisplacement) { 1084 case EA_DISP_NONE: 1085 insn->consumedDisplacement = FALSE; 1086 break; 1087 case EA_DISP_8: 1088 if (consumeInt8(insn, &d8)) 1089 return -1; 1090 insn->displacement = d8; 1091 break; 1092 case EA_DISP_16: 1093 if (consumeInt16(insn, &d16)) 1094 return -1; 1095 insn->displacement = d16; 1096 break; 1097 case EA_DISP_32: 1098 if (consumeInt32(insn, &d32)) 1099 return -1; 1100 insn->displacement = d32; 1101 break; 1102 } 1103 1104 insn->consumedDisplacement = TRUE; 1105 return 0; 1106} 1107 1108/* 1109 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and 1110 * displacement) for an instruction and interprets it. 1111 * 1112 * @param insn - The instruction whose addressing information is to be read. 1113 * @return - 0 if the information was successfully read; nonzero otherwise. 
1114 */ 1115static int readModRM(struct InternalInstruction* insn) { 1116 uint8_t mod, rm, reg; 1117 1118 dbgprintf(insn, "readModRM()"); 1119 1120 if (insn->consumedModRM) 1121 return 0; 1122 1123 if (consumeByte(insn, &insn->modRM)) 1124 return -1; 1125 insn->consumedModRM = TRUE; 1126 1127 mod = modFromModRM(insn->modRM); 1128 rm = rmFromModRM(insn->modRM); 1129 reg = regFromModRM(insn->modRM); 1130 1131 /* 1132 * This goes by insn->registerSize to pick the correct register, which messes 1133 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in 1134 * fixupReg(). 1135 */ 1136 switch (insn->registerSize) { 1137 case 2: 1138 insn->regBase = MODRM_REG_AX; 1139 insn->eaRegBase = EA_REG_AX; 1140 break; 1141 case 4: 1142 insn->regBase = MODRM_REG_EAX; 1143 insn->eaRegBase = EA_REG_EAX; 1144 break; 1145 case 8: 1146 insn->regBase = MODRM_REG_RAX; 1147 insn->eaRegBase = EA_REG_RAX; 1148 break; 1149 } 1150 1151 reg |= rFromREX(insn->rexPrefix) << 3; 1152 rm |= bFromREX(insn->rexPrefix) << 3; 1153 1154 insn->reg = (Reg)(insn->regBase + reg); 1155 1156 switch (insn->addressSize) { 1157 case 2: 1158 insn->eaBaseBase = EA_BASE_BX_SI; 1159 1160 switch (mod) { 1161 case 0x0: 1162 if (rm == 0x6) { 1163 insn->eaBase = EA_BASE_NONE; 1164 insn->eaDisplacement = EA_DISP_16; 1165 if (readDisplacement(insn)) 1166 return -1; 1167 } else { 1168 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1169 insn->eaDisplacement = EA_DISP_NONE; 1170 } 1171 break; 1172 case 0x1: 1173 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1174 insn->eaDisplacement = EA_DISP_8; 1175 if (readDisplacement(insn)) 1176 return -1; 1177 break; 1178 case 0x2: 1179 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1180 insn->eaDisplacement = EA_DISP_16; 1181 if (readDisplacement(insn)) 1182 return -1; 1183 break; 1184 case 0x3: 1185 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1186 if (readDisplacement(insn)) 1187 return -1; 1188 break; 1189 } 1190 break; 1191 case 4: 1192 case 8: 1193 
insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 1194 1195 switch (mod) { 1196 case 0x0: 1197 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ 1198 switch (rm) { 1199 case 0x4: 1200 case 0xc: /* in case REXW.b is set */ 1201 insn->eaBase = (insn->addressSize == 4 ? 1202 EA_BASE_sib : EA_BASE_sib64); 1203 readSIB(insn); 1204 if (readDisplacement(insn)) 1205 return -1; 1206 break; 1207 case 0x5: 1208 insn->eaBase = EA_BASE_NONE; 1209 insn->eaDisplacement = EA_DISP_32; 1210 if (readDisplacement(insn)) 1211 return -1; 1212 break; 1213 default: 1214 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1215 break; 1216 } 1217 break; 1218 case 0x1: 1219 case 0x2: 1220 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); 1221 switch (rm) { 1222 case 0x4: 1223 case 0xc: /* in case REXW.b is set */ 1224 insn->eaBase = EA_BASE_sib; 1225 readSIB(insn); 1226 if (readDisplacement(insn)) 1227 return -1; 1228 break; 1229 default: 1230 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1231 if (readDisplacement(insn)) 1232 return -1; 1233 break; 1234 } 1235 break; 1236 case 0x3: 1237 insn->eaDisplacement = EA_DISP_NONE; 1238 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1239 break; 1240 } 1241 break; 1242 } /* switch (insn->addressSize) */ 1243 1244 return 0; 1245} 1246 1247#define GENERIC_FIXUP_FUNC(name, base, prefix) \ 1248 static uint8_t name(struct InternalInstruction *insn, \ 1249 OperandType type, \ 1250 uint8_t index, \ 1251 uint8_t *valid) { \ 1252 *valid = 1; \ 1253 switch (type) { \ 1254 default: \ 1255 debug("Unhandled register type"); \ 1256 *valid = 0; \ 1257 return 0; \ 1258 case TYPE_Rv: \ 1259 return base + index; \ 1260 case TYPE_R8: \ 1261 if (insn->rexPrefix && \ 1262 index >= 4 && index <= 7) { \ 1263 return prefix##_SPL + (index - 4); \ 1264 } else { \ 1265 return prefix##_AL + index; \ 1266 } \ 1267 case TYPE_R16: \ 1268 return prefix##_AX + index; \ 1269 case TYPE_R32: \ 1270 return prefix##_EAX + index; \ 1271 
case TYPE_R64: \ 1272 return prefix##_RAX + index; \ 1273 case TYPE_XMM256: \ 1274 return prefix##_YMM0 + index; \ 1275 case TYPE_XMM128: \ 1276 case TYPE_XMM64: \ 1277 case TYPE_XMM32: \ 1278 case TYPE_XMM: \ 1279 return prefix##_XMM0 + index; \ 1280 case TYPE_MM64: \ 1281 case TYPE_MM32: \ 1282 case TYPE_MM: \ 1283 if (index > 7) \ 1284 *valid = 0; \ 1285 return prefix##_MM0 + index; \ 1286 case TYPE_SEGMENTREG: \ 1287 if (index > 5) \ 1288 *valid = 0; \ 1289 return prefix##_ES + index; \ 1290 case TYPE_DEBUGREG: \ 1291 if (index > 7) \ 1292 *valid = 0; \ 1293 return prefix##_DR0 + index; \ 1294 case TYPE_CONTROLREG: \ 1295 if (index > 8) \ 1296 *valid = 0; \ 1297 return prefix##_CR0 + index; \ 1298 } \ 1299 } 1300 1301/* 1302 * fixup*Value - Consults an operand type to determine the meaning of the 1303 * reg or R/M field. If the operand is an XMM operand, for example, an 1304 * operand would be XMM0 instead of AX, which readModRM() would otherwise 1305 * misinterpret it as. 1306 * 1307 * @param insn - The instruction containing the operand. 1308 * @param type - The operand type. 1309 * @param index - The existing value of the field as reported by readModRM(). 1310 * @param valid - The address of a uint8_t. The target is set to 1 if the 1311 * field is valid for the register class; 0 if not. 1312 * @return - The proper value. 1313 */ 1314GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) 1315GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) 1316 1317/* 1318 * fixupReg - Consults an operand specifier to determine which of the 1319 * fixup*Value functions to use in correcting readModRM()'ss interpretation. 1320 * 1321 * @param insn - See fixup*Value(). 1322 * @param op - The operand specifier. 1323 * @return - 0 if fixup was successful; -1 if the register returned was 1324 * invalid for its class. 
1325 */ 1326static int fixupReg(struct InternalInstruction *insn, 1327 const struct OperandSpecifier *op) { 1328 uint8_t valid; 1329 1330 dbgprintf(insn, "fixupReg()"); 1331 1332 switch ((OperandEncoding)op->encoding) { 1333 default: 1334 debug("Expected a REG or R/M encoding in fixupReg"); 1335 return -1; 1336 case ENCODING_VVVV: 1337 insn->vvvv = (Reg)fixupRegValue(insn, 1338 (OperandType)op->type, 1339 insn->vvvv, 1340 &valid); 1341 if (!valid) 1342 return -1; 1343 break; 1344 case ENCODING_REG: 1345 insn->reg = (Reg)fixupRegValue(insn, 1346 (OperandType)op->type, 1347 insn->reg - insn->regBase, 1348 &valid); 1349 if (!valid) 1350 return -1; 1351 break; 1352 case ENCODING_RM: 1353 if (insn->eaBase >= insn->eaRegBase) { 1354 insn->eaBase = (EABase)fixupRMValue(insn, 1355 (OperandType)op->type, 1356 insn->eaBase - insn->eaRegBase, 1357 &valid); 1358 if (!valid) 1359 return -1; 1360 } 1361 break; 1362 } 1363 1364 return 0; 1365} 1366 1367/* 1368 * readOpcodeModifier - Reads an operand from the opcode field of an 1369 * instruction. Handles AddRegFrm instructions. 1370 * 1371 * @param insn - The instruction whose opcode field is to be read. 1372 * @param inModRM - Indicates that the opcode field is to be read from the 1373 * ModR/M extension; useful for escape opcodes 1374 * @return - 0 on success; nonzero otherwise. 
1375 */ 1376static int readOpcodeModifier(struct InternalInstruction* insn) { 1377 dbgprintf(insn, "readOpcodeModifier()"); 1378 1379 if (insn->consumedOpcodeModifier) 1380 return 0; 1381 1382 insn->consumedOpcodeModifier = TRUE; 1383 1384 switch (insn->spec->modifierType) { 1385 default: 1386 debug("Unknown modifier type."); 1387 return -1; 1388 case MODIFIER_NONE: 1389 debug("No modifier but an operand expects one."); 1390 return -1; 1391 case MODIFIER_OPCODE: 1392 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; 1393 return 0; 1394 case MODIFIER_MODRM: 1395 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; 1396 return 0; 1397 } 1398} 1399 1400/* 1401 * readOpcodeRegister - Reads an operand from the opcode field of an 1402 * instruction and interprets it appropriately given the operand width. 1403 * Handles AddRegFrm instructions. 1404 * 1405 * @param insn - See readOpcodeModifier(). 1406 * @param size - The width (in bytes) of the register being specified. 1407 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1408 * RAX. 1409 * @return - 0 on success; nonzero otherwise. 
1410 */ 1411static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { 1412 dbgprintf(insn, "readOpcodeRegister()"); 1413 1414 if (readOpcodeModifier(insn)) 1415 return -1; 1416 1417 if (size == 0) 1418 size = insn->registerSize; 1419 1420 switch (size) { 1421 case 1: 1422 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 1423 | insn->opcodeModifier)); 1424 if (insn->rexPrefix && 1425 insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1426 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1427 insn->opcodeRegister = (Reg)(MODRM_REG_SPL 1428 + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1429 } 1430 1431 break; 1432 case 2: 1433 insn->opcodeRegister = (Reg)(MODRM_REG_AX 1434 + ((bFromREX(insn->rexPrefix) << 3) 1435 | insn->opcodeModifier)); 1436 break; 1437 case 4: 1438 insn->opcodeRegister = (Reg)(MODRM_REG_EAX 1439 + ((bFromREX(insn->rexPrefix) << 3) 1440 | insn->opcodeModifier)); 1441 break; 1442 case 8: 1443 insn->opcodeRegister = (Reg)(MODRM_REG_RAX 1444 + ((bFromREX(insn->rexPrefix) << 3) 1445 | insn->opcodeModifier)); 1446 break; 1447 } 1448 1449 return 0; 1450} 1451 1452/* 1453 * readImmediate - Consumes an immediate operand from an instruction, given the 1454 * desired operand size. 1455 * 1456 * @param insn - The instruction whose operand is to be read. 1457 * @param size - The width (in bytes) of the operand. 1458 * @return - 0 if the immediate was successfully consumed; nonzero 1459 * otherwise. 
1460 */ 1461static int readImmediate(struct InternalInstruction* insn, uint8_t size) { 1462 uint8_t imm8; 1463 uint16_t imm16; 1464 uint32_t imm32; 1465 uint64_t imm64; 1466 1467 dbgprintf(insn, "readImmediate()"); 1468 1469 if (insn->numImmediatesConsumed == 2) { 1470 debug("Already consumed two immediates"); 1471 return -1; 1472 } 1473 1474 if (size == 0) 1475 size = insn->immediateSize; 1476 else 1477 insn->immediateSize = size; 1478 1479 switch (size) { 1480 case 1: 1481 if (consumeByte(insn, &imm8)) 1482 return -1; 1483 insn->immediates[insn->numImmediatesConsumed] = imm8; 1484 break; 1485 case 2: 1486 if (consumeUInt16(insn, &imm16)) 1487 return -1; 1488 insn->immediates[insn->numImmediatesConsumed] = imm16; 1489 break; 1490 case 4: 1491 if (consumeUInt32(insn, &imm32)) 1492 return -1; 1493 insn->immediates[insn->numImmediatesConsumed] = imm32; 1494 break; 1495 case 8: 1496 if (consumeUInt64(insn, &imm64)) 1497 return -1; 1498 insn->immediates[insn->numImmediatesConsumed] = imm64; 1499 break; 1500 } 1501 1502 insn->numImmediatesConsumed++; 1503 1504 return 0; 1505} 1506 1507/* 1508 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. 1509 * 1510 * @param insn - The instruction whose operand is to be read. 1511 * @return - 0 if the vvvv was successfully consumed; nonzero 1512 * otherwise. 1513 */ 1514static int readVVVV(struct InternalInstruction* insn) { 1515 dbgprintf(insn, "readVVVV()"); 1516 1517 if (insn->vexSize == 3) 1518 insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]); 1519 else if (insn->vexSize == 2) 1520 insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]); 1521 else 1522 return -1; 1523 1524 if (insn->mode != MODE_64BIT) 1525 insn->vvvv &= 0x7; 1526 1527 return 0; 1528} 1529 1530/* 1531 * readOperands - Consults the specifier for an instruction and consumes all 1532 * operands for that instruction, interpreting them as it goes. 1533 * 1534 * @param insn - The instruction whose operands are to be read and interpreted. 
1535 * @return - 0 if all operands could be read; nonzero otherwise. 1536 */ 1537static int readOperands(struct InternalInstruction* insn) { 1538 int index; 1539 int hasVVVV, needVVVV; 1540 1541 dbgprintf(insn, "readOperands()"); 1542 1543 /* If non-zero vvvv specified, need to make sure one of the operands 1544 uses it. */ 1545 hasVVVV = !readVVVV(insn); 1546 needVVVV = hasVVVV && (insn->vvvv != 0); 1547 1548 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 1549 switch (insn->spec->operands[index].encoding) { 1550 case ENCODING_NONE: 1551 break; 1552 case ENCODING_REG: 1553 case ENCODING_RM: 1554 if (readModRM(insn)) 1555 return -1; 1556 if (fixupReg(insn, &insn->spec->operands[index])) 1557 return -1; 1558 break; 1559 case ENCODING_CB: 1560 case ENCODING_CW: 1561 case ENCODING_CD: 1562 case ENCODING_CP: 1563 case ENCODING_CO: 1564 case ENCODING_CT: 1565 dbgprintf(insn, "We currently don't hande code-offset encodings"); 1566 return -1; 1567 case ENCODING_IB: 1568 if (readImmediate(insn, 1)) 1569 return -1; 1570 if (insn->spec->operands[index].type == TYPE_IMM3 && 1571 insn->immediates[insn->numImmediatesConsumed - 1] > 7) 1572 return -1; 1573 break; 1574 case ENCODING_IW: 1575 if (readImmediate(insn, 2)) 1576 return -1; 1577 break; 1578 case ENCODING_ID: 1579 if (readImmediate(insn, 4)) 1580 return -1; 1581 break; 1582 case ENCODING_IO: 1583 if (readImmediate(insn, 8)) 1584 return -1; 1585 break; 1586 case ENCODING_Iv: 1587 if (readImmediate(insn, insn->immediateSize)) 1588 return -1; 1589 break; 1590 case ENCODING_Ia: 1591 if (readImmediate(insn, insn->addressSize)) 1592 return -1; 1593 break; 1594 case ENCODING_RB: 1595 if (readOpcodeRegister(insn, 1)) 1596 return -1; 1597 break; 1598 case ENCODING_RW: 1599 if (readOpcodeRegister(insn, 2)) 1600 return -1; 1601 break; 1602 case ENCODING_RD: 1603 if (readOpcodeRegister(insn, 4)) 1604 return -1; 1605 break; 1606 case ENCODING_RO: 1607 if (readOpcodeRegister(insn, 8)) 1608 return -1; 1609 break; 1610 case 
ENCODING_Rv: 1611 if (readOpcodeRegister(insn, 0)) 1612 return -1; 1613 break; 1614 case ENCODING_I: 1615 if (readOpcodeModifier(insn)) 1616 return -1; 1617 break; 1618 case ENCODING_VVVV: 1619 needVVVV = 0; /* Mark that we have found a VVVV operand. */ 1620 if (!hasVVVV) 1621 return -1; 1622 if (fixupReg(insn, &insn->spec->operands[index])) 1623 return -1; 1624 break; 1625 case ENCODING_DUP: 1626 break; 1627 default: 1628 dbgprintf(insn, "Encountered an operand with an unknown encoding."); 1629 return -1; 1630 } 1631 } 1632 1633 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ 1634 if (needVVVV) return -1; 1635 1636 return 0; 1637} 1638 1639/* 1640 * decodeInstruction - Reads and interprets a full instruction provided by the 1641 * user. 1642 * 1643 * @param insn - A pointer to the instruction to be populated. Must be 1644 * pre-allocated. 1645 * @param reader - The function to be used to read the instruction's bytes. 1646 * @param readerArg - A generic argument to be passed to the reader to store 1647 * any internal state. 1648 * @param logger - If non-NULL, the function to be used to write log messages 1649 * and warnings. 1650 * @param loggerArg - A generic argument to be passed to the logger to store 1651 * any internal state. 1652 * @param startLoc - The address (in the reader's address space) of the first 1653 * byte in the instruction. 1654 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 1655 * decode the instruction in. 1656 * @return - 0 if the instruction's memory could be read; nonzero if 1657 * not. 
1658 */ 1659int decodeInstruction(struct InternalInstruction* insn, 1660 byteReader_t reader, 1661 void* readerArg, 1662 dlog_t logger, 1663 void* loggerArg, 1664 uint64_t startLoc, 1665 DisassemblerMode mode) { 1666 memset(insn, 0, sizeof(struct InternalInstruction)); 1667 1668 insn->reader = reader; 1669 insn->readerArg = readerArg; 1670 insn->dlog = logger; 1671 insn->dlogArg = loggerArg; 1672 insn->startLocation = startLoc; 1673 insn->readerCursor = startLoc; 1674 insn->mode = mode; 1675 insn->numImmediatesConsumed = 0; 1676 1677 if (readPrefixes(insn) || 1678 readOpcode(insn) || 1679 getID(insn) || 1680 insn->instructionID == 0 || 1681 readOperands(insn)) 1682 return -1; 1683 1684 insn->length = insn->readerCursor - insn->startLocation; 1685 1686 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 1687 startLoc, insn->readerCursor, insn->length); 1688 1689 if (insn->length > 15) 1690 dbgprintf(insn, "Instruction exceeds 15-byte limit"); 1691 1692 return 0; 1693} 1694