1/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===* 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is distributed under the University of Illinois Open Source 6 * License. See LICENSE.TXT for details. 7 * 8 *===----------------------------------------------------------------------===* 9 * 10 * This file is part of the X86 Disassembler. 11 * It contains the implementation of the instruction decoder. 12 * Documentation for the disassembler can be found in X86Disassembler.h. 13 * 14 *===----------------------------------------------------------------------===*/ 15 16#include <stdarg.h> /* for va_*() */ 17#include <stdio.h> /* for vsnprintf() */ 18#include <stdlib.h> /* for exit() */ 19#include <string.h> /* for memset() */ 20 21#include "X86DisassemblerDecoder.h" 22 23#include "X86GenDisassemblerTables.inc" 24 25#define TRUE 1 26#define FALSE 0 27 28typedef int8_t bool; 29 30#ifndef NDEBUG 31#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) 32#else 33#define debug(s) do { } while (0) 34#endif 35 36 37/* 38 * contextForAttrs - Client for the instruction context table. Takes a set of 39 * attributes and returns the appropriate decode context. 40 * 41 * @param attrMask - Attributes, from the enumeration attributeBits. 42 * @return - The InstructionContext to use when looking up an 43 * an instruction with these attributes. 44 */ 45static InstructionContext contextForAttrs(uint8_t attrMask) { 46 return CONTEXTS_SYM[attrMask]; 47} 48 49/* 50 * modRMRequired - Reads the appropriate instruction table to determine whether 51 * the ModR/M byte is required to decode a particular instruction. 52 * 53 * @param type - The opcode type (i.e., how many bytes it has). 54 * @param insnContext - The context for the instruction, as returned by 55 * contextForAttrs. 56 * @param opcode - The last byte of the instruction's opcode, not counting 57 * ModR/M extensions and escapes. 58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise. 59 */ 60static int modRMRequired(OpcodeType type, 61 InstructionContext insnContext, 62 uint8_t opcode) { 63 const struct ContextDecision* decision = 0; 64 65 switch (type) { 66 case ONEBYTE: 67 decision = &ONEBYTE_SYM; 68 break; 69 case TWOBYTE: 70 decision = &TWOBYTE_SYM; 71 break; 72 case THREEBYTE_38: 73 decision = &THREEBYTE38_SYM; 74 break; 75 case THREEBYTE_3A: 76 decision = &THREEBYTE3A_SYM; 77 break; 78 case THREEBYTE_A6: 79 decision = &THREEBYTEA6_SYM; 80 break; 81 case THREEBYTE_A7: 82 decision = &THREEBYTEA7_SYM; 83 break; 84 } 85 86 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 87 modrm_type != MODRM_ONEENTRY; 88} 89 90/* 91 * decode - Reads the appropriate instruction table to obtain the unique ID of 92 * an instruction. 93 * 94 * @param type - See modRMRequired(). 95 * @param insnContext - See modRMRequired(). 96 * @param opcode - See modRMRequired(). 97 * @param modRM - The ModR/M byte if required, or any value if not. 98 * @return - The UID of the instruction, or 0 on failure. 99 */ 100static InstrUID decode(OpcodeType type, 101 InstructionContext insnContext, 102 uint8_t opcode, 103 uint8_t modRM) { 104 const struct ModRMDecision* dec = 0; 105 106 switch (type) { 107 case ONEBYTE: 108 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 109 break; 110 case TWOBYTE: 111 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 112 break; 113 case THREEBYTE_38: 114 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 115 break; 116 case THREEBYTE_3A: 117 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 118 break; 119 case THREEBYTE_A6: 120 dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 121 break; 122 case THREEBYTE_A7: 123 dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 124 break; 125 } 126 127 switch (dec->modrm_type) { 128 default: 129 debug("Corrupt table! Unknown modrm_type"); 130 return 0; 131 case MODRM_ONEENTRY: 132 return modRMTable[dec->instructionIDs]; 133 case MODRM_SPLITRM: 134 if (modFromModRM(modRM) == 0x3) 135 return modRMTable[dec->instructionIDs+1]; 136 return modRMTable[dec->instructionIDs]; 137 case MODRM_SPLITREG: 138 if (modFromModRM(modRM) == 0x3) 139 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; 140 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 141 case MODRM_FULL: 142 return modRMTable[dec->instructionIDs+modRM]; 143 } 144} 145 146/* 147 * specifierForUID - Given a UID, returns the name and operand specification for 148 * that instruction. 149 * 150 * @param uid - The unique ID for the instruction. This should be returned by 151 * decode(); specifierForUID will not check bounds. 152 * @return - A pointer to the specification for that instruction. 153 */ 154static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { 155 return &INSTRUCTIONS_SYM[uid]; 156} 157 158/* 159 * consumeByte - Uses the reader function provided by the user to consume one 160 * byte from the instruction's memory and advance the cursor. 161 * 162 * @param insn - The instruction with the reader function to use. The cursor 163 * for this instruction is advanced. 164 * @param byte - A pointer to a pre-allocated memory buffer to be populated 165 * with the data read. 166 * @return - 0 if the read was successful; nonzero otherwise. 167 */ 168static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { 169 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 170 171 if (!ret) 172 ++(insn->readerCursor); 173 174 return ret; 175} 176 177/* 178 * lookAtByte - Like consumeByte, but does not advance the cursor. 179 * 180 * @param insn - See consumeByte(). 181 * @param byte - See consumeByte(). 182 * @return - See consumeByte(). 183 */ 184static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { 185 return insn->reader(insn->readerArg, byte, insn->readerCursor); 186} 187 188static void unconsumeByte(struct InternalInstruction* insn) { 189 insn->readerCursor--; 190} 191 192#define CONSUME_FUNC(name, type) \ 193 static int name(struct InternalInstruction* insn, type* ptr) { \ 194 type combined = 0; \ 195 unsigned offset; \ 196 for (offset = 0; offset < sizeof(type); ++offset) { \ 197 uint8_t byte; \ 198 int ret = insn->reader(insn->readerArg, \ 199 &byte, \ 200 insn->readerCursor + offset); \ 201 if (ret) \ 202 return ret; \ 203 combined = combined | ((uint64_t)byte << (offset * 8)); \ 204 } \ 205 *ptr = combined; \ 206 insn->readerCursor += sizeof(type); \ 207 return 0; \ 208 } 209 210/* 211 * consume* - Use the reader function provided by the user to consume data 212 * values of various sizes from the instruction's memory and advance the 213 * cursor appropriately. These readers perform endian conversion. 214 * 215 * @param insn - See consumeByte(). 216 * @param ptr - A pointer to a pre-allocated memory of appropriate size to 217 * be populated with the data read. 218 * @return - See consumeByte(). 219 */ 220CONSUME_FUNC(consumeInt8, int8_t) 221CONSUME_FUNC(consumeInt16, int16_t) 222CONSUME_FUNC(consumeInt32, int32_t) 223CONSUME_FUNC(consumeUInt16, uint16_t) 224CONSUME_FUNC(consumeUInt32, uint32_t) 225CONSUME_FUNC(consumeUInt64, uint64_t) 226 227/* 228 * dbgprintf - Uses the logging function provided by the user to log a single 229 * message, typically without a carriage-return. 230 * 231 * @param insn - The instruction containing the logging function. 232 * @param format - See printf(). 233 * @param ... - See printf(). 234 */ 235static void dbgprintf(struct InternalInstruction* insn, 236 const char* format, 237 ...) { 238 char buffer[256]; 239 va_list ap; 240 241 if (!insn->dlog) 242 return; 243 244 va_start(ap, format); 245 (void)vsnprintf(buffer, sizeof(buffer), format, ap); 246 va_end(ap); 247 248 insn->dlog(insn->dlogArg, buffer); 249 250 return; 251} 252 253/* 254 * setPrefixPresent - Marks that a particular prefix is present at a particular 255 * location. 256 * 257 * @param insn - The instruction to be marked as having the prefix. 258 * @param prefix - The prefix that is present. 259 * @param location - The location where the prefix is located (in the address 260 * space of the instruction's reader). 261 */ 262static void setPrefixPresent(struct InternalInstruction* insn, 263 uint8_t prefix, 264 uint64_t location) 265{ 266 insn->prefixPresent[prefix] = 1; 267 insn->prefixLocations[prefix] = location; 268} 269 270/* 271 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is 272 * present at a given location. 273 * 274 * @param insn - The instruction to be queried. 275 * @param prefix - The prefix. 276 * @param location - The location to query. 277 * @return - Whether the prefix is at that location. 278 */ 279static BOOL isPrefixAtLocation(struct InternalInstruction* insn, 280 uint8_t prefix, 281 uint64_t location) 282{ 283 if (insn->prefixPresent[prefix] == 1 && 284 insn->prefixLocations[prefix] == location) 285 return TRUE; 286 else 287 return FALSE; 288} 289 290/* 291 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 292 * instruction as having them. Also sets the instruction's default operand, 293 * address, and other relevant data sizes to report operands correctly. 294 * 295 * @param insn - The instruction whose prefixes are to be read. 296 * @return - 0 if the instruction could be read until the end of the prefix 297 * bytes, and no prefixes conflicted; nonzero otherwise. 298 */ 299static int readPrefixes(struct InternalInstruction* insn) { 300 BOOL isPrefix = TRUE; 301 BOOL prefixGroups[4] = { FALSE }; 302 uint64_t prefixLocation; 303 uint8_t byte = 0; 304 305 BOOL hasAdSize = FALSE; 306 BOOL hasOpSize = FALSE; 307 308 dbgprintf(insn, "readPrefixes()"); 309 310 while (isPrefix) { 311 prefixLocation = insn->readerCursor; 312 313 if (consumeByte(insn, &byte)) 314 return -1; 315 316 /* 317 * If the first byte is a LOCK prefix break and let it be disassembled 318 * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>. 319 * FIXME there is currently no way to get the disassembler to print the 320 * lock prefix if it is not the first byte. 321 */ 322 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) 323 break; 324 325 switch (byte) { 326 case 0xf0: /* LOCK */ 327 case 0xf2: /* REPNE/REPNZ */ 328 case 0xf3: /* REP or REPE/REPZ */ 329 if (prefixGroups[0]) 330 dbgprintf(insn, "Redundant Group 1 prefix"); 331 prefixGroups[0] = TRUE; 332 setPrefixPresent(insn, byte, prefixLocation); 333 break; 334 case 0x2e: /* CS segment override -OR- Branch not taken */ 335 case 0x36: /* SS segment override -OR- Branch taken */ 336 case 0x3e: /* DS segment override */ 337 case 0x26: /* ES segment override */ 338 case 0x64: /* FS segment override */ 339 case 0x65: /* GS segment override */ 340 switch (byte) { 341 case 0x2e: 342 insn->segmentOverride = SEG_OVERRIDE_CS; 343 break; 344 case 0x36: 345 insn->segmentOverride = SEG_OVERRIDE_SS; 346 break; 347 case 0x3e: 348 insn->segmentOverride = SEG_OVERRIDE_DS; 349 break; 350 case 0x26: 351 insn->segmentOverride = SEG_OVERRIDE_ES; 352 break; 353 case 0x64: 354 insn->segmentOverride = SEG_OVERRIDE_FS; 355 break; 356 case 0x65: 357 insn->segmentOverride = SEG_OVERRIDE_GS; 358 break; 359 default: 360 debug("Unhandled override"); 361 return -1; 362 } 363 if (prefixGroups[1]) 364 dbgprintf(insn, "Redundant Group 2 prefix"); 365 prefixGroups[1] = TRUE; 366 setPrefixPresent(insn, byte, prefixLocation); 367 break; 368 case 0x66: /* Operand-size override */ 369 if (prefixGroups[2]) 370 dbgprintf(insn, "Redundant Group 3 prefix"); 371 prefixGroups[2] = TRUE; 372 hasOpSize = TRUE; 373 setPrefixPresent(insn, byte, prefixLocation); 374 break; 375 case 0x67: /* Address-size override */ 376 if (prefixGroups[3]) 377 dbgprintf(insn, "Redundant Group 4 prefix"); 378 prefixGroups[3] = TRUE; 379 hasAdSize = TRUE; 380 setPrefixPresent(insn, byte, prefixLocation); 381 break; 382 default: /* Not a prefix byte */ 383 isPrefix = FALSE; 384 break; 385 } 386 387 if (isPrefix) 388 dbgprintf(insn, "Found prefix 0x%hhx", byte); 389 } 390 391 insn->vexSize = 0; 392 393 if (byte == 0xc4) { 394 uint8_t byte1; 395 396 if (lookAtByte(insn, &byte1)) { 397 dbgprintf(insn, "Couldn't read second byte of VEX"); 398 return -1; 399 } 400 401 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 402 insn->vexSize = 3; 403 insn->necessaryPrefixLocation = insn->readerCursor - 1; 404 } 405 else { 406 unconsumeByte(insn); 407 insn->necessaryPrefixLocation = insn->readerCursor - 1; 408 } 409 410 if (insn->vexSize == 3) { 411 insn->vexPrefix[0] = byte; 412 consumeByte(insn, &insn->vexPrefix[1]); 413 consumeByte(insn, &insn->vexPrefix[2]); 414 415 /* We simulate the REX prefix for simplicity's sake */ 416 417 if (insn->mode == MODE_64BIT) { 418 insn->rexPrefix = 0x40 419 | (wFromVEX3of3(insn->vexPrefix[2]) << 3) 420 | (rFromVEX2of3(insn->vexPrefix[1]) << 2) 421 | (xFromVEX2of3(insn->vexPrefix[1]) << 1) 422 | (bFromVEX2of3(insn->vexPrefix[1]) << 0); 423 } 424 425 switch (ppFromVEX3of3(insn->vexPrefix[2])) 426 { 427 default: 428 break; 429 case VEX_PREFIX_66: 430 hasOpSize = TRUE; 431 break; 432 } 433 434 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); 435 } 436 } 437 else if (byte == 0xc5) { 438 uint8_t byte1; 439 440 if (lookAtByte(insn, &byte1)) { 441 dbgprintf(insn, "Couldn't read second byte of VEX"); 442 return -1; 443 } 444 445 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 446 insn->vexSize = 2; 447 } 448 else { 449 unconsumeByte(insn); 450 } 451 452 if (insn->vexSize == 2) { 453 insn->vexPrefix[0] = byte; 454 consumeByte(insn, &insn->vexPrefix[1]); 455 456 if (insn->mode == MODE_64BIT) { 457 insn->rexPrefix = 0x40 458 | (rFromVEX2of2(insn->vexPrefix[1]) << 2); 459 } 460 461 switch (ppFromVEX2of2(insn->vexPrefix[1])) 462 { 463 default: 464 break; 465 case VEX_PREFIX_66: 466 hasOpSize = TRUE; 467 break; 468 } 469 470 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); 471 } 472 } 473 else { 474 if (insn->mode == MODE_64BIT) { 475 if ((byte & 0xf0) == 0x40) { 476 uint8_t opcodeByte; 477 478 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { 479 dbgprintf(insn, "Redundant REX prefix"); 480 return -1; 481 } 482 483 insn->rexPrefix = byte; 484 insn->necessaryPrefixLocation = insn->readerCursor - 2; 485 486 dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 487 } else { 488 unconsumeByte(insn); 489 insn->necessaryPrefixLocation = insn->readerCursor - 1; 490 } 491 } else { 492 unconsumeByte(insn); 493 insn->necessaryPrefixLocation = insn->readerCursor - 1; 494 } 495 } 496 497 if (insn->mode == MODE_16BIT) { 498 insn->registerSize = (hasOpSize ? 4 : 2); 499 insn->addressSize = (hasAdSize ? 4 : 2); 500 insn->displacementSize = (hasAdSize ? 4 : 2); 501 insn->immediateSize = (hasOpSize ? 4 : 2); 502 } else if (insn->mode == MODE_32BIT) { 503 insn->registerSize = (hasOpSize ? 2 : 4); 504 insn->addressSize = (hasAdSize ? 2 : 4); 505 insn->displacementSize = (hasAdSize ? 2 : 4); 506 insn->immediateSize = (hasOpSize ? 2 : 4); 507 } else if (insn->mode == MODE_64BIT) { 508 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 509 insn->registerSize = 8; 510 insn->addressSize = (hasAdSize ? 4 : 8); 511 insn->displacementSize = 4; 512 insn->immediateSize = 4; 513 } else if (insn->rexPrefix) { 514 insn->registerSize = (hasOpSize ? 2 : 4); 515 insn->addressSize = (hasAdSize ? 4 : 8); 516 insn->displacementSize = (hasOpSize ? 2 : 4); 517 insn->immediateSize = (hasOpSize ? 2 : 4); 518 } else { 519 insn->registerSize = (hasOpSize ? 2 : 4); 520 insn->addressSize = (hasAdSize ? 4 : 8); 521 insn->displacementSize = (hasOpSize ? 2 : 4); 522 insn->immediateSize = (hasOpSize ? 2 : 4); 523 } 524 } 525 526 return 0; 527} 528 529/* 530 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 531 * extended or escape opcodes). 532 * 533 * @param insn - The instruction whose opcode is to be read. 534 * @return - 0 if the opcode could be read successfully; nonzero otherwise. 535 */ 536static int readOpcode(struct InternalInstruction* insn) { 537 /* Determine the length of the primary opcode */ 538 539 uint8_t current; 540 541 dbgprintf(insn, "readOpcode()"); 542 543 insn->opcodeType = ONEBYTE; 544 545 if (insn->vexSize == 3) 546 { 547 switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) 548 { 549 default: 550 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); 551 return -1; 552 case 0: 553 break; 554 case VEX_LOB_0F: 555 insn->twoByteEscape = 0x0f; 556 insn->opcodeType = TWOBYTE; 557 return consumeByte(insn, &insn->opcode); 558 case VEX_LOB_0F38: 559 insn->twoByteEscape = 0x0f; 560 insn->threeByteEscape = 0x38; 561 insn->opcodeType = THREEBYTE_38; 562 return consumeByte(insn, &insn->opcode); 563 case VEX_LOB_0F3A: 564 insn->twoByteEscape = 0x0f; 565 insn->threeByteEscape = 0x3a; 566 insn->opcodeType = THREEBYTE_3A; 567 return consumeByte(insn, &insn->opcode); 568 } 569 } 570 else if (insn->vexSize == 2) 571 { 572 insn->twoByteEscape = 0x0f; 573 insn->opcodeType = TWOBYTE; 574 return consumeByte(insn, &insn->opcode); 575 } 576 577 if (consumeByte(insn, ¤t)) 578 return -1; 579 580 if (current == 0x0f) { 581 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 582 583 insn->twoByteEscape = current; 584 585 if (consumeByte(insn, ¤t)) 586 return -1; 587 588 if (current == 0x38) { 589 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 590 591 insn->threeByteEscape = current; 592 593 if (consumeByte(insn, ¤t)) 594 return -1; 595 596 insn->opcodeType = THREEBYTE_38; 597 } else if (current == 0x3a) { 598 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 599 600 insn->threeByteEscape = current; 601 602 if (consumeByte(insn, ¤t)) 603 return -1; 604 605 insn->opcodeType = THREEBYTE_3A; 606 } else if (current == 0xa6) { 607 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 608 609 insn->threeByteEscape = current; 610 611 if (consumeByte(insn, ¤t)) 612 return -1; 613 614 insn->opcodeType = THREEBYTE_A6; 615 } else if (current == 0xa7) { 616 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 617 618 insn->threeByteEscape = current; 619 620 if (consumeByte(insn, ¤t)) 621 return -1; 622 623 insn->opcodeType = THREEBYTE_A7; 624 } else { 625 dbgprintf(insn, "Didn't find a three-byte escape prefix"); 626 627 insn->opcodeType = TWOBYTE; 628 } 629 } 630 631 /* 632 * At this point we have consumed the full opcode. 633 * Anything we consume from here on must be unconsumed. 634 */ 635 636 insn->opcode = current; 637 638 return 0; 639} 640 641static int readModRM(struct InternalInstruction* insn); 642 643/* 644 * getIDWithAttrMask - Determines the ID of an instruction, consuming 645 * the ModR/M byte as appropriate for extended and escape opcodes, 646 * and using a supplied attribute mask. 647 * 648 * @param instructionID - A pointer whose target is filled in with the ID of the 649 * instruction. 650 * @param insn - The instruction whose ID is to be determined. 651 * @param attrMask - The attribute mask to search. 652 * @return - 0 if the ModR/M could be read when needed or was not 653 * needed; nonzero otherwise. 654 */ 655static int getIDWithAttrMask(uint16_t* instructionID, 656 struct InternalInstruction* insn, 657 uint8_t attrMask) { 658 BOOL hasModRMExtension; 659 660 uint8_t instructionClass; 661 662 instructionClass = contextForAttrs(attrMask); 663 664 hasModRMExtension = modRMRequired(insn->opcodeType, 665 instructionClass, 666 insn->opcode); 667 668 if (hasModRMExtension) { 669 if (readModRM(insn)) 670 return -1; 671 672 *instructionID = decode(insn->opcodeType, 673 instructionClass, 674 insn->opcode, 675 insn->modRM); 676 } else { 677 *instructionID = decode(insn->opcodeType, 678 instructionClass, 679 insn->opcode, 680 0); 681 } 682 683 return 0; 684} 685 686/* 687 * is16BitEquivalent - Determines whether two instruction names refer to 688 * equivalent instructions but one is 16-bit whereas the other is not. 689 * 690 * @param orig - The instruction that is not 16-bit 691 * @param equiv - The instruction that is 16-bit 692 */ 693static BOOL is16BitEquvalent(const char* orig, const char* equiv) { 694 off_t i; 695 696 for (i = 0;; i++) { 697 if (orig[i] == '\0' && equiv[i] == '\0') 698 return TRUE; 699 if (orig[i] == '\0' || equiv[i] == '\0') 700 return FALSE; 701 if (orig[i] != equiv[i]) { 702 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 703 continue; 704 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 705 continue; 706 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 707 continue; 708 return FALSE; 709 } 710 } 711} 712 713/* 714 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 715 * appropriate for extended and escape opcodes. Determines the attributes and 716 * context for the instruction before doing so. 717 * 718 * @param insn - The instruction whose ID is to be determined. 719 * @return - 0 if the ModR/M could be read when needed or was not needed; 720 * nonzero otherwise. 721 */ 722static int getID(struct InternalInstruction* insn, const void *miiArg) { 723 uint8_t attrMask; 724 uint16_t instructionID; 725 726 dbgprintf(insn, "getID()"); 727 728 attrMask = ATTR_NONE; 729 730 if (insn->mode == MODE_64BIT) 731 attrMask |= ATTR_64BIT; 732 733 if (insn->vexSize) { 734 attrMask |= ATTR_VEX; 735 736 if (insn->vexSize == 3) { 737 switch (ppFromVEX3of3(insn->vexPrefix[2])) { 738 case VEX_PREFIX_66: 739 attrMask |= ATTR_OPSIZE; 740 break; 741 case VEX_PREFIX_F3: 742 attrMask |= ATTR_XS; 743 break; 744 case VEX_PREFIX_F2: 745 attrMask |= ATTR_XD; 746 break; 747 } 748 749 if (lFromVEX3of3(insn->vexPrefix[2])) 750 attrMask |= ATTR_VEXL; 751 } 752 else if (insn->vexSize == 2) { 753 switch (ppFromVEX2of2(insn->vexPrefix[1])) { 754 case VEX_PREFIX_66: 755 attrMask |= ATTR_OPSIZE; 756 break; 757 case VEX_PREFIX_F3: 758 attrMask |= ATTR_XS; 759 break; 760 case VEX_PREFIX_F2: 761 attrMask |= ATTR_XD; 762 break; 763 } 764 765 if (lFromVEX2of2(insn->vexPrefix[1])) 766 attrMask |= ATTR_VEXL; 767 } 768 else { 769 return -1; 770 } 771 } 772 else { 773 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) 774 attrMask |= ATTR_OPSIZE; 775 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) 776 attrMask |= ATTR_ADSIZE; 777 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) 778 attrMask |= ATTR_XS; 779 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) 780 attrMask |= ATTR_XD; 781 } 782 783 if (insn->rexPrefix & 0x08) 784 attrMask |= ATTR_REXW; 785 786 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 787 return -1; 788 789 /* The following clauses compensate for limitations of the tables. */ 790 791 if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) && 792 !(attrMask & ATTR_OPSIZE)) { 793 /* 794 * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit 795 * has precedence since there are no L-bit with W-bit entries in the tables. 796 * So if the L-bit isn't significant we should use the W-bit instead. 797 * We only need to do this if the instruction doesn't specify OpSize since 798 * there is a VEX_L_W_OPSIZE table. 799 */ 800 801 const struct InstructionSpecifier *spec; 802 uint16_t instructionIDWithWBit; 803 const struct InstructionSpecifier *specWithWBit; 804 805 spec = specifierForUID(instructionID); 806 807 if (getIDWithAttrMask(&instructionIDWithWBit, 808 insn, 809 (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) { 810 insn->instructionID = instructionID; 811 insn->spec = spec; 812 return 0; 813 } 814 815 specWithWBit = specifierForUID(instructionIDWithWBit); 816 817 if (instructionID != instructionIDWithWBit) { 818 insn->instructionID = instructionIDWithWBit; 819 insn->spec = specWithWBit; 820 } else { 821 insn->instructionID = instructionID; 822 insn->spec = spec; 823 } 824 return 0; 825 } 826 827 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { 828 /* 829 * The instruction tables make no distinction between instructions that 830 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 831 * particular spot (i.e., many MMX operations). In general we're 832 * conservative, but in the specific case where OpSize is present but not 833 * in the right place we check if there's a 16-bit operation. 834 */ 835 836 const struct InstructionSpecifier *spec; 837 uint16_t instructionIDWithOpsize; 838 const char *specName, *specWithOpSizeName; 839 840 spec = specifierForUID(instructionID); 841 842 if (getIDWithAttrMask(&instructionIDWithOpsize, 843 insn, 844 attrMask | ATTR_OPSIZE)) { 845 /* 846 * ModRM required with OpSize but not present; give up and return version 847 * without OpSize set 848 */ 849 850 insn->instructionID = instructionID; 851 insn->spec = spec; 852 return 0; 853 } 854 855 specName = x86DisassemblerGetInstrName(instructionID, miiArg); 856 specWithOpSizeName = 857 x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); 858 859 if (is16BitEquvalent(specName, specWithOpSizeName)) { 860 insn->instructionID = instructionIDWithOpsize; 861 insn->spec = specifierForUID(instructionIDWithOpsize); 862 } else { 863 insn->instructionID = instructionID; 864 insn->spec = spec; 865 } 866 return 0; 867 } 868 869 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 870 insn->rexPrefix & 0x01) { 871 /* 872 * NOOP shouldn't decode as NOOP if REX.b is set. Instead 873 * it should decode as XCHG %r8, %eax. 874 */ 875 876 const struct InstructionSpecifier *spec; 877 uint16_t instructionIDWithNewOpcode; 878 const struct InstructionSpecifier *specWithNewOpcode; 879 880 spec = specifierForUID(instructionID); 881 882 /* Borrow opcode from one of the other XCHGar opcodes */ 883 insn->opcode = 0x91; 884 885 if (getIDWithAttrMask(&instructionIDWithNewOpcode, 886 insn, 887 attrMask)) { 888 insn->opcode = 0x90; 889 890 insn->instructionID = instructionID; 891 insn->spec = spec; 892 return 0; 893 } 894 895 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); 896 897 /* Change back */ 898 insn->opcode = 0x90; 899 900 insn->instructionID = instructionIDWithNewOpcode; 901 insn->spec = specWithNewOpcode; 902 903 return 0; 904 } 905 906 insn->instructionID = instructionID; 907 insn->spec = specifierForUID(insn->instructionID); 908 909 return 0; 910} 911 912/* 913 * readSIB - Consumes the SIB byte to determine addressing information for an 914 * instruction. 915 * 916 * @param insn - The instruction whose SIB byte is to be read. 917 * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 918 */ 919static int readSIB(struct InternalInstruction* insn) { 920 SIBIndex sibIndexBase = 0; 921 SIBBase sibBaseBase = 0; 922 uint8_t index, base; 923 924 dbgprintf(insn, "readSIB()"); 925 926 if (insn->consumedSIB) 927 return 0; 928 929 insn->consumedSIB = TRUE; 930 931 switch (insn->addressSize) { 932 case 2: 933 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 934 return -1; 935 break; 936 case 4: 937 sibIndexBase = SIB_INDEX_EAX; 938 sibBaseBase = SIB_BASE_EAX; 939 break; 940 case 8: 941 sibIndexBase = SIB_INDEX_RAX; 942 sibBaseBase = SIB_BASE_RAX; 943 break; 944 } 945 946 if (consumeByte(insn, &insn->sib)) 947 return -1; 948 949 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 950 951 switch (index) { 952 case 0x4: 953 insn->sibIndex = SIB_INDEX_NONE; 954 break; 955 default: 956 insn->sibIndex = (SIBIndex)(sibIndexBase + index); 957 if (insn->sibIndex == SIB_INDEX_sib || 958 insn->sibIndex == SIB_INDEX_sib64) 959 insn->sibIndex = SIB_INDEX_NONE; 960 break; 961 } 962 963 switch (scaleFromSIB(insn->sib)) { 964 case 0: 965 insn->sibScale = 1; 966 break; 967 case 1: 968 insn->sibScale = 2; 969 break; 970 case 2: 971 insn->sibScale = 4; 972 break; 973 case 3: 974 insn->sibScale = 8; 975 break; 976 } 977 978 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 979 980 switch (base) { 981 case 0x5: 982 switch (modFromModRM(insn->modRM)) { 983 case 0x0: 984 insn->eaDisplacement = EA_DISP_32; 985 insn->sibBase = SIB_BASE_NONE; 986 break; 987 case 0x1: 988 insn->eaDisplacement = EA_DISP_8; 989 insn->sibBase = (insn->addressSize == 4 ? 990 SIB_BASE_EBP : SIB_BASE_RBP); 991 break; 992 case 0x2: 993 insn->eaDisplacement = EA_DISP_32; 994 insn->sibBase = (insn->addressSize == 4 ? 995 SIB_BASE_EBP : SIB_BASE_RBP); 996 break; 997 case 0x3: 998 debug("Cannot have Mod = 0b11 and a SIB byte"); 999 return -1; 1000 } 1001 break; 1002 default: 1003 insn->sibBase = (SIBBase)(sibBaseBase + base); 1004 break; 1005 } 1006 1007 return 0; 1008} 1009 1010/* 1011 * readDisplacement - Consumes the displacement of an instruction. 1012 * 1013 * @param insn - The instruction whose displacement is to be read. 1014 * @return - 0 if the displacement byte was successfully read; nonzero 1015 * otherwise. 1016 */ 1017static int readDisplacement(struct InternalInstruction* insn) { 1018 int8_t d8; 1019 int16_t d16; 1020 int32_t d32; 1021 1022 dbgprintf(insn, "readDisplacement()"); 1023 1024 if (insn->consumedDisplacement) 1025 return 0; 1026 1027 insn->consumedDisplacement = TRUE; 1028 insn->displacementOffset = insn->readerCursor - insn->startLocation; 1029 1030 switch (insn->eaDisplacement) { 1031 case EA_DISP_NONE: 1032 insn->consumedDisplacement = FALSE; 1033 break; 1034 case EA_DISP_8: 1035 if (consumeInt8(insn, &d8)) 1036 return -1; 1037 insn->displacement = d8; 1038 break; 1039 case EA_DISP_16: 1040 if (consumeInt16(insn, &d16)) 1041 return -1; 1042 insn->displacement = d16; 1043 break; 1044 case EA_DISP_32: 1045 if (consumeInt32(insn, &d32)) 1046 return -1; 1047 insn->displacement = d32; 1048 break; 1049 } 1050 1051 insn->consumedDisplacement = TRUE; 1052 return 0; 1053} 1054 1055/* 1056 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and 1057 * displacement) for an instruction and interprets it. 1058 * 1059 * @param insn - The instruction whose addressing information is to be read. 1060 * @return - 0 if the information was successfully read; nonzero otherwise. 1061 */ 1062static int readModRM(struct InternalInstruction* insn) { 1063 uint8_t mod, rm, reg; 1064 1065 dbgprintf(insn, "readModRM()"); 1066 1067 if (insn->consumedModRM) 1068 return 0; 1069 1070 if (consumeByte(insn, &insn->modRM)) 1071 return -1; 1072 insn->consumedModRM = TRUE; 1073 1074 mod = modFromModRM(insn->modRM); 1075 rm = rmFromModRM(insn->modRM); 1076 reg = regFromModRM(insn->modRM); 1077 1078 /* 1079 * This goes by insn->registerSize to pick the correct register, which messes 1080 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in 1081 * fixupReg(). 1082 */ 1083 switch (insn->registerSize) { 1084 case 2: 1085 insn->regBase = MODRM_REG_AX; 1086 insn->eaRegBase = EA_REG_AX; 1087 break; 1088 case 4: 1089 insn->regBase = MODRM_REG_EAX; 1090 insn->eaRegBase = EA_REG_EAX; 1091 break; 1092 case 8: 1093 insn->regBase = MODRM_REG_RAX; 1094 insn->eaRegBase = EA_REG_RAX; 1095 break; 1096 } 1097 1098 reg |= rFromREX(insn->rexPrefix) << 3; 1099 rm |= bFromREX(insn->rexPrefix) << 3; 1100 1101 insn->reg = (Reg)(insn->regBase + reg); 1102 1103 switch (insn->addressSize) { 1104 case 2: 1105 insn->eaBaseBase = EA_BASE_BX_SI; 1106 1107 switch (mod) { 1108 case 0x0: 1109 if (rm == 0x6) { 1110 insn->eaBase = EA_BASE_NONE; 1111 insn->eaDisplacement = EA_DISP_16; 1112 if (readDisplacement(insn)) 1113 return -1; 1114 } else { 1115 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1116 insn->eaDisplacement = EA_DISP_NONE; 1117 } 1118 break; 1119 case 0x1: 1120 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1121 insn->eaDisplacement = EA_DISP_8; 1122 if (readDisplacement(insn)) 1123 return -1; 1124 break; 1125 case 0x2: 1126 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1127 insn->eaDisplacement = EA_DISP_16; 1128 if (readDisplacement(insn)) 1129 return -1; 1130 break; 1131 case 0x3: 1132 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1133 if (readDisplacement(insn)) 1134 return -1; 1135 break; 1136 } 1137 break; 1138 case 4: 1139 case 8: 1140 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 1141 1142 switch (mod) { 1143 case 0x0: 1144 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ 1145 switch (rm) { 1146 case 0x4: 1147 case 0xc: /* in case REXW.b is set */ 1148 insn->eaBase = (insn->addressSize == 4 ? 1149 EA_BASE_sib : EA_BASE_sib64); 1150 readSIB(insn); 1151 if (readDisplacement(insn)) 1152 return -1; 1153 break; 1154 case 0x5: 1155 insn->eaBase = EA_BASE_NONE; 1156 insn->eaDisplacement = EA_DISP_32; 1157 if (readDisplacement(insn)) 1158 return -1; 1159 break; 1160 default: 1161 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1162 break; 1163 } 1164 break; 1165 case 0x1: 1166 case 0x2: 1167 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); 1168 switch (rm) { 1169 case 0x4: 1170 case 0xc: /* in case REXW.b is set */ 1171 insn->eaBase = EA_BASE_sib; 1172 readSIB(insn); 1173 if (readDisplacement(insn)) 1174 return -1; 1175 break; 1176 default: 1177 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1178 if (readDisplacement(insn)) 1179 return -1; 1180 break; 1181 } 1182 break; 1183 case 0x3: 1184 insn->eaDisplacement = EA_DISP_NONE; 1185 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1186 break; 1187 } 1188 break; 1189 } /* switch (insn->addressSize) */ 1190 1191 return 0; 1192} 1193 1194#define GENERIC_FIXUP_FUNC(name, base, prefix) \ 1195 static uint8_t name(struct InternalInstruction *insn, \ 1196 OperandType type, \ 1197 uint8_t index, \ 1198 uint8_t *valid) { \ 1199 *valid = 1; \ 1200 switch (type) { \ 1201 default: \ 1202 debug("Unhandled register type"); \ 1203 *valid = 0; \ 1204 return 0; \ 1205 case TYPE_Rv: \ 1206 return base + index; \ 1207 case TYPE_R8: \ 1208 if (insn->rexPrefix && \ 1209 index >= 4 && index <= 7) { \ 1210 return prefix##_SPL + (index - 4); \ 1211 } else { \ 1212 return prefix##_AL + index; \ 1213 } \ 1214 case TYPE_R16: \ 1215 return prefix##_AX + index; \ 1216 case TYPE_R32: \ 1217 return prefix##_EAX + index; \ 1218 case TYPE_R64: \ 1219 return prefix##_RAX + index; \ 1220 case TYPE_XMM256: \ 1221 return prefix##_YMM0 + index; \ 1222 case TYPE_XMM128: \ 1223 case TYPE_XMM64: \ 1224 case TYPE_XMM32: \ 1225 case TYPE_XMM: \ 1226 return prefix##_XMM0 + index; \ 1227 case TYPE_MM64: \ 1228 case TYPE_MM32: \ 1229 case TYPE_MM: \ 1230 if (index > 7) \ 1231 *valid = 0; \ 1232 return prefix##_MM0 + index; \ 1233 case TYPE_SEGMENTREG: \ 1234 if (index > 5) \ 1235 *valid = 0; \ 1236 return prefix##_ES + index; \ 1237 case TYPE_DEBUGREG: \ 1238 if (index > 7) \ 1239 *valid = 0; \ 1240 return prefix##_DR0 + index; \ 1241 case TYPE_CONTROLREG: \ 1242 if (index > 8) \ 1243 *valid = 0; \ 1244 return prefix##_CR0 + index; \ 1245 } \ 1246 } 1247 1248/* 1249 * fixup*Value - Consults an operand type to determine the meaning of the 1250 * reg or R/M field. If the operand is an XMM operand, for example, an 1251 * operand would be XMM0 instead of AX, which readModRM() would otherwise 1252 * misinterpret it as. 1253 * 1254 * @param insn - The instruction containing the operand. 1255 * @param type - The operand type. 1256 * @param index - The existing value of the field as reported by readModRM(). 1257 * @param valid - The address of a uint8_t. The target is set to 1 if the 1258 * field is valid for the register class; 0 if not. 1259 * @return - The proper value. 1260 */ 1261GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) 1262GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) 1263 1264/* 1265 * fixupReg - Consults an operand specifier to determine which of the 1266 * fixup*Value functions to use in correcting readModRM()'ss interpretation. 1267 * 1268 * @param insn - See fixup*Value(). 1269 * @param op - The operand specifier. 1270 * @return - 0 if fixup was successful; -1 if the register returned was 1271 * invalid for its class. 1272 */ 1273static int fixupReg(struct InternalInstruction *insn, 1274 const struct OperandSpecifier *op) { 1275 uint8_t valid; 1276 1277 dbgprintf(insn, "fixupReg()"); 1278 1279 switch ((OperandEncoding)op->encoding) { 1280 default: 1281 debug("Expected a REG or R/M encoding in fixupReg"); 1282 return -1; 1283 case ENCODING_VVVV: 1284 insn->vvvv = (Reg)fixupRegValue(insn, 1285 (OperandType)op->type, 1286 insn->vvvv, 1287 &valid); 1288 if (!valid) 1289 return -1; 1290 break; 1291 case ENCODING_REG: 1292 insn->reg = (Reg)fixupRegValue(insn, 1293 (OperandType)op->type, 1294 insn->reg - insn->regBase, 1295 &valid); 1296 if (!valid) 1297 return -1; 1298 break; 1299 case ENCODING_RM: 1300 if (insn->eaBase >= insn->eaRegBase) { 1301 insn->eaBase = (EABase)fixupRMValue(insn, 1302 (OperandType)op->type, 1303 insn->eaBase - insn->eaRegBase, 1304 &valid); 1305 if (!valid) 1306 return -1; 1307 } 1308 break; 1309 } 1310 1311 return 0; 1312} 1313 1314/* 1315 * readOpcodeModifier - Reads an operand from the opcode field of an 1316 * instruction. Handles AddRegFrm instructions. 1317 * 1318 * @param insn - The instruction whose opcode field is to be read. 1319 * @param inModRM - Indicates that the opcode field is to be read from the 1320 * ModR/M extension; useful for escape opcodes 1321 * @return - 0 on success; nonzero otherwise. 1322 */ 1323static int readOpcodeModifier(struct InternalInstruction* insn) { 1324 dbgprintf(insn, "readOpcodeModifier()"); 1325 1326 if (insn->consumedOpcodeModifier) 1327 return 0; 1328 1329 insn->consumedOpcodeModifier = TRUE; 1330 1331 switch (insn->spec->modifierType) { 1332 default: 1333 debug("Unknown modifier type."); 1334 return -1; 1335 case MODIFIER_NONE: 1336 debug("No modifier but an operand expects one."); 1337 return -1; 1338 case MODIFIER_OPCODE: 1339 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; 1340 return 0; 1341 case MODIFIER_MODRM: 1342 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; 1343 return 0; 1344 } 1345} 1346 1347/* 1348 * readOpcodeRegister - Reads an operand from the opcode field of an 1349 * instruction and interprets it appropriately given the operand width. 1350 * Handles AddRegFrm instructions. 1351 * 1352 * @param insn - See readOpcodeModifier(). 1353 * @param size - The width (in bytes) of the register being specified. 1354 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1355 * RAX. 1356 * @return - 0 on success; nonzero otherwise. 1357 */ 1358static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { 1359 dbgprintf(insn, "readOpcodeRegister()"); 1360 1361 if (readOpcodeModifier(insn)) 1362 return -1; 1363 1364 if (size == 0) 1365 size = insn->registerSize; 1366 1367 switch (size) { 1368 case 1: 1369 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 1370 | insn->opcodeModifier)); 1371 if (insn->rexPrefix && 1372 insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1373 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1374 insn->opcodeRegister = (Reg)(MODRM_REG_SPL 1375 + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1376 } 1377 1378 break; 1379 case 2: 1380 insn->opcodeRegister = (Reg)(MODRM_REG_AX 1381 + ((bFromREX(insn->rexPrefix) << 3) 1382 | insn->opcodeModifier)); 1383 break; 1384 case 4: 1385 insn->opcodeRegister = (Reg)(MODRM_REG_EAX 1386 + ((bFromREX(insn->rexPrefix) << 3) 1387 | insn->opcodeModifier)); 1388 break; 1389 case 8: 1390 insn->opcodeRegister = (Reg)(MODRM_REG_RAX 1391 + ((bFromREX(insn->rexPrefix) << 3) 1392 | insn->opcodeModifier)); 1393 break; 1394 } 1395 1396 return 0; 1397} 1398 1399/* 1400 * readImmediate - Consumes an immediate operand from an instruction, given the 1401 * desired operand size. 1402 * 1403 * @param insn - The instruction whose operand is to be read. 1404 * @param size - The width (in bytes) of the operand. 1405 * @return - 0 if the immediate was successfully consumed; nonzero 1406 * otherwise. 1407 */ 1408static int readImmediate(struct InternalInstruction* insn, uint8_t size) { 1409 uint8_t imm8; 1410 uint16_t imm16; 1411 uint32_t imm32; 1412 uint64_t imm64; 1413 1414 dbgprintf(insn, "readImmediate()"); 1415 1416 if (insn->numImmediatesConsumed == 2) { 1417 debug("Already consumed two immediates"); 1418 return -1; 1419 } 1420 1421 if (size == 0) 1422 size = insn->immediateSize; 1423 else 1424 insn->immediateSize = size; 1425 insn->immediateOffset = insn->readerCursor - insn->startLocation; 1426 1427 switch (size) { 1428 case 1: 1429 if (consumeByte(insn, &imm8)) 1430 return -1; 1431 insn->immediates[insn->numImmediatesConsumed] = imm8; 1432 break; 1433 case 2: 1434 if (consumeUInt16(insn, &imm16)) 1435 return -1; 1436 insn->immediates[insn->numImmediatesConsumed] = imm16; 1437 break; 1438 case 4: 1439 if (consumeUInt32(insn, &imm32)) 1440 return -1; 1441 insn->immediates[insn->numImmediatesConsumed] = imm32; 1442 break; 1443 case 8: 1444 if (consumeUInt64(insn, &imm64)) 1445 return -1; 1446 insn->immediates[insn->numImmediatesConsumed] = imm64; 1447 break; 1448 } 1449 1450 insn->numImmediatesConsumed++; 1451 1452 return 0; 1453} 1454 1455/* 1456 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. 1457 * 1458 * @param insn - The instruction whose operand is to be read. 1459 * @return - 0 if the vvvv was successfully consumed; nonzero 1460 * otherwise. 1461 */ 1462static int readVVVV(struct InternalInstruction* insn) { 1463 dbgprintf(insn, "readVVVV()"); 1464 1465 if (insn->vexSize == 3) 1466 insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]); 1467 else if (insn->vexSize == 2) 1468 insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]); 1469 else 1470 return -1; 1471 1472 if (insn->mode != MODE_64BIT) 1473 insn->vvvv &= 0x7; 1474 1475 return 0; 1476} 1477 1478/* 1479 * readOperands - Consults the specifier for an instruction and consumes all 1480 * operands for that instruction, interpreting them as it goes. 1481 * 1482 * @param insn - The instruction whose operands are to be read and interpreted. 1483 * @return - 0 if all operands could be read; nonzero otherwise. 1484 */ 1485static int readOperands(struct InternalInstruction* insn) { 1486 int index; 1487 int hasVVVV, needVVVV; 1488 int sawRegImm = 0; 1489 1490 dbgprintf(insn, "readOperands()"); 1491 1492 /* If non-zero vvvv specified, need to make sure one of the operands 1493 uses it. */ 1494 hasVVVV = !readVVVV(insn); 1495 needVVVV = hasVVVV && (insn->vvvv != 0); 1496 1497 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 1498 switch (x86OperandSets[insn->spec->operands][index].encoding) { 1499 case ENCODING_NONE: 1500 break; 1501 case ENCODING_REG: 1502 case ENCODING_RM: 1503 if (readModRM(insn)) 1504 return -1; 1505 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 1506 return -1; 1507 break; 1508 case ENCODING_CB: 1509 case ENCODING_CW: 1510 case ENCODING_CD: 1511 case ENCODING_CP: 1512 case ENCODING_CO: 1513 case ENCODING_CT: 1514 dbgprintf(insn, "We currently don't hande code-offset encodings"); 1515 return -1; 1516 case ENCODING_IB: 1517 if (sawRegImm) { 1518 /* Saw a register immediate so don't read again and instead split the 1519 previous immediate. FIXME: This is a hack. */ 1520 insn->immediates[insn->numImmediatesConsumed] = 1521 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 1522 ++insn->numImmediatesConsumed; 1523 break; 1524 } 1525 if (readImmediate(insn, 1)) 1526 return -1; 1527 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 && 1528 insn->immediates[insn->numImmediatesConsumed - 1] > 7) 1529 return -1; 1530 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 && 1531 insn->immediates[insn->numImmediatesConsumed - 1] > 31) 1532 return -1; 1533 if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || 1534 x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) 1535 sawRegImm = 1; 1536 break; 1537 case ENCODING_IW: 1538 if (readImmediate(insn, 2)) 1539 return -1; 1540 break; 1541 case ENCODING_ID: 1542 if (readImmediate(insn, 4)) 1543 return -1; 1544 break; 1545 case ENCODING_IO: 1546 if (readImmediate(insn, 8)) 1547 return -1; 1548 break; 1549 case ENCODING_Iv: 1550 if (readImmediate(insn, insn->immediateSize)) 1551 return -1; 1552 break; 1553 case ENCODING_Ia: 1554 if (readImmediate(insn, insn->addressSize)) 1555 return -1; 1556 break; 1557 case ENCODING_RB: 1558 if (readOpcodeRegister(insn, 1)) 1559 return -1; 1560 break; 1561 case ENCODING_RW: 1562 if (readOpcodeRegister(insn, 2)) 1563 return -1; 1564 break; 1565 case ENCODING_RD: 1566 if (readOpcodeRegister(insn, 4)) 1567 return -1; 1568 break; 1569 case ENCODING_RO: 1570 if (readOpcodeRegister(insn, 8)) 1571 return -1; 1572 break; 1573 case ENCODING_Rv: 1574 if (readOpcodeRegister(insn, 0)) 1575 return -1; 1576 break; 1577 case ENCODING_I: 1578 if (readOpcodeModifier(insn)) 1579 return -1; 1580 break; 1581 case ENCODING_VVVV: 1582 needVVVV = 0; /* Mark that we have found a VVVV operand. */ 1583 if (!hasVVVV) 1584 return -1; 1585 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 1586 return -1; 1587 break; 1588 case ENCODING_DUP: 1589 break; 1590 default: 1591 dbgprintf(insn, "Encountered an operand with an unknown encoding."); 1592 return -1; 1593 } 1594 } 1595 1596 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ 1597 if (needVVVV) return -1; 1598 1599 return 0; 1600} 1601 1602/* 1603 * decodeInstruction - Reads and interprets a full instruction provided by the 1604 * user. 1605 * 1606 * @param insn - A pointer to the instruction to be populated. Must be 1607 * pre-allocated. 1608 * @param reader - The function to be used to read the instruction's bytes. 1609 * @param readerArg - A generic argument to be passed to the reader to store 1610 * any internal state. 1611 * @param logger - If non-NULL, the function to be used to write log messages 1612 * and warnings. 1613 * @param loggerArg - A generic argument to be passed to the logger to store 1614 * any internal state. 1615 * @param startLoc - The address (in the reader's address space) of the first 1616 * byte in the instruction. 1617 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 1618 * decode the instruction in. 1619 * @return - 0 if the instruction's memory could be read; nonzero if 1620 * not. 1621 */ 1622int decodeInstruction(struct InternalInstruction* insn, 1623 byteReader_t reader, 1624 const void* readerArg, 1625 dlog_t logger, 1626 void* loggerArg, 1627 const void* miiArg, 1628 uint64_t startLoc, 1629 DisassemblerMode mode) { 1630 memset(insn, 0, sizeof(struct InternalInstruction)); 1631 1632 insn->reader = reader; 1633 insn->readerArg = readerArg; 1634 insn->dlog = logger; 1635 insn->dlogArg = loggerArg; 1636 insn->startLocation = startLoc; 1637 insn->readerCursor = startLoc; 1638 insn->mode = mode; 1639 insn->numImmediatesConsumed = 0; 1640 1641 if (readPrefixes(insn) || 1642 readOpcode(insn) || 1643 getID(insn, miiArg) || 1644 insn->instructionID == 0 || 1645 readOperands(insn)) 1646 return -1; 1647 1648 insn->operands = &x86OperandSets[insn->spec->operands][0]; 1649 1650 insn->length = insn->readerCursor - insn->startLocation; 1651 1652 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 1653 startLoc, insn->readerCursor, insn->length); 1654 1655 if (insn->length > 15) 1656 dbgprintf(insn, "Instruction exceeds 15-byte limit"); 1657 1658 return 0; 1659} 1660