/* X86DisassemblerDecoder.c revision ce8f4c58d8804ee2cfda7751cdc6520bc5f43dd7 */
1/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==* 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is distributed under the University of Illinois Open Source 6 * License. See LICENSE.TXT for details. 7 * 8 *===----------------------------------------------------------------------===* 9 * 10 * This file is part of the X86 Disassembler. 11 * It contains the implementation of the instruction decoder. 12 * Documentation for the disassembler can be found in X86Disassembler.h. 13 * 14 *===----------------------------------------------------------------------===*/ 15 16#include <stdarg.h> /* for va_*() */ 17#include <stdio.h> /* for vsnprintf() */ 18#include <stdlib.h> /* for exit() */ 19#include <string.h> /* for memset() */ 20 21#include "X86DisassemblerDecoder.h" 22 23#include "X86GenDisassemblerTables.inc" 24 25#define TRUE 1 26#define FALSE 0 27 28typedef int8_t bool; 29 30#ifndef NDEBUG 31#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) 32#else 33#define debug(s) do { } while (0) 34#endif 35 36 37/* 38 * contextForAttrs - Client for the instruction context table. Takes a set of 39 * attributes and returns the appropriate decode context. 40 * 41 * @param attrMask - Attributes, from the enumeration attributeBits. 42 * @return - The InstructionContext to use when looking up an 43 * an instruction with these attributes. 44 */ 45static InstructionContext contextForAttrs(uint8_t attrMask) { 46 return CONTEXTS_SYM[attrMask]; 47} 48 49/* 50 * modRMRequired - Reads the appropriate instruction table to determine whether 51 * the ModR/M byte is required to decode a particular instruction. 52 * 53 * @param type - The opcode type (i.e., how many bytes it has). 54 * @param insnContext - The context for the instruction, as returned by 55 * contextForAttrs. 56 * @param opcode - The last byte of the instruction's opcode, not counting 57 * ModR/M extensions and escapes. 
58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise. 59 */ 60static int modRMRequired(OpcodeType type, 61 InstructionContext insnContext, 62 uint8_t opcode) { 63 const struct ContextDecision* decision = 0; 64 65 switch (type) { 66 case ONEBYTE: 67 decision = &ONEBYTE_SYM; 68 break; 69 case TWOBYTE: 70 decision = &TWOBYTE_SYM; 71 break; 72 case THREEBYTE_38: 73 decision = &THREEBYTE38_SYM; 74 break; 75 case THREEBYTE_3A: 76 decision = &THREEBYTE3A_SYM; 77 break; 78 case THREEBYTE_A6: 79 decision = &THREEBYTEA6_SYM; 80 break; 81 case THREEBYTE_A7: 82 decision = &THREEBYTEA7_SYM; 83 break; 84 } 85 86 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 87 modrm_type != MODRM_ONEENTRY; 88 89 return 0; 90} 91 92/* 93 * decode - Reads the appropriate instruction table to obtain the unique ID of 94 * an instruction. 95 * 96 * @param type - See modRMRequired(). 97 * @param insnContext - See modRMRequired(). 98 * @param opcode - See modRMRequired(). 99 * @param modRM - The ModR/M byte if required, or any value if not. 100 * @return - The UID of the instruction, or 0 on failure. 
101 */ 102static InstrUID decode(OpcodeType type, 103 InstructionContext insnContext, 104 uint8_t opcode, 105 uint8_t modRM) { 106 const struct ModRMDecision* dec = 0; 107 108 switch (type) { 109 case ONEBYTE: 110 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 111 break; 112 case TWOBYTE: 113 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 114 break; 115 case THREEBYTE_38: 116 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 117 break; 118 case THREEBYTE_3A: 119 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 120 break; 121 case THREEBYTE_A6: 122 dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 123 break; 124 case THREEBYTE_A7: 125 dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 126 break; 127 } 128 129 switch (dec->modrm_type) { 130 default: 131 debug("Corrupt table! Unknown modrm_type"); 132 return 0; 133 case MODRM_ONEENTRY: 134 return modRMTable[dec->instructionIDs]; 135 case MODRM_SPLITRM: 136 if (modFromModRM(modRM) == 0x3) 137 return modRMTable[dec->instructionIDs+1]; 138 return modRMTable[dec->instructionIDs]; 139 case MODRM_FULL: 140 return modRMTable[dec->instructionIDs+modRM]; 141 } 142} 143 144/* 145 * specifierForUID - Given a UID, returns the name and operand specification for 146 * that instruction. 147 * 148 * @param uid - The unique ID for the instruction. This should be returned by 149 * decode(); specifierForUID will not check bounds. 150 * @return - A pointer to the specification for that instruction. 151 */ 152static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { 153 return &INSTRUCTIONS_SYM[uid]; 154} 155 156/* 157 * consumeByte - Uses the reader function provided by the user to consume one 158 * byte from the instruction's memory and advance the cursor. 159 * 160 * @param insn - The instruction with the reader function to use. 
The cursor 161 * for this instruction is advanced. 162 * @param byte - A pointer to a pre-allocated memory buffer to be populated 163 * with the data read. 164 * @return - 0 if the read was successful; nonzero otherwise. 165 */ 166static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { 167 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 168 169 if (!ret) 170 ++(insn->readerCursor); 171 172 return ret; 173} 174 175/* 176 * lookAtByte - Like consumeByte, but does not advance the cursor. 177 * 178 * @param insn - See consumeByte(). 179 * @param byte - See consumeByte(). 180 * @return - See consumeByte(). 181 */ 182static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { 183 return insn->reader(insn->readerArg, byte, insn->readerCursor); 184} 185 186static void unconsumeByte(struct InternalInstruction* insn) { 187 insn->readerCursor--; 188} 189 190#define CONSUME_FUNC(name, type) \ 191 static int name(struct InternalInstruction* insn, type* ptr) { \ 192 type combined = 0; \ 193 unsigned offset; \ 194 for (offset = 0; offset < sizeof(type); ++offset) { \ 195 uint8_t byte; \ 196 int ret = insn->reader(insn->readerArg, \ 197 &byte, \ 198 insn->readerCursor + offset); \ 199 if (ret) \ 200 return ret; \ 201 combined = combined | ((type)byte << ((type)offset * 8)); \ 202 } \ 203 *ptr = combined; \ 204 insn->readerCursor += sizeof(type); \ 205 return 0; \ 206 } 207 208/* 209 * consume* - Use the reader function provided by the user to consume data 210 * values of various sizes from the instruction's memory and advance the 211 * cursor appropriately. These readers perform endian conversion. 212 * 213 * @param insn - See consumeByte(). 214 * @param ptr - A pointer to a pre-allocated memory of appropriate size to 215 * be populated with the data read. 216 * @return - See consumeByte(). 
217 */ 218CONSUME_FUNC(consumeInt8, int8_t) 219CONSUME_FUNC(consumeInt16, int16_t) 220CONSUME_FUNC(consumeInt32, int32_t) 221CONSUME_FUNC(consumeUInt16, uint16_t) 222CONSUME_FUNC(consumeUInt32, uint32_t) 223CONSUME_FUNC(consumeUInt64, uint64_t) 224 225/* 226 * dbgprintf - Uses the logging function provided by the user to log a single 227 * message, typically without a carriage-return. 228 * 229 * @param insn - The instruction containing the logging function. 230 * @param format - See printf(). 231 * @param ... - See printf(). 232 */ 233static void dbgprintf(struct InternalInstruction* insn, 234 const char* format, 235 ...) { 236 char buffer[256]; 237 va_list ap; 238 239 if (!insn->dlog) 240 return; 241 242 va_start(ap, format); 243 (void)vsnprintf(buffer, sizeof(buffer), format, ap); 244 va_end(ap); 245 246 insn->dlog(insn->dlogArg, buffer); 247 248 return; 249} 250 251/* 252 * setPrefixPresent - Marks that a particular prefix is present at a particular 253 * location. 254 * 255 * @param insn - The instruction to be marked as having the prefix. 256 * @param prefix - The prefix that is present. 257 * @param location - The location where the prefix is located (in the address 258 * space of the instruction's reader). 259 */ 260static void setPrefixPresent(struct InternalInstruction* insn, 261 uint8_t prefix, 262 uint64_t location) 263{ 264 insn->prefixPresent[prefix] = 1; 265 insn->prefixLocations[prefix] = location; 266} 267 268/* 269 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is 270 * present at a given location. 271 * 272 * @param insn - The instruction to be queried. 273 * @param prefix - The prefix. 274 * @param location - The location to query. 275 * @return - Whether the prefix is at that location. 
276 */ 277static BOOL isPrefixAtLocation(struct InternalInstruction* insn, 278 uint8_t prefix, 279 uint64_t location) 280{ 281 if (insn->prefixPresent[prefix] == 1 && 282 insn->prefixLocations[prefix] == location) 283 return TRUE; 284 else 285 return FALSE; 286} 287 288/* 289 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 290 * instruction as having them. Also sets the instruction's default operand, 291 * address, and other relevant data sizes to report operands correctly. 292 * 293 * @param insn - The instruction whose prefixes are to be read. 294 * @return - 0 if the instruction could be read until the end of the prefix 295 * bytes, and no prefixes conflicted; nonzero otherwise. 296 */ 297static int readPrefixes(struct InternalInstruction* insn) { 298 BOOL isPrefix = TRUE; 299 BOOL prefixGroups[4] = { FALSE }; 300 uint64_t prefixLocation; 301 uint8_t byte = 0; 302 303 BOOL hasAdSize = FALSE; 304 BOOL hasOpSize = FALSE; 305 306 dbgprintf(insn, "readPrefixes()"); 307 308 while (isPrefix) { 309 prefixLocation = insn->readerCursor; 310 311 if (consumeByte(insn, &byte)) 312 return -1; 313 314 switch (byte) { 315 case 0xf0: /* LOCK */ 316 case 0xf2: /* REPNE/REPNZ */ 317 case 0xf3: /* REP or REPE/REPZ */ 318 if (prefixGroups[0]) 319 dbgprintf(insn, "Redundant Group 1 prefix"); 320 prefixGroups[0] = TRUE; 321 setPrefixPresent(insn, byte, prefixLocation); 322 break; 323 case 0x2e: /* CS segment override -OR- Branch not taken */ 324 case 0x36: /* SS segment override -OR- Branch taken */ 325 case 0x3e: /* DS segment override */ 326 case 0x26: /* ES segment override */ 327 case 0x64: /* FS segment override */ 328 case 0x65: /* GS segment override */ 329 switch (byte) { 330 case 0x2e: 331 insn->segmentOverride = SEG_OVERRIDE_CS; 332 break; 333 case 0x36: 334 insn->segmentOverride = SEG_OVERRIDE_SS; 335 break; 336 case 0x3e: 337 insn->segmentOverride = SEG_OVERRIDE_DS; 338 break; 339 case 0x26: 340 insn->segmentOverride = SEG_OVERRIDE_ES; 341 
break; 342 case 0x64: 343 insn->segmentOverride = SEG_OVERRIDE_FS; 344 break; 345 case 0x65: 346 insn->segmentOverride = SEG_OVERRIDE_GS; 347 break; 348 default: 349 debug("Unhandled override"); 350 return -1; 351 } 352 if (prefixGroups[1]) 353 dbgprintf(insn, "Redundant Group 2 prefix"); 354 prefixGroups[1] = TRUE; 355 setPrefixPresent(insn, byte, prefixLocation); 356 break; 357 case 0x66: /* Operand-size override */ 358 if (prefixGroups[2]) 359 dbgprintf(insn, "Redundant Group 3 prefix"); 360 prefixGroups[2] = TRUE; 361 hasOpSize = TRUE; 362 setPrefixPresent(insn, byte, prefixLocation); 363 break; 364 case 0x67: /* Address-size override */ 365 if (prefixGroups[3]) 366 dbgprintf(insn, "Redundant Group 4 prefix"); 367 prefixGroups[3] = TRUE; 368 hasAdSize = TRUE; 369 setPrefixPresent(insn, byte, prefixLocation); 370 break; 371 default: /* Not a prefix byte */ 372 isPrefix = FALSE; 373 break; 374 } 375 376 if (isPrefix) 377 dbgprintf(insn, "Found prefix 0x%hhx", byte); 378 } 379 380 insn->vexSize = 0; 381 382 if (byte == 0xc4) { 383 uint8_t byte1; 384 385 if (lookAtByte(insn, &byte1)) { 386 dbgprintf(insn, "Couldn't read second byte of VEX"); 387 return -1; 388 } 389 390 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 391 insn->vexSize = 3; 392 insn->necessaryPrefixLocation = insn->readerCursor - 1; 393 } 394 else { 395 unconsumeByte(insn); 396 insn->necessaryPrefixLocation = insn->readerCursor - 1; 397 } 398 399 if (insn->vexSize == 3) { 400 insn->vexPrefix[0] = byte; 401 consumeByte(insn, &insn->vexPrefix[1]); 402 consumeByte(insn, &insn->vexPrefix[2]); 403 404 /* We simulate the REX prefix for simplicity's sake */ 405 406 if (insn->mode == MODE_64BIT) { 407 insn->rexPrefix = 0x40 408 | (wFromVEX3of3(insn->vexPrefix[2]) << 3) 409 | (rFromVEX2of3(insn->vexPrefix[1]) << 2) 410 | (xFromVEX2of3(insn->vexPrefix[1]) << 1) 411 | (bFromVEX2of3(insn->vexPrefix[1]) << 0); 412 } 413 414 switch (ppFromVEX3of3(insn->vexPrefix[2])) 415 { 416 default: 417 break; 418 
case VEX_PREFIX_66: 419 hasOpSize = TRUE; 420 break; 421 } 422 423 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); 424 } 425 } 426 else if (byte == 0xc5) { 427 uint8_t byte1; 428 429 if (lookAtByte(insn, &byte1)) { 430 dbgprintf(insn, "Couldn't read second byte of VEX"); 431 return -1; 432 } 433 434 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 435 insn->vexSize = 2; 436 } 437 else { 438 unconsumeByte(insn); 439 } 440 441 if (insn->vexSize == 2) { 442 insn->vexPrefix[0] = byte; 443 consumeByte(insn, &insn->vexPrefix[1]); 444 445 if (insn->mode == MODE_64BIT) { 446 insn->rexPrefix = 0x40 447 | (rFromVEX2of2(insn->vexPrefix[1]) << 2); 448 } 449 450 switch (ppFromVEX2of2(insn->vexPrefix[1])) 451 { 452 default: 453 break; 454 case VEX_PREFIX_66: 455 hasOpSize = TRUE; 456 break; 457 } 458 459 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); 460 } 461 } 462 else { 463 if (insn->mode == MODE_64BIT) { 464 if ((byte & 0xf0) == 0x40) { 465 uint8_t opcodeByte; 466 467 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { 468 dbgprintf(insn, "Redundant REX prefix"); 469 return -1; 470 } 471 472 insn->rexPrefix = byte; 473 insn->necessaryPrefixLocation = insn->readerCursor - 2; 474 475 dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 476 } else { 477 unconsumeByte(insn); 478 insn->necessaryPrefixLocation = insn->readerCursor - 1; 479 } 480 } else { 481 unconsumeByte(insn); 482 insn->necessaryPrefixLocation = insn->readerCursor - 1; 483 } 484 } 485 486 if (insn->mode == MODE_16BIT) { 487 insn->registerSize = (hasOpSize ? 4 : 2); 488 insn->addressSize = (hasAdSize ? 4 : 2); 489 insn->displacementSize = (hasAdSize ? 4 : 2); 490 insn->immediateSize = (hasOpSize ? 4 : 2); 491 } else if (insn->mode == MODE_32BIT) { 492 insn->registerSize = (hasOpSize ? 2 : 4); 493 insn->addressSize = (hasAdSize ? 2 : 4); 494 insn->displacementSize = (hasAdSize ? 
2 : 4); 495 insn->immediateSize = (hasOpSize ? 2 : 4); 496 } else if (insn->mode == MODE_64BIT) { 497 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 498 insn->registerSize = 8; 499 insn->addressSize = (hasAdSize ? 4 : 8); 500 insn->displacementSize = 4; 501 insn->immediateSize = 4; 502 } else if (insn->rexPrefix) { 503 insn->registerSize = (hasOpSize ? 2 : 4); 504 insn->addressSize = (hasAdSize ? 4 : 8); 505 insn->displacementSize = (hasOpSize ? 2 : 4); 506 insn->immediateSize = (hasOpSize ? 2 : 4); 507 } else { 508 insn->registerSize = (hasOpSize ? 2 : 4); 509 insn->addressSize = (hasAdSize ? 4 : 8); 510 insn->displacementSize = (hasOpSize ? 2 : 4); 511 insn->immediateSize = (hasOpSize ? 2 : 4); 512 } 513 } 514 515 return 0; 516} 517 518/* 519 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 520 * extended or escape opcodes). 521 * 522 * @param insn - The instruction whose opcode is to be read. 523 * @return - 0 if the opcode could be read successfully; nonzero otherwise. 
524 */ 525static int readOpcode(struct InternalInstruction* insn) { 526 /* Determine the length of the primary opcode */ 527 528 uint8_t current; 529 530 dbgprintf(insn, "readOpcode()"); 531 532 insn->opcodeType = ONEBYTE; 533 534 if (insn->vexSize == 3) 535 { 536 switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) 537 { 538 default: 539 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); 540 return -1; 541 case 0: 542 break; 543 case VEX_LOB_0F: 544 insn->twoByteEscape = 0x0f; 545 insn->opcodeType = TWOBYTE; 546 return consumeByte(insn, &insn->opcode); 547 case VEX_LOB_0F38: 548 insn->twoByteEscape = 0x0f; 549 insn->threeByteEscape = 0x38; 550 insn->opcodeType = THREEBYTE_38; 551 return consumeByte(insn, &insn->opcode); 552 case VEX_LOB_0F3A: 553 insn->twoByteEscape = 0x0f; 554 insn->threeByteEscape = 0x3a; 555 insn->opcodeType = THREEBYTE_3A; 556 return consumeByte(insn, &insn->opcode); 557 } 558 } 559 else if (insn->vexSize == 2) 560 { 561 insn->twoByteEscape = 0x0f; 562 insn->opcodeType = TWOBYTE; 563 return consumeByte(insn, &insn->opcode); 564 } 565 566 if (consumeByte(insn, ¤t)) 567 return -1; 568 569 if (current == 0x0f) { 570 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 571 572 insn->twoByteEscape = current; 573 574 if (consumeByte(insn, ¤t)) 575 return -1; 576 577 if (current == 0x38) { 578 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 579 580 insn->threeByteEscape = current; 581 582 if (consumeByte(insn, ¤t)) 583 return -1; 584 585 insn->opcodeType = THREEBYTE_38; 586 } else if (current == 0x3a) { 587 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 588 589 insn->threeByteEscape = current; 590 591 if (consumeByte(insn, ¤t)) 592 return -1; 593 594 insn->opcodeType = THREEBYTE_3A; 595 } else if (current == 0xa6) { 596 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 597 598 insn->threeByteEscape = current; 599 600 if 
(consumeByte(insn, ¤t)) 601 return -1; 602 603 insn->opcodeType = THREEBYTE_A6; 604 } else if (current == 0xa7) { 605 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 606 607 insn->threeByteEscape = current; 608 609 if (consumeByte(insn, ¤t)) 610 return -1; 611 612 insn->opcodeType = THREEBYTE_A7; 613 } else { 614 dbgprintf(insn, "Didn't find a three-byte escape prefix"); 615 616 insn->opcodeType = TWOBYTE; 617 } 618 } 619 620 /* 621 * At this point we have consumed the full opcode. 622 * Anything we consume from here on must be unconsumed. 623 */ 624 625 insn->opcode = current; 626 627 return 0; 628} 629 630static int readModRM(struct InternalInstruction* insn); 631 632/* 633 * getIDWithAttrMask - Determines the ID of an instruction, consuming 634 * the ModR/M byte as appropriate for extended and escape opcodes, 635 * and using a supplied attribute mask. 636 * 637 * @param instructionID - A pointer whose target is filled in with the ID of the 638 * instruction. 639 * @param insn - The instruction whose ID is to be determined. 640 * @param attrMask - The attribute mask to search. 641 * @return - 0 if the ModR/M could be read when needed or was not 642 * needed; nonzero otherwise. 
643 */ 644static int getIDWithAttrMask(uint16_t* instructionID, 645 struct InternalInstruction* insn, 646 uint8_t attrMask) { 647 BOOL hasModRMExtension; 648 649 uint8_t instructionClass; 650 651 instructionClass = contextForAttrs(attrMask); 652 653 hasModRMExtension = modRMRequired(insn->opcodeType, 654 instructionClass, 655 insn->opcode); 656 657 if (hasModRMExtension) { 658 if (readModRM(insn)) 659 return -1; 660 661 *instructionID = decode(insn->opcodeType, 662 instructionClass, 663 insn->opcode, 664 insn->modRM); 665 } else { 666 *instructionID = decode(insn->opcodeType, 667 instructionClass, 668 insn->opcode, 669 0); 670 } 671 672 return 0; 673} 674 675/* 676 * is16BitEquivalent - Determines whether two instruction names refer to 677 * equivalent instructions but one is 16-bit whereas the other is not. 678 * 679 * @param orig - The instruction that is not 16-bit 680 * @param equiv - The instruction that is 16-bit 681 */ 682static BOOL is16BitEquvalent(const char* orig, const char* equiv) { 683 off_t i; 684 685 for (i = 0;; i++) { 686 if (orig[i] == '\0' && equiv[i] == '\0') 687 return TRUE; 688 if (orig[i] == '\0' || equiv[i] == '\0') 689 return FALSE; 690 if (orig[i] != equiv[i]) { 691 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 692 continue; 693 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 694 continue; 695 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 696 continue; 697 return FALSE; 698 } 699 } 700} 701 702/* 703 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 704 * appropriate for extended and escape opcodes. Determines the attributes and 705 * context for the instruction before doing so. 706 * 707 * @param insn - The instruction whose ID is to be determined. 708 * @return - 0 if the ModR/M could be read when needed or was not needed; 709 * nonzero otherwise. 
710 */ 711static int getID(struct InternalInstruction* insn) { 712 uint8_t attrMask; 713 uint16_t instructionID; 714 715 dbgprintf(insn, "getID()"); 716 717 attrMask = ATTR_NONE; 718 719 if (insn->mode == MODE_64BIT) 720 attrMask |= ATTR_64BIT; 721 722 if (insn->vexSize) { 723 attrMask |= ATTR_VEX; 724 725 if (insn->vexSize == 3) { 726 switch (ppFromVEX3of3(insn->vexPrefix[2])) { 727 case VEX_PREFIX_66: 728 attrMask |= ATTR_OPSIZE; 729 break; 730 case VEX_PREFIX_F3: 731 attrMask |= ATTR_XS; 732 break; 733 case VEX_PREFIX_F2: 734 attrMask |= ATTR_XD; 735 break; 736 } 737 738 if (lFromVEX3of3(insn->vexPrefix[2])) 739 attrMask |= ATTR_VEXL; 740 } 741 else if (insn->vexSize == 2) { 742 switch (ppFromVEX2of2(insn->vexPrefix[1])) { 743 case VEX_PREFIX_66: 744 attrMask |= ATTR_OPSIZE; 745 break; 746 case VEX_PREFIX_F3: 747 attrMask |= ATTR_XS; 748 break; 749 case VEX_PREFIX_F2: 750 attrMask |= ATTR_XD; 751 break; 752 } 753 754 if (lFromVEX2of2(insn->vexPrefix[1])) 755 attrMask |= ATTR_VEXL; 756 } 757 else { 758 return -1; 759 } 760 } 761 else { 762 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) 763 attrMask |= ATTR_OPSIZE; 764 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) 765 attrMask |= ATTR_XS; 766 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) 767 attrMask |= ATTR_XD; 768 } 769 770 if (insn->rexPrefix & 0x08) 771 attrMask |= ATTR_REXW; 772 773 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 774 return -1; 775 776 /* The following clauses compensate for limitations of the tables. */ 777 778 if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) && 779 !(attrMask & ATTR_OPSIZE)) { 780 /* 781 * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit 782 * has precedence since there are no L-bit with W-bit entries in the tables. 783 * So if the L-bit isn't significant we should use the W-bit instead. 
784 * We only need to do this if the instruction doesn't specify OpSize since 785 * there is a VEX_L_W_OPSIZE table. 786 */ 787 788 const struct InstructionSpecifier *spec; 789 uint16_t instructionIDWithWBit; 790 const struct InstructionSpecifier *specWithWBit; 791 792 spec = specifierForUID(instructionID); 793 794 if (getIDWithAttrMask(&instructionIDWithWBit, 795 insn, 796 (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) { 797 insn->instructionID = instructionID; 798 insn->spec = spec; 799 return 0; 800 } 801 802 specWithWBit = specifierForUID(instructionIDWithWBit); 803 804 if (instructionID != instructionIDWithWBit) { 805 insn->instructionID = instructionIDWithWBit; 806 insn->spec = specWithWBit; 807 } else { 808 insn->instructionID = instructionID; 809 insn->spec = spec; 810 } 811 return 0; 812 } 813 814 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { 815 /* 816 * The instruction tables make no distinction between instructions that 817 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 818 * particular spot (i.e., many MMX operations). In general we're 819 * conservative, but in the specific case where OpSize is present but not 820 * in the right place we check if there's a 16-bit operation. 
821 */ 822 823 const struct InstructionSpecifier *spec; 824 uint16_t instructionIDWithOpsize; 825 const struct InstructionSpecifier *specWithOpsize; 826 827 spec = specifierForUID(instructionID); 828 829 if (getIDWithAttrMask(&instructionIDWithOpsize, 830 insn, 831 attrMask | ATTR_OPSIZE)) { 832 /* 833 * ModRM required with OpSize but not present; give up and return version 834 * without OpSize set 835 */ 836 837 insn->instructionID = instructionID; 838 insn->spec = spec; 839 return 0; 840 } 841 842 specWithOpsize = specifierForUID(instructionIDWithOpsize); 843 844 if (is16BitEquvalent(spec->name, specWithOpsize->name)) { 845 insn->instructionID = instructionIDWithOpsize; 846 insn->spec = specWithOpsize; 847 } else { 848 insn->instructionID = instructionID; 849 insn->spec = spec; 850 } 851 return 0; 852 } 853 854 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 855 insn->rexPrefix & 0x01) { 856 /* 857 * NOOP shouldn't decode as NOOP if REX.b is set. Instead 858 * it should decode as XCHG %r8, %eax. 859 */ 860 861 const struct InstructionSpecifier *spec; 862 uint16_t instructionIDWithNewOpcode; 863 const struct InstructionSpecifier *specWithNewOpcode; 864 865 spec = specifierForUID(instructionID); 866 867 /* Borrow opcode from one of the other XCHGar opcodes */ 868 insn->opcode = 0x91; 869 870 if (getIDWithAttrMask(&instructionIDWithNewOpcode, 871 insn, 872 attrMask)) { 873 insn->opcode = 0x90; 874 875 insn->instructionID = instructionID; 876 insn->spec = spec; 877 return 0; 878 } 879 880 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); 881 882 /* Change back */ 883 insn->opcode = 0x90; 884 885 insn->instructionID = instructionIDWithNewOpcode; 886 insn->spec = specWithNewOpcode; 887 888 return 0; 889 } 890 891 insn->instructionID = instructionID; 892 insn->spec = specifierForUID(insn->instructionID); 893 894 return 0; 895} 896 897/* 898 * readSIB - Consumes the SIB byte to determine addressing information for an 899 * instruction. 
900 * 901 * @param insn - The instruction whose SIB byte is to be read. 902 * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 903 */ 904static int readSIB(struct InternalInstruction* insn) { 905 SIBIndex sibIndexBase = 0; 906 SIBBase sibBaseBase = 0; 907 uint8_t index, base; 908 909 dbgprintf(insn, "readSIB()"); 910 911 if (insn->consumedSIB) 912 return 0; 913 914 insn->consumedSIB = TRUE; 915 916 switch (insn->addressSize) { 917 case 2: 918 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 919 return -1; 920 break; 921 case 4: 922 sibIndexBase = SIB_INDEX_EAX; 923 sibBaseBase = SIB_BASE_EAX; 924 break; 925 case 8: 926 sibIndexBase = SIB_INDEX_RAX; 927 sibBaseBase = SIB_BASE_RAX; 928 break; 929 } 930 931 if (consumeByte(insn, &insn->sib)) 932 return -1; 933 934 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 935 936 switch (index) { 937 case 0x4: 938 insn->sibIndex = SIB_INDEX_NONE; 939 break; 940 default: 941 insn->sibIndex = (SIBIndex)(sibIndexBase + index); 942 if (insn->sibIndex == SIB_INDEX_sib || 943 insn->sibIndex == SIB_INDEX_sib64) 944 insn->sibIndex = SIB_INDEX_NONE; 945 break; 946 } 947 948 switch (scaleFromSIB(insn->sib)) { 949 case 0: 950 insn->sibScale = 1; 951 break; 952 case 1: 953 insn->sibScale = 2; 954 break; 955 case 2: 956 insn->sibScale = 4; 957 break; 958 case 3: 959 insn->sibScale = 8; 960 break; 961 } 962 963 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 964 965 switch (base) { 966 case 0x5: 967 switch (modFromModRM(insn->modRM)) { 968 case 0x0: 969 insn->eaDisplacement = EA_DISP_32; 970 insn->sibBase = SIB_BASE_NONE; 971 break; 972 case 0x1: 973 insn->eaDisplacement = EA_DISP_8; 974 insn->sibBase = (insn->addressSize == 4 ? 975 SIB_BASE_EBP : SIB_BASE_RBP); 976 break; 977 case 0x2: 978 insn->eaDisplacement = EA_DISP_32; 979 insn->sibBase = (insn->addressSize == 4 ? 
980 SIB_BASE_EBP : SIB_BASE_RBP); 981 break; 982 case 0x3: 983 debug("Cannot have Mod = 0b11 and a SIB byte"); 984 return -1; 985 } 986 break; 987 default: 988 insn->sibBase = (SIBBase)(sibBaseBase + base); 989 break; 990 } 991 992 return 0; 993} 994 995/* 996 * readDisplacement - Consumes the displacement of an instruction. 997 * 998 * @param insn - The instruction whose displacement is to be read. 999 * @return - 0 if the displacement byte was successfully read; nonzero 1000 * otherwise. 1001 */ 1002static int readDisplacement(struct InternalInstruction* insn) { 1003 int8_t d8; 1004 int16_t d16; 1005 int32_t d32; 1006 1007 dbgprintf(insn, "readDisplacement()"); 1008 1009 if (insn->consumedDisplacement) 1010 return 0; 1011 1012 insn->consumedDisplacement = TRUE; 1013 1014 switch (insn->eaDisplacement) { 1015 case EA_DISP_NONE: 1016 insn->consumedDisplacement = FALSE; 1017 break; 1018 case EA_DISP_8: 1019 if (consumeInt8(insn, &d8)) 1020 return -1; 1021 insn->displacement = d8; 1022 break; 1023 case EA_DISP_16: 1024 if (consumeInt16(insn, &d16)) 1025 return -1; 1026 insn->displacement = d16; 1027 break; 1028 case EA_DISP_32: 1029 if (consumeInt32(insn, &d32)) 1030 return -1; 1031 insn->displacement = d32; 1032 break; 1033 } 1034 1035 insn->consumedDisplacement = TRUE; 1036 return 0; 1037} 1038 1039/* 1040 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and 1041 * displacement) for an instruction and interprets it. 1042 * 1043 * @param insn - The instruction whose addressing information is to be read. 1044 * @return - 0 if the information was successfully read; nonzero otherwise. 
1045 */ 1046static int readModRM(struct InternalInstruction* insn) { 1047 uint8_t mod, rm, reg; 1048 1049 dbgprintf(insn, "readModRM()"); 1050 1051 if (insn->consumedModRM) 1052 return 0; 1053 1054 if (consumeByte(insn, &insn->modRM)) 1055 return -1; 1056 insn->consumedModRM = TRUE; 1057 1058 mod = modFromModRM(insn->modRM); 1059 rm = rmFromModRM(insn->modRM); 1060 reg = regFromModRM(insn->modRM); 1061 1062 /* 1063 * This goes by insn->registerSize to pick the correct register, which messes 1064 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in 1065 * fixupReg(). 1066 */ 1067 switch (insn->registerSize) { 1068 case 2: 1069 insn->regBase = MODRM_REG_AX; 1070 insn->eaRegBase = EA_REG_AX; 1071 break; 1072 case 4: 1073 insn->regBase = MODRM_REG_EAX; 1074 insn->eaRegBase = EA_REG_EAX; 1075 break; 1076 case 8: 1077 insn->regBase = MODRM_REG_RAX; 1078 insn->eaRegBase = EA_REG_RAX; 1079 break; 1080 } 1081 1082 reg |= rFromREX(insn->rexPrefix) << 3; 1083 rm |= bFromREX(insn->rexPrefix) << 3; 1084 1085 insn->reg = (Reg)(insn->regBase + reg); 1086 1087 switch (insn->addressSize) { 1088 case 2: 1089 insn->eaBaseBase = EA_BASE_BX_SI; 1090 1091 switch (mod) { 1092 case 0x0: 1093 if (rm == 0x6) { 1094 insn->eaBase = EA_BASE_NONE; 1095 insn->eaDisplacement = EA_DISP_16; 1096 if (readDisplacement(insn)) 1097 return -1; 1098 } else { 1099 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1100 insn->eaDisplacement = EA_DISP_NONE; 1101 } 1102 break; 1103 case 0x1: 1104 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1105 insn->eaDisplacement = EA_DISP_8; 1106 if (readDisplacement(insn)) 1107 return -1; 1108 break; 1109 case 0x2: 1110 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1111 insn->eaDisplacement = EA_DISP_16; 1112 if (readDisplacement(insn)) 1113 return -1; 1114 break; 1115 case 0x3: 1116 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1117 if (readDisplacement(insn)) 1118 return -1; 1119 break; 1120 } 1121 break; 1122 case 4: 1123 case 8: 1124 
insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 1125 1126 switch (mod) { 1127 case 0x0: 1128 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ 1129 switch (rm) { 1130 case 0x4: 1131 case 0xc: /* in case REXW.b is set */ 1132 insn->eaBase = (insn->addressSize == 4 ? 1133 EA_BASE_sib : EA_BASE_sib64); 1134 readSIB(insn); 1135 if (readDisplacement(insn)) 1136 return -1; 1137 break; 1138 case 0x5: 1139 insn->eaBase = EA_BASE_NONE; 1140 insn->eaDisplacement = EA_DISP_32; 1141 if (readDisplacement(insn)) 1142 return -1; 1143 break; 1144 default: 1145 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1146 break; 1147 } 1148 break; 1149 case 0x1: 1150 case 0x2: 1151 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); 1152 switch (rm) { 1153 case 0x4: 1154 case 0xc: /* in case REXW.b is set */ 1155 insn->eaBase = EA_BASE_sib; 1156 readSIB(insn); 1157 if (readDisplacement(insn)) 1158 return -1; 1159 break; 1160 default: 1161 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1162 if (readDisplacement(insn)) 1163 return -1; 1164 break; 1165 } 1166 break; 1167 case 0x3: 1168 insn->eaDisplacement = EA_DISP_NONE; 1169 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1170 break; 1171 } 1172 break; 1173 } /* switch (insn->addressSize) */ 1174 1175 return 0; 1176} 1177 1178#define GENERIC_FIXUP_FUNC(name, base, prefix) \ 1179 static uint8_t name(struct InternalInstruction *insn, \ 1180 OperandType type, \ 1181 uint8_t index, \ 1182 uint8_t *valid) { \ 1183 *valid = 1; \ 1184 switch (type) { \ 1185 default: \ 1186 debug("Unhandled register type"); \ 1187 *valid = 0; \ 1188 return 0; \ 1189 case TYPE_Rv: \ 1190 return base + index; \ 1191 case TYPE_R8: \ 1192 if (insn->rexPrefix && \ 1193 index >= 4 && index <= 7) { \ 1194 return prefix##_SPL + (index - 4); \ 1195 } else { \ 1196 return prefix##_AL + index; \ 1197 } \ 1198 case TYPE_R16: \ 1199 return prefix##_AX + index; \ 1200 case TYPE_R32: \ 1201 return prefix##_EAX + index; \ 1202 
case TYPE_R64: \ 1203 return prefix##_RAX + index; \ 1204 case TYPE_XMM256: \ 1205 return prefix##_YMM0 + index; \ 1206 case TYPE_XMM128: \ 1207 case TYPE_XMM64: \ 1208 case TYPE_XMM32: \ 1209 case TYPE_XMM: \ 1210 return prefix##_XMM0 + index; \ 1211 case TYPE_MM64: \ 1212 case TYPE_MM32: \ 1213 case TYPE_MM: \ 1214 if (index > 7) \ 1215 *valid = 0; \ 1216 return prefix##_MM0 + index; \ 1217 case TYPE_SEGMENTREG: \ 1218 if (index > 5) \ 1219 *valid = 0; \ 1220 return prefix##_ES + index; \ 1221 case TYPE_DEBUGREG: \ 1222 if (index > 7) \ 1223 *valid = 0; \ 1224 return prefix##_DR0 + index; \ 1225 case TYPE_CONTROLREG: \ 1226 if (index > 8) \ 1227 *valid = 0; \ 1228 return prefix##_CR0 + index; \ 1229 } \ 1230 } 1231 1232/* 1233 * fixup*Value - Consults an operand type to determine the meaning of the 1234 * reg or R/M field. If the operand is an XMM operand, for example, an 1235 * operand would be XMM0 instead of AX, which readModRM() would otherwise 1236 * misinterpret it as. 1237 * 1238 * @param insn - The instruction containing the operand. 1239 * @param type - The operand type. 1240 * @param index - The existing value of the field as reported by readModRM(). 1241 * @param valid - The address of a uint8_t. The target is set to 1 if the 1242 * field is valid for the register class; 0 if not. 1243 * @return - The proper value. 1244 */ 1245GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) 1246GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) 1247 1248/* 1249 * fixupReg - Consults an operand specifier to determine which of the 1250 * fixup*Value functions to use in correcting readModRM()'ss interpretation. 1251 * 1252 * @param insn - See fixup*Value(). 1253 * @param op - The operand specifier. 1254 * @return - 0 if fixup was successful; -1 if the register returned was 1255 * invalid for its class. 
1256 */ 1257static int fixupReg(struct InternalInstruction *insn, 1258 const struct OperandSpecifier *op) { 1259 uint8_t valid; 1260 1261 dbgprintf(insn, "fixupReg()"); 1262 1263 switch ((OperandEncoding)op->encoding) { 1264 default: 1265 debug("Expected a REG or R/M encoding in fixupReg"); 1266 return -1; 1267 case ENCODING_VVVV: 1268 insn->vvvv = (Reg)fixupRegValue(insn, 1269 (OperandType)op->type, 1270 insn->vvvv, 1271 &valid); 1272 if (!valid) 1273 return -1; 1274 break; 1275 case ENCODING_REG: 1276 insn->reg = (Reg)fixupRegValue(insn, 1277 (OperandType)op->type, 1278 insn->reg - insn->regBase, 1279 &valid); 1280 if (!valid) 1281 return -1; 1282 break; 1283 case ENCODING_RM: 1284 if (insn->eaBase >= insn->eaRegBase) { 1285 insn->eaBase = (EABase)fixupRMValue(insn, 1286 (OperandType)op->type, 1287 insn->eaBase - insn->eaRegBase, 1288 &valid); 1289 if (!valid) 1290 return -1; 1291 } 1292 break; 1293 } 1294 1295 return 0; 1296} 1297 1298/* 1299 * readOpcodeModifier - Reads an operand from the opcode field of an 1300 * instruction. Handles AddRegFrm instructions. 1301 * 1302 * @param insn - The instruction whose opcode field is to be read. 1303 * @param inModRM - Indicates that the opcode field is to be read from the 1304 * ModR/M extension; useful for escape opcodes 1305 * @return - 0 on success; nonzero otherwise. 
1306 */ 1307static int readOpcodeModifier(struct InternalInstruction* insn) { 1308 dbgprintf(insn, "readOpcodeModifier()"); 1309 1310 if (insn->consumedOpcodeModifier) 1311 return 0; 1312 1313 insn->consumedOpcodeModifier = TRUE; 1314 1315 switch (insn->spec->modifierType) { 1316 default: 1317 debug("Unknown modifier type."); 1318 return -1; 1319 case MODIFIER_NONE: 1320 debug("No modifier but an operand expects one."); 1321 return -1; 1322 case MODIFIER_OPCODE: 1323 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; 1324 return 0; 1325 case MODIFIER_MODRM: 1326 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; 1327 return 0; 1328 } 1329} 1330 1331/* 1332 * readOpcodeRegister - Reads an operand from the opcode field of an 1333 * instruction and interprets it appropriately given the operand width. 1334 * Handles AddRegFrm instructions. 1335 * 1336 * @param insn - See readOpcodeModifier(). 1337 * @param size - The width (in bytes) of the register being specified. 1338 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1339 * RAX. 1340 * @return - 0 on success; nonzero otherwise. 
1341 */ 1342static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { 1343 dbgprintf(insn, "readOpcodeRegister()"); 1344 1345 if (readOpcodeModifier(insn)) 1346 return -1; 1347 1348 if (size == 0) 1349 size = insn->registerSize; 1350 1351 switch (size) { 1352 case 1: 1353 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 1354 | insn->opcodeModifier)); 1355 if (insn->rexPrefix && 1356 insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1357 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1358 insn->opcodeRegister = (Reg)(MODRM_REG_SPL 1359 + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1360 } 1361 1362 break; 1363 case 2: 1364 insn->opcodeRegister = (Reg)(MODRM_REG_AX 1365 + ((bFromREX(insn->rexPrefix) << 3) 1366 | insn->opcodeModifier)); 1367 break; 1368 case 4: 1369 insn->opcodeRegister = (Reg)(MODRM_REG_EAX 1370 + ((bFromREX(insn->rexPrefix) << 3) 1371 | insn->opcodeModifier)); 1372 break; 1373 case 8: 1374 insn->opcodeRegister = (Reg)(MODRM_REG_RAX 1375 + ((bFromREX(insn->rexPrefix) << 3) 1376 | insn->opcodeModifier)); 1377 break; 1378 } 1379 1380 return 0; 1381} 1382 1383/* 1384 * readImmediate - Consumes an immediate operand from an instruction, given the 1385 * desired operand size. 1386 * 1387 * @param insn - The instruction whose operand is to be read. 1388 * @param size - The width (in bytes) of the operand. 1389 * @return - 0 if the immediate was successfully consumed; nonzero 1390 * otherwise. 
1391 */ 1392static int readImmediate(struct InternalInstruction* insn, uint8_t size) { 1393 uint8_t imm8; 1394 uint16_t imm16; 1395 uint32_t imm32; 1396 uint64_t imm64; 1397 1398 dbgprintf(insn, "readImmediate()"); 1399 1400 if (insn->numImmediatesConsumed == 2) { 1401 debug("Already consumed two immediates"); 1402 return -1; 1403 } 1404 1405 if (size == 0) 1406 size = insn->immediateSize; 1407 else 1408 insn->immediateSize = size; 1409 1410 switch (size) { 1411 case 1: 1412 if (consumeByte(insn, &imm8)) 1413 return -1; 1414 insn->immediates[insn->numImmediatesConsumed] = imm8; 1415 break; 1416 case 2: 1417 if (consumeUInt16(insn, &imm16)) 1418 return -1; 1419 insn->immediates[insn->numImmediatesConsumed] = imm16; 1420 break; 1421 case 4: 1422 if (consumeUInt32(insn, &imm32)) 1423 return -1; 1424 insn->immediates[insn->numImmediatesConsumed] = imm32; 1425 break; 1426 case 8: 1427 if (consumeUInt64(insn, &imm64)) 1428 return -1; 1429 insn->immediates[insn->numImmediatesConsumed] = imm64; 1430 break; 1431 } 1432 1433 insn->numImmediatesConsumed++; 1434 1435 return 0; 1436} 1437 1438/* 1439 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. 1440 * 1441 * @param insn - The instruction whose operand is to be read. 1442 * @return - 0 if the vvvv was successfully consumed; nonzero 1443 * otherwise. 1444 */ 1445static int readVVVV(struct InternalInstruction* insn) { 1446 dbgprintf(insn, "readVVVV()"); 1447 1448 if (insn->vexSize == 3) 1449 insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]); 1450 else if (insn->vexSize == 2) 1451 insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]); 1452 else 1453 return -1; 1454 1455 if (insn->mode != MODE_64BIT) 1456 insn->vvvv &= 0x7; 1457 1458 return 0; 1459} 1460 1461/* 1462 * readOperands - Consults the specifier for an instruction and consumes all 1463 * operands for that instruction, interpreting them as it goes. 1464 * 1465 * @param insn - The instruction whose operands are to be read and interpreted. 
1466 * @return - 0 if all operands could be read; nonzero otherwise. 1467 */ 1468static int readOperands(struct InternalInstruction* insn) { 1469 int index; 1470 int hasVVVV, needVVVV; 1471 int sawRegImm = 0; 1472 1473 dbgprintf(insn, "readOperands()"); 1474 1475 /* If non-zero vvvv specified, need to make sure one of the operands 1476 uses it. */ 1477 hasVVVV = !readVVVV(insn); 1478 needVVVV = hasVVVV && (insn->vvvv != 0); 1479 1480 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 1481 switch (insn->spec->operands[index].encoding) { 1482 case ENCODING_NONE: 1483 break; 1484 case ENCODING_REG: 1485 case ENCODING_RM: 1486 if (readModRM(insn)) 1487 return -1; 1488 if (fixupReg(insn, &insn->spec->operands[index])) 1489 return -1; 1490 break; 1491 case ENCODING_CB: 1492 case ENCODING_CW: 1493 case ENCODING_CD: 1494 case ENCODING_CP: 1495 case ENCODING_CO: 1496 case ENCODING_CT: 1497 dbgprintf(insn, "We currently don't hande code-offset encodings"); 1498 return -1; 1499 case ENCODING_IB: 1500 if (sawRegImm) { 1501 /* Saw a register immediate so don't read again and instead split the 1502 previous immediate. FIXME: This is a hack. 
*/ 1503 insn->immediates[insn->numImmediatesConsumed] = 1504 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 1505 ++insn->numImmediatesConsumed; 1506 break; 1507 } 1508 if (readImmediate(insn, 1)) 1509 return -1; 1510 if (insn->spec->operands[index].type == TYPE_IMM3 && 1511 insn->immediates[insn->numImmediatesConsumed - 1] > 7) 1512 return -1; 1513 if (insn->spec->operands[index].type == TYPE_XMM128 || 1514 insn->spec->operands[index].type == TYPE_XMM256) 1515 sawRegImm = 1; 1516 break; 1517 case ENCODING_IW: 1518 if (readImmediate(insn, 2)) 1519 return -1; 1520 break; 1521 case ENCODING_ID: 1522 if (readImmediate(insn, 4)) 1523 return -1; 1524 break; 1525 case ENCODING_IO: 1526 if (readImmediate(insn, 8)) 1527 return -1; 1528 break; 1529 case ENCODING_Iv: 1530 if (readImmediate(insn, insn->immediateSize)) 1531 return -1; 1532 break; 1533 case ENCODING_Ia: 1534 if (readImmediate(insn, insn->addressSize)) 1535 return -1; 1536 break; 1537 case ENCODING_RB: 1538 if (readOpcodeRegister(insn, 1)) 1539 return -1; 1540 break; 1541 case ENCODING_RW: 1542 if (readOpcodeRegister(insn, 2)) 1543 return -1; 1544 break; 1545 case ENCODING_RD: 1546 if (readOpcodeRegister(insn, 4)) 1547 return -1; 1548 break; 1549 case ENCODING_RO: 1550 if (readOpcodeRegister(insn, 8)) 1551 return -1; 1552 break; 1553 case ENCODING_Rv: 1554 if (readOpcodeRegister(insn, 0)) 1555 return -1; 1556 break; 1557 case ENCODING_I: 1558 if (readOpcodeModifier(insn)) 1559 return -1; 1560 break; 1561 case ENCODING_VVVV: 1562 needVVVV = 0; /* Mark that we have found a VVVV operand. 
*/ 1563 if (!hasVVVV) 1564 return -1; 1565 if (fixupReg(insn, &insn->spec->operands[index])) 1566 return -1; 1567 break; 1568 case ENCODING_DUP: 1569 break; 1570 default: 1571 dbgprintf(insn, "Encountered an operand with an unknown encoding."); 1572 return -1; 1573 } 1574 } 1575 1576 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ 1577 if (needVVVV) return -1; 1578 1579 return 0; 1580} 1581 1582/* 1583 * decodeInstruction - Reads and interprets a full instruction provided by the 1584 * user. 1585 * 1586 * @param insn - A pointer to the instruction to be populated. Must be 1587 * pre-allocated. 1588 * @param reader - The function to be used to read the instruction's bytes. 1589 * @param readerArg - A generic argument to be passed to the reader to store 1590 * any internal state. 1591 * @param logger - If non-NULL, the function to be used to write log messages 1592 * and warnings. 1593 * @param loggerArg - A generic argument to be passed to the logger to store 1594 * any internal state. 1595 * @param startLoc - The address (in the reader's address space) of the first 1596 * byte in the instruction. 1597 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 1598 * decode the instruction in. 1599 * @return - 0 if the instruction's memory could be read; nonzero if 1600 * not. 
1601 */ 1602int decodeInstruction(struct InternalInstruction* insn, 1603 byteReader_t reader, 1604 void* readerArg, 1605 dlog_t logger, 1606 void* loggerArg, 1607 uint64_t startLoc, 1608 DisassemblerMode mode) { 1609 memset(insn, 0, sizeof(struct InternalInstruction)); 1610 1611 insn->reader = reader; 1612 insn->readerArg = readerArg; 1613 insn->dlog = logger; 1614 insn->dlogArg = loggerArg; 1615 insn->startLocation = startLoc; 1616 insn->readerCursor = startLoc; 1617 insn->mode = mode; 1618 insn->numImmediatesConsumed = 0; 1619 1620 if (readPrefixes(insn) || 1621 readOpcode(insn) || 1622 getID(insn) || 1623 insn->instructionID == 0 || 1624 readOperands(insn)) 1625 return -1; 1626 1627 insn->length = insn->readerCursor - insn->startLocation; 1628 1629 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 1630 startLoc, insn->readerCursor, insn->length); 1631 1632 if (insn->length > 15) 1633 dbgprintf(insn, "Instruction exceeds 15-byte limit"); 1634 1635 return 0; 1636} 1637