1/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===* 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is distributed under the University of Illinois Open Source 6 * License. See LICENSE.TXT for details. 7 * 8 *===----------------------------------------------------------------------===* 9 * 10 * This file is part of the X86 Disassembler. 11 * It contains the implementation of the instruction decoder. 12 * Documentation for the disassembler can be found in X86Disassembler.h. 13 * 14 *===----------------------------------------------------------------------===*/ 15 16/* Capstone Disassembly Engine */ 17/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2014 */ 18 19#ifdef CAPSTONE_HAS_X86 20 21#include <stdarg.h> /* for va_*() */ 22#if defined(CAPSTONE_HAS_OSXKERNEL) 23#include <libkern/libkern.h> 24#else 25#include <stdlib.h> /* for exit() */ 26#endif 27 28#include "../../cs_priv.h" 29#include "../../utils.h" 30 31#include "X86DisassemblerDecoder.h" 32 33/// Specifies whether a ModR/M byte is needed and (if so) which 34/// instruction each possible value of the ModR/M byte corresponds to. Once 35/// this information is known, we have narrowed down to a single instruction. 36struct ModRMDecision { 37 uint8_t modrm_type; 38 uint16_t instructionIDs; 39}; 40 41/// Specifies which set of ModR/M->instruction tables to look at 42/// given a particular opcode. 43struct OpcodeDecision { 44 struct ModRMDecision modRMDecisions[256]; 45}; 46 47/// Specifies which opcode->instruction tables to look at given 48/// a particular context (set of attributes). Since there are many possible 49/// contexts, the decoder first uses CONTEXTS_SYM to determine which context 50/// applies given a specific set of attributes. Hence there are only IC_max 51/// entries in this table, rather than 2^(ATTR_max). 52struct ContextDecision { 53 struct OpcodeDecision opcodeDecisions[IC_max]; 54}; 55 56#ifdef CAPSTONE_X86_REDUCE 57#include "X86GenDisassemblerTables_reduce.inc" 58#else 59#include "X86GenDisassemblerTables.inc" 60#endif 61 62//#define GET_INSTRINFO_ENUM 63#define GET_INSTRINFO_MC_DESC 64#ifdef CAPSTONE_X86_REDUCE 65#include "X86GenInstrInfo_reduce.inc" 66#else 67#include "X86GenInstrInfo.inc" 68#endif 69 70/* 71 * contextForAttrs - Client for the instruction context table. Takes a set of 72 * attributes and returns the appropriate decode context. 73 * 74 * @param attrMask - Attributes, from the enumeration attributeBits. 75 * @return - The InstructionContext to use when looking up an 76 * an instruction with these attributes. 77 */ 78static InstructionContext contextForAttrs(uint16_t attrMask) 79{ 80 return CONTEXTS_SYM[attrMask]; 81} 82 83/* 84 * modRMRequired - Reads the appropriate instruction table to determine whether 85 * the ModR/M byte is required to decode a particular instruction. 86 * 87 * @param type - The opcode type (i.e., how many bytes it has). 88 * @param insnContext - The context for the instruction, as returned by 89 * contextForAttrs. 90 * @param opcode - The last byte of the instruction's opcode, not counting 91 * ModR/M extensions and escapes. 92 * @return - true if the ModR/M byte is required, false otherwise. 93 */ 94static int modRMRequired(OpcodeType type, 95 InstructionContext insnContext, 96 uint16_t opcode) 97{ 98 const struct OpcodeDecision *decision = NULL; 99 const uint8_t *indextable = NULL; 100 uint8_t index; 101 102 switch (type) { 103 default: 104 case ONEBYTE: 105 decision = ONEBYTE_SYM; 106 indextable = index_x86DisassemblerOneByteOpcodes; 107 break; 108 case TWOBYTE: 109 decision = TWOBYTE_SYM; 110 indextable = index_x86DisassemblerTwoByteOpcodes; 111 break; 112 case THREEBYTE_38: 113 decision = THREEBYTE38_SYM; 114 indextable = index_x86DisassemblerThreeByte38Opcodes; 115 break; 116 case THREEBYTE_3A: 117 decision = THREEBYTE3A_SYM; 118 indextable = index_x86DisassemblerThreeByte3AOpcodes; 119 break; 120#ifndef CAPSTONE_X86_REDUCE 121 case XOP8_MAP: 122 decision = XOP8_MAP_SYM; 123 indextable = index_x86DisassemblerXOP8Opcodes; 124 break; 125 case XOP9_MAP: 126 decision = XOP9_MAP_SYM; 127 indextable = index_x86DisassemblerXOP9Opcodes; 128 break; 129 case XOPA_MAP: 130 decision = XOPA_MAP_SYM; 131 indextable = index_x86DisassemblerXOPAOpcodes; 132 break; 133 case T3DNOW_MAP: 134 // 3DNow instructions always have ModRM byte 135 return true; 136#endif 137 } 138 139 index = indextable[insnContext]; 140 if (index) 141 return decision[index - 1].modRMDecisions[opcode].modrm_type != MODRM_ONEENTRY; 142 else 143 return false; 144} 145 146/* 147 * decode - Reads the appropriate instruction table to obtain the unique ID of 148 * an instruction. 149 * 150 * @param type - See modRMRequired(). 151 * @param insnContext - See modRMRequired(). 152 * @param opcode - See modRMRequired(). 153 * @param modRM - The ModR/M byte if required, or any value if not. 154 * @return - The UID of the instruction, or 0 on failure. 155 */ 156static InstrUID decode(OpcodeType type, 157 InstructionContext insnContext, 158 uint8_t opcode, 159 uint8_t modRM) 160{ 161 const struct ModRMDecision *dec = NULL; 162 const uint8_t *indextable = NULL; 163 uint8_t index; 164 165 switch (type) { 166 default: 167 case ONEBYTE: 168 indextable = index_x86DisassemblerOneByteOpcodes; 169 index = indextable[insnContext]; 170 if (index) 171 dec = &ONEBYTE_SYM[index - 1].modRMDecisions[opcode]; 172 else 173 dec = &emptyTable.modRMDecisions[opcode]; 174 break; 175 case TWOBYTE: 176 indextable = index_x86DisassemblerTwoByteOpcodes; 177 index = indextable[insnContext]; 178 if (index) 179 dec = &TWOBYTE_SYM[index - 1].modRMDecisions[opcode]; 180 else 181 dec = &emptyTable.modRMDecisions[opcode]; 182 break; 183 case THREEBYTE_38: 184 indextable = index_x86DisassemblerThreeByte38Opcodes; 185 index = indextable[insnContext]; 186 if (index) 187 dec = &THREEBYTE38_SYM[index - 1].modRMDecisions[opcode]; 188 else 189 dec = &emptyTable.modRMDecisions[opcode]; 190 break; 191 case THREEBYTE_3A: 192 indextable = index_x86DisassemblerThreeByte3AOpcodes; 193 index = indextable[insnContext]; 194 if (index) 195 dec = &THREEBYTE3A_SYM[index - 1].modRMDecisions[opcode]; 196 else 197 dec = &emptyTable.modRMDecisions[opcode]; 198 break; 199#ifndef CAPSTONE_X86_REDUCE 200 case XOP8_MAP: 201 indextable = index_x86DisassemblerXOP8Opcodes; 202 index = indextable[insnContext]; 203 if (index) 204 dec = &XOP8_MAP_SYM[index - 1].modRMDecisions[opcode]; 205 else 206 dec = &emptyTable.modRMDecisions[opcode]; 207 break; 208 case XOP9_MAP: 209 indextable = index_x86DisassemblerXOP9Opcodes; 210 index = indextable[insnContext]; 211 if (index) 212 dec = &XOP9_MAP_SYM[index - 1].modRMDecisions[opcode]; 213 else 214 dec = &emptyTable.modRMDecisions[opcode]; 215 break; 216 case XOPA_MAP: 217 indextable = index_x86DisassemblerXOPAOpcodes; 218 index = indextable[insnContext]; 219 if (index) 220 dec = &XOPA_MAP_SYM[index - 1].modRMDecisions[opcode]; 221 else 222 dec = &emptyTable.modRMDecisions[opcode]; 223 break; 224 case T3DNOW_MAP: 225 indextable = index_x86DisassemblerT3DNOWOpcodes; 226 index = indextable[insnContext]; 227 if (index) 228 dec = &T3DNOW_MAP_SYM[index - 1].modRMDecisions[opcode]; 229 else 230 dec = &emptyTable.modRMDecisions[opcode]; 231 break; 232#endif 233 } 234 235 switch (dec->modrm_type) { 236 default: 237 //debug("Corrupt table! Unknown modrm_type"); 238 return 0; 239 case MODRM_ONEENTRY: 240 return modRMTable[dec->instructionIDs]; 241 case MODRM_SPLITRM: 242 if (modFromModRM(modRM) == 0x3) 243 return modRMTable[dec->instructionIDs+1]; 244 return modRMTable[dec->instructionIDs]; 245 case MODRM_SPLITREG: 246 if (modFromModRM(modRM) == 0x3) 247 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; 248 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 249 case MODRM_SPLITMISC: 250 if (modFromModRM(modRM) == 0x3) 251 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; 252 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 253 case MODRM_FULL: 254 return modRMTable[dec->instructionIDs+modRM]; 255 } 256} 257 258/* 259 * specifierForUID - Given a UID, returns the name and operand specification for 260 * that instruction. 261 * 262 * @param uid - The unique ID for the instruction. This should be returned by 263 * decode(); specifierForUID will not check bounds. 264 * @return - A pointer to the specification for that instruction. 265 */ 266static const struct InstructionSpecifier *specifierForUID(InstrUID uid) 267{ 268 return &INSTRUCTIONS_SYM[uid]; 269} 270 271/* 272 * consumeByte - Uses the reader function provided by the user to consume one 273 * byte from the instruction's memory and advance the cursor. 274 * 275 * @param insn - The instruction with the reader function to use. The cursor 276 * for this instruction is advanced. 277 * @param byte - A pointer to a pre-allocated memory buffer to be populated 278 * with the data read. 279 * @return - 0 if the read was successful; nonzero otherwise. 280 */ 281static int consumeByte(struct InternalInstruction *insn, uint8_t *byte) 282{ 283 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 284 285 if (!ret) 286 ++(insn->readerCursor); 287 288 return ret; 289} 290 291/* 292 * lookAtByte - Like consumeByte, but does not advance the cursor. 293 * 294 * @param insn - See consumeByte(). 295 * @param byte - See consumeByte(). 296 * @return - See consumeByte(). 297 */ 298static int lookAtByte(struct InternalInstruction *insn, uint8_t *byte) 299{ 300 return insn->reader(insn->readerArg, byte, insn->readerCursor); 301} 302 303static void unconsumeByte(struct InternalInstruction *insn) 304{ 305 insn->readerCursor--; 306} 307 308#define CONSUME_FUNC(name, type) \ 309 static int name(struct InternalInstruction *insn, type *ptr) { \ 310 type combined = 0; \ 311 unsigned offset; \ 312 for (offset = 0; offset < sizeof(type); ++offset) { \ 313 uint8_t byte; \ 314 int ret = insn->reader(insn->readerArg, \ 315 &byte, \ 316 insn->readerCursor + offset); \ 317 if (ret) \ 318 return ret; \ 319 combined = combined | (type)((uint64_t)byte << (offset * 8)); \ 320 } \ 321 *ptr = combined; \ 322 insn->readerCursor += sizeof(type); \ 323 return 0; \ 324 } 325 326/* 327 * consume* - Use the reader function provided by the user to consume data 328 * values of various sizes from the instruction's memory and advance the 329 * cursor appropriately. These readers perform endian conversion. 330 * 331 * @param insn - See consumeByte(). 332 * @param ptr - A pointer to a pre-allocated memory of appropriate size to 333 * be populated with the data read. 334 * @return - See consumeByte(). 335 */ 336CONSUME_FUNC(consumeInt8, int8_t) 337CONSUME_FUNC(consumeInt16, int16_t) 338CONSUME_FUNC(consumeInt32, int32_t) 339CONSUME_FUNC(consumeUInt16, uint16_t) 340CONSUME_FUNC(consumeUInt32, uint32_t) 341CONSUME_FUNC(consumeUInt64, uint64_t) 342 343/* 344 * setPrefixPresent - Marks that a particular prefix is present at a particular 345 * location. 346 * 347 * @param insn - The instruction to be marked as having the prefix. 348 * @param prefix - The prefix that is present. 349 * @param location - The location where the prefix is located (in the address 350 * space of the instruction's reader). 351 */ 352static void setPrefixPresent(struct InternalInstruction *insn, 353 uint8_t prefix, uint64_t location) 354{ 355 switch (prefix) { 356 case 0x26: 357 insn->isPrefix26 = true; 358 insn->prefix26 = location; 359 break; 360 case 0x2e: 361 insn->isPrefix2e = true; 362 insn->prefix2e = location; 363 break; 364 case 0x36: 365 insn->isPrefix36 = true; 366 insn->prefix36 = location; 367 break; 368 case 0x3e: 369 insn->isPrefix3e = true; 370 insn->prefix3e = location; 371 break; 372 case 0x64: 373 insn->isPrefix64 = true; 374 insn->prefix64 = location; 375 break; 376 case 0x65: 377 insn->isPrefix65 = true; 378 insn->prefix65 = location; 379 break; 380 case 0x66: 381 insn->isPrefix66 = true; 382 insn->prefix66 = location; 383 break; 384 case 0x67: 385 insn->isPrefix67 = true; 386 insn->prefix67 = location; 387 break; 388 case 0xf0: 389 insn->isPrefixf0 = true; 390 insn->prefixf0 = location; 391 break; 392 case 0xf2: 393 insn->isPrefixf2 = true; 394 insn->prefixf2 = location; 395 break; 396 case 0xf3: 397 insn->isPrefixf3 = true; 398 insn->prefixf3 = location; 399 break; 400 default: 401 break; 402 } 403} 404 405/* 406 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is 407 * present at a given location. 408 * 409 * @param insn - The instruction to be queried. 410 * @param prefix - The prefix. 411 * @param location - The location to query. 412 * @return - Whether the prefix is at that location. 413 */ 414static bool isPrefixAtLocation(struct InternalInstruction *insn, uint8_t prefix, 415 uint64_t location) 416{ 417 switch (prefix) { 418 case 0x26: 419 if (insn->isPrefix26 && insn->prefix26 == location) 420 return true; 421 break; 422 case 0x2e: 423 if (insn->isPrefix2e && insn->prefix2e == location) 424 return true; 425 break; 426 case 0x36: 427 if (insn->isPrefix36 && insn->prefix36 == location) 428 return true; 429 break; 430 case 0x3e: 431 if (insn->isPrefix3e && insn->prefix3e == location) 432 return true; 433 break; 434 case 0x64: 435 if (insn->isPrefix64 && insn->prefix64 == location) 436 return true; 437 break; 438 case 0x65: 439 if (insn->isPrefix65 && insn->prefix65 == location) 440 return true; 441 break; 442 case 0x66: 443 if (insn->isPrefix66 && insn->prefix66 == location) 444 return true; 445 break; 446 case 0x67: 447 if (insn->isPrefix67 && insn->prefix67 == location) 448 return true; 449 break; 450 case 0xf0: 451 if (insn->isPrefixf0 && insn->prefixf0 == location) 452 return true; 453 break; 454 case 0xf2: 455 if (insn->isPrefixf2 && insn->prefixf2 == location) 456 return true; 457 break; 458 case 0xf3: 459 if (insn->isPrefixf3 && insn->prefixf3 == location) 460 return true; 461 break; 462 default: 463 break; 464 } 465 return false; 466} 467 468/* 469 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 470 * instruction as having them. Also sets the instruction's default operand, 471 * address, and other relevant data sizes to report operands correctly. 472 * 473 * @param insn - The instruction whose prefixes are to be read. 474 * @return - 0 if the instruction could be read until the end of the prefix 475 * bytes, and no prefixes conflicted; nonzero otherwise. 476 */ 477static int readPrefixes(struct InternalInstruction *insn) 478{ 479 bool isPrefix = true; 480 uint64_t prefixLocation; 481 uint8_t byte = 0, nextByte; 482 483 bool hasAdSize = false; 484 bool hasOpSize = false; 485 486 while (isPrefix) { 487 if (insn->mode == MODE_64BIT) { 488 // eliminate consecutive redundant REX bytes in front 489 if (consumeByte(insn, &byte)) 490 return -1; 491 492 if ((byte & 0xf0) == 0x40) { 493 while(true) { 494 if (lookAtByte(insn, &byte)) // out of input code 495 return -1; 496 if ((byte & 0xf0) == 0x40) { 497 // another REX prefix, but we only remember the last one 498 if (consumeByte(insn, &byte)) 499 return -1; 500 } else 501 break; 502 } 503 504 // recover the last REX byte if next byte is not a legacy prefix 505 switch (byte) { 506 case 0xf2: /* REPNE/REPNZ */ 507 case 0xf3: /* REP or REPE/REPZ */ 508 case 0xf0: /* LOCK */ 509 case 0x2e: /* CS segment override -OR- Branch not taken */ 510 case 0x36: /* SS segment override -OR- Branch taken */ 511 case 0x3e: /* DS segment override */ 512 case 0x26: /* ES segment override */ 513 case 0x64: /* FS segment override */ 514 case 0x65: /* GS segment override */ 515 case 0x66: /* Operand-size override */ 516 case 0x67: /* Address-size override */ 517 break; 518 default: /* Not a prefix byte */ 519 unconsumeByte(insn); 520 break; 521 } 522 } else { 523 unconsumeByte(insn); 524 } 525 } 526 527 prefixLocation = insn->readerCursor; 528 529 /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */ 530 if (consumeByte(insn, &byte)) 531 return -1; 532 533 if (insn->readerCursor - 1 == insn->startLocation 534 && (byte == 0xf2 || byte == 0xf3)) { 535 536 if (lookAtByte(insn, &nextByte)) 537 return -1; 538 539 /* 540 * If the byte is 0xf2 or 0xf3, and any of the following conditions are 541 * met: 542 * - it is followed by a LOCK (0xf0) prefix 543 * - it is followed by an xchg instruction 544 * then it should be disassembled as a xacquire/xrelease not repne/rep. 545 */ 546 if ((byte == 0xf2 || byte == 0xf3) && 547 ((nextByte == 0xf0) | 548 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) 549 insn->xAcquireRelease = true; 550 /* 551 * Also if the byte is 0xf3, and the following condition is met: 552 * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or 553 * "mov mem, imm" (opcode 0xc6/0xc7) instructions. 554 * then it should be disassembled as an xrelease not rep. 555 */ 556 if (byte == 0xf3 && 557 (nextByte == 0x88 || nextByte == 0x89 || 558 nextByte == 0xc6 || nextByte == 0xc7)) 559 insn->xAcquireRelease = true; 560 561 if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { 562 if (consumeByte(insn, &nextByte)) 563 return -1; 564 if (lookAtByte(insn, &nextByte)) 565 return -1; 566 unconsumeByte(insn); 567 } 568 } 569 570 switch (byte) { 571 case 0xf2: /* REPNE/REPNZ */ 572 case 0xf3: /* REP or REPE/REPZ */ 573 case 0xf0: /* LOCK */ 574 // only accept the last prefix 575 insn->isPrefixf2 = false; 576 insn->isPrefixf3 = false; 577 insn->isPrefixf0 = false; 578 setPrefixPresent(insn, byte, prefixLocation); 579 insn->prefix0 = byte; 580 break; 581 case 0x2e: /* CS segment override -OR- Branch not taken */ 582 insn->segmentOverride = SEG_OVERRIDE_CS; 583 // only accept the last prefix 584 insn->isPrefix2e = false; 585 insn->isPrefix36 = false; 586 insn->isPrefix3e = false; 587 insn->isPrefix26 = false; 588 insn->isPrefix64 = false; 589 insn->isPrefix65 = false; 590 591 setPrefixPresent(insn, byte, prefixLocation); 592 insn->prefix1 = byte; 593 break; 594 case 0x36: /* SS segment override -OR- Branch taken */ 595 insn->segmentOverride = SEG_OVERRIDE_SS; 596 // only accept the last prefix 597 insn->isPrefix2e = false; 598 insn->isPrefix36 = false; 599 insn->isPrefix3e = false; 600 insn->isPrefix26 = false; 601 insn->isPrefix64 = false; 602 insn->isPrefix65 = false; 603 604 setPrefixPresent(insn, byte, prefixLocation); 605 insn->prefix1 = byte; 606 break; 607 case 0x3e: /* DS segment override */ 608 insn->segmentOverride = SEG_OVERRIDE_DS; 609 // only accept the last prefix 610 insn->isPrefix2e = false; 611 insn->isPrefix36 = false; 612 insn->isPrefix3e = false; 613 insn->isPrefix26 = false; 614 insn->isPrefix64 = false; 615 insn->isPrefix65 = false; 616 617 setPrefixPresent(insn, byte, prefixLocation); 618 insn->prefix1 = byte; 619 break; 620 case 0x26: /* ES segment override */ 621 insn->segmentOverride = SEG_OVERRIDE_ES; 622 // only accept the last prefix 623 insn->isPrefix2e = false; 624 insn->isPrefix36 = false; 625 insn->isPrefix3e = false; 626 insn->isPrefix26 = false; 627 insn->isPrefix64 = false; 628 insn->isPrefix65 = false; 629 630 setPrefixPresent(insn, byte, prefixLocation); 631 insn->prefix1 = byte; 632 break; 633 case 0x64: /* FS segment override */ 634 insn->segmentOverride = SEG_OVERRIDE_FS; 635 // only accept the last prefix 636 insn->isPrefix2e = false; 637 insn->isPrefix36 = false; 638 insn->isPrefix3e = false; 639 insn->isPrefix26 = false; 640 insn->isPrefix64 = false; 641 insn->isPrefix65 = false; 642 643 setPrefixPresent(insn, byte, prefixLocation); 644 insn->prefix1 = byte; 645 break; 646 case 0x65: /* GS segment override */ 647 insn->segmentOverride = SEG_OVERRIDE_GS; 648 // only accept the last prefix 649 insn->isPrefix2e = false; 650 insn->isPrefix36 = false; 651 insn->isPrefix3e = false; 652 insn->isPrefix26 = false; 653 insn->isPrefix64 = false; 654 insn->isPrefix65 = false; 655 656 setPrefixPresent(insn, byte, prefixLocation); 657 insn->prefix1 = byte; 658 break; 659 case 0x66: /* Operand-size override */ 660 hasOpSize = true; 661 setPrefixPresent(insn, byte, prefixLocation); 662 insn->prefix2 = byte; 663 break; 664 case 0x67: /* Address-size override */ 665 hasAdSize = true; 666 setPrefixPresent(insn, byte, prefixLocation); 667 insn->prefix3 = byte; 668 break; 669 default: /* Not a prefix byte */ 670 isPrefix = false; 671 break; 672 } 673 674 //if (isPrefix) 675 // dbgprintf(insn, "Found prefix 0x%hhx", byte); 676 } 677 678 insn->vectorExtensionType = TYPE_NO_VEX_XOP; 679 680 681 if (byte == 0x62) { 682 uint8_t byte1, byte2; 683 684 if (consumeByte(insn, &byte1)) { 685 //dbgprintf(insn, "Couldn't read second byte of EVEX prefix"); 686 return -1; 687 } 688 689 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) && 690 ((~byte1 & 0xc) == 0xc)) { 691 if (lookAtByte(insn, &byte2)) { 692 //dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); 693 return -1; 694 } 695 696 if ((byte2 & 0x4) == 0x4) { 697 insn->vectorExtensionType = TYPE_EVEX; 698 } else { 699 unconsumeByte(insn); /* unconsume byte1 */ 700 unconsumeByte(insn); /* unconsume byte */ 701 insn->necessaryPrefixLocation = insn->readerCursor - 2; 702 } 703 704 if (insn->vectorExtensionType == TYPE_EVEX) { 705 insn->vectorExtensionPrefix[0] = byte; 706 insn->vectorExtensionPrefix[1] = byte1; 707 708 if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) { 709 //dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); 710 return -1; 711 } 712 713 if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) { 714 //dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix"); 715 return -1; 716 } 717 718 /* We simulate the REX prefix for simplicity's sake */ 719 if (insn->mode == MODE_64BIT) { 720 insn->rexPrefix = 0x40 721 | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) 722 | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) 723 | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) 724 | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); 725 } 726 727 //dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", 728 // insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 729 // insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]); 730 } 731 } else { 732 // BOUND instruction 733 unconsumeByte(insn); /* unconsume byte1 */ 734 unconsumeByte(insn); /* unconsume byte */ 735 } 736 } else if (byte == 0xc4) { 737 uint8_t byte1; 738 739 if (lookAtByte(insn, &byte1)) { 740 //dbgprintf(insn, "Couldn't read second byte of VEX"); 741 return -1; 742 } 743 744 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 745 insn->vectorExtensionType = TYPE_VEX_3B; 746 insn->necessaryPrefixLocation = insn->readerCursor - 1; 747 } else { 748 unconsumeByte(insn); 749 insn->necessaryPrefixLocation = insn->readerCursor - 1; 750 } 751 752 if (insn->vectorExtensionType == TYPE_VEX_3B) { 753 insn->vectorExtensionPrefix[0] = byte; 754 if (consumeByte(insn, &insn->vectorExtensionPrefix[1])) 755 return -1; 756 if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) 757 return -1; 758 759 /* We simulate the REX prefix for simplicity's sake */ 760 if (insn->mode == MODE_64BIT) { 761 insn->rexPrefix = 0x40 762 | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) 763 | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) 764 | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) 765 | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); 766 767 } 768 } 769 } else if (byte == 0xc5) { 770 uint8_t byte1; 771 772 if (lookAtByte(insn, &byte1)) { 773 //dbgprintf(insn, "Couldn't read second byte of VEX"); 774 return -1; 775 } 776 777 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 778 insn->vectorExtensionType = TYPE_VEX_2B; 779 } else { 780 unconsumeByte(insn); 781 } 782 783 if (insn->vectorExtensionType == TYPE_VEX_2B) { 784 insn->vectorExtensionPrefix[0] = byte; 785 if (consumeByte(insn, &insn->vectorExtensionPrefix[1])) 786 return -1; 787 788 if (insn->mode == MODE_64BIT) { 789 insn->rexPrefix = 0x40 790 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); 791 } 792 793 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 794 default: 795 break; 796 case VEX_PREFIX_66: 797 hasOpSize = true; 798 break; 799 } 800 } 801 } else if (byte == 0x8f) { 802 uint8_t byte1; 803 804 if (lookAtByte(insn, &byte1)) { 805 // dbgprintf(insn, "Couldn't read second byte of XOP"); 806 return -1; 807 } 808 809 if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */ 810 insn->vectorExtensionType = TYPE_XOP; 811 insn->necessaryPrefixLocation = insn->readerCursor - 1; 812 } else { 813 unconsumeByte(insn); 814 insn->necessaryPrefixLocation = insn->readerCursor - 1; 815 } 816 817 if (insn->vectorExtensionType == TYPE_XOP) { 818 insn->vectorExtensionPrefix[0] = byte; 819 if (consumeByte(insn, &insn->vectorExtensionPrefix[1])) 820 return -1; 821 if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) 822 return -1; 823 824 /* We simulate the REX prefix for simplicity's sake */ 825 if (insn->mode == MODE_64BIT) { 826 insn->rexPrefix = 0x40 827 | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) 828 | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) 829 | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) 830 | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); 831 } 832 833 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 834 default: 835 break; 836 case VEX_PREFIX_66: 837 hasOpSize = true; 838 break; 839 } 840 } 841 } else { 842 if (insn->mode == MODE_64BIT) { 843 if ((byte & 0xf0) == 0x40) { 844 uint8_t opcodeByte; 845 846 while(true) { 847 if (lookAtByte(insn, &opcodeByte)) // out of input code 848 return -1; 849 if ((opcodeByte & 0xf0) == 0x40) { 850 // another REX prefix, but we only remember the last one 851 if (consumeByte(insn, &byte)) 852 return -1; 853 } else 854 break; 855 } 856 857 insn->rexPrefix = byte; 858 insn->necessaryPrefixLocation = insn->readerCursor - 2; 859 // dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 860 } else { 861 unconsumeByte(insn); 862 insn->necessaryPrefixLocation = insn->readerCursor - 1; 863 } 864 } else { 865 unconsumeByte(insn); 866 insn->necessaryPrefixLocation = insn->readerCursor - 1; 867 } 868 } 869 870 if (insn->mode == MODE_16BIT) { 871 insn->registerSize = (hasOpSize ? 4 : 2); 872 insn->addressSize = (hasAdSize ? 4 : 2); 873 insn->displacementSize = (hasAdSize ? 4 : 2); 874 insn->immediateSize = (hasOpSize ? 4 : 2); 875 insn->immSize = (hasOpSize ? 4 : 2); 876 } else if (insn->mode == MODE_32BIT) { 877 insn->registerSize = (hasOpSize ? 2 : 4); 878 insn->addressSize = (hasAdSize ? 2 : 4); 879 insn->displacementSize = (hasAdSize ? 2 : 4); 880 insn->immediateSize = (hasOpSize ? 2 : 4); 881 insn->immSize = (hasOpSize ? 2 : 4); 882 } else if (insn->mode == MODE_64BIT) { 883 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 884 insn->registerSize = 8; 885 insn->addressSize = (hasAdSize ? 4 : 8); 886 insn->displacementSize = 4; 887 insn->immediateSize = 4; 888 insn->immSize = 4; 889 } else if (insn->rexPrefix) { 890 insn->registerSize = (hasOpSize ? 2 : 4); 891 insn->addressSize = (hasAdSize ? 4 : 8); 892 insn->displacementSize = (hasOpSize ? 2 : 4); 893 insn->immediateSize = (hasOpSize ? 2 : 4); 894 insn->immSize = (hasOpSize ? 2 : 4); 895 } else { 896 insn->registerSize = (hasOpSize ? 2 : 4); 897 insn->addressSize = (hasAdSize ? 4 : 8); 898 insn->displacementSize = (hasOpSize ? 2 : 4); 899 insn->immediateSize = (hasOpSize ? 2 : 4); 900 insn->immSize = (hasOpSize ? 4 : 8); 901 } 902 } 903 904 return 0; 905} 906 907static int readModRM(struct InternalInstruction *insn); 908 909/* 910 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 911 * extended or escape opcodes). 912 * 913 * @param insn - The instruction whose opcode is to be read. 914 * @return - 0 if the opcode could be read successfully; nonzero otherwise. 915 */ 916static int readOpcode(struct InternalInstruction *insn) 917{ 918 /* Determine the length of the primary opcode */ 919 uint8_t current; 920 921 // printf(">>> readOpcode() = %x\n", insn->readerCursor); 922 923 insn->opcodeType = ONEBYTE; 924 insn->firstByte = 0x00; 925 926 if (insn->vectorExtensionType == TYPE_EVEX) { 927 switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { 928 default: 929 // dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)", 930 // mmFromEVEX2of4(insn->vectorExtensionPrefix[1])); 931 return -1; 932 case VEX_LOB_0F: 933 insn->opcodeType = TWOBYTE; 934 return consumeByte(insn, &insn->opcode); 935 case VEX_LOB_0F38: 936 insn->opcodeType = THREEBYTE_38; 937 return consumeByte(insn, &insn->opcode); 938 case VEX_LOB_0F3A: 939 insn->opcodeType = THREEBYTE_3A; 940 return consumeByte(insn, &insn->opcode); 941 } 942 } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 943 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { 944 default: 945 // dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", 946 // mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); 947 return -1; 948 case VEX_LOB_0F: 949 insn->twoByteEscape = 0x0f; 950 insn->opcodeType = TWOBYTE; 951 return consumeByte(insn, &insn->opcode); 952 case VEX_LOB_0F38: 953 insn->twoByteEscape = 0x0f; 954 insn->threeByteEscape = 0x38; 955 insn->opcodeType = THREEBYTE_38; 956 return consumeByte(insn, &insn->opcode); 957 case VEX_LOB_0F3A: 958 insn->twoByteEscape = 0x0f; 959 insn->threeByteEscape = 0x3a; 960 insn->opcodeType = THREEBYTE_3A; 961 return consumeByte(insn, &insn->opcode); 962 } 963 } else if (insn->vectorExtensionType == TYPE_VEX_2B) { 964 insn->twoByteEscape = 0x0f; 965 insn->opcodeType = TWOBYTE; 966 return consumeByte(insn, &insn->opcode); 967 } else if (insn->vectorExtensionType == TYPE_XOP) { 968 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { 969 default: 970 // dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", 971 // mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); 972 return -1; 973 case XOP_MAP_SELECT_8: 974 // FIXME: twoByteEscape? 975 insn->opcodeType = XOP8_MAP; 976 return consumeByte(insn, &insn->opcode); 977 case XOP_MAP_SELECT_9: 978 // FIXME: twoByteEscape? 979 insn->opcodeType = XOP9_MAP; 980 return consumeByte(insn, &insn->opcode); 981 case XOP_MAP_SELECT_A: 982 // FIXME: twoByteEscape? 983 insn->opcodeType = XOPA_MAP; 984 return consumeByte(insn, &insn->opcode); 985 } 986 } 987 988 if (consumeByte(insn, ¤t)) 989 return -1; 990 991 // save this first byte for MOVcr, MOVdr, MOVrc, MOVrd 992 insn->firstByte = current; 993 994 if (current == 0x0f) { 995 // dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 996 997 insn->twoByteEscape = current; 998 999 if (consumeByte(insn, ¤t)) 1000 return -1; 1001 1002 if (current == 0x38) { 1003 // dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 1004 1005 insn->threeByteEscape = current; 1006 1007 if (consumeByte(insn, ¤t)) 1008 return -1; 1009 1010 insn->opcodeType = THREEBYTE_38; 1011 } else if (current == 0x3a) { 1012 // dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 1013 1014 insn->threeByteEscape = current; 1015 1016 if (consumeByte(insn, ¤t)) 1017 return -1; 1018 1019 insn->opcodeType = THREEBYTE_3A; 1020 } else { 1021#ifndef CAPSTONE_X86_REDUCE 1022 switch(current) { 1023 default: 1024 // dbgprintf(insn, "Didn't find a three-byte escape prefix"); 1025 insn->opcodeType = TWOBYTE; 1026 break; 1027 case 0x0e: // HACK for femms. to be handled properly in next version 3.x 1028 insn->opcodeType = T3DNOW_MAP; 1029 // this encode does not have ModRM 1030 insn->consumedModRM = true; 1031 break; 1032 case 0x0f: 1033 // 3DNow instruction has weird format: ModRM/SIB/displacement + opcode 1034 if (readModRM(insn)) 1035 return -1; 1036 // next is 3DNow opcode 1037 if (consumeByte(insn, ¤t)) 1038 return -1; 1039 insn->opcodeType = T3DNOW_MAP; 1040 break; 1041 } 1042#endif 1043 } 1044 } 1045 1046 /* 1047 * At this point we have consumed the full opcode. 1048 * Anything we consume from here on must be unconsumed. 1049 */ 1050 1051 insn->opcode = current; 1052 1053 return 0; 1054} 1055 1056// Hacky for FEMMS 1057#define GET_INSTRINFO_ENUM 1058#ifndef CAPSTONE_X86_REDUCE 1059#include "X86GenInstrInfo.inc" 1060#else 1061#include "X86GenInstrInfo_reduce.inc" 1062#endif 1063 1064/* 1065 * getIDWithAttrMask - Determines the ID of an instruction, consuming 1066 * the ModR/M byte as appropriate for extended and escape opcodes, 1067 * and using a supplied attribute mask. 1068 * 1069 * @param instructionID - A pointer whose target is filled in with the ID of the 1070 * instruction. 1071 * @param insn - The instruction whose ID is to be determined. 1072 * @param attrMask - The attribute mask to search. 1073 * @return - 0 if the ModR/M could be read when needed or was not 1074 * needed; nonzero otherwise. 1075 */ 1076static int getIDWithAttrMask(uint16_t *instructionID, 1077 struct InternalInstruction *insn, 1078 uint16_t attrMask) 1079{ 1080 bool hasModRMExtension; 1081 1082 InstructionContext instructionClass; 1083 1084#ifndef CAPSTONE_X86_REDUCE 1085 // HACK for femms. to be handled properly in next version 3.x 1086 if (insn->opcode == 0x0e && insn->opcodeType == T3DNOW_MAP) { 1087 *instructionID = X86_FEMMS; 1088 return 0; 1089 } 1090#endif 1091 1092 if (insn->opcodeType == T3DNOW_MAP) 1093 instructionClass = IC_OF; 1094 else 1095 instructionClass = contextForAttrs(attrMask); 1096 1097 hasModRMExtension = modRMRequired(insn->opcodeType, 1098 instructionClass, 1099 insn->opcode) != 0; 1100 1101 if (hasModRMExtension) { 1102 if (readModRM(insn)) 1103 return -1; 1104 1105 *instructionID = decode(insn->opcodeType, 1106 instructionClass, 1107 insn->opcode, 1108 insn->modRM); 1109 } else { 1110 *instructionID = decode(insn->opcodeType, 1111 instructionClass, 1112 insn->opcode, 1113 0); 1114 } 1115 1116 return 0; 1117} 1118 1119/* 1120 * is16BitEquivalent - Determines whether two instruction names refer to 1121 * equivalent instructions but one is 16-bit whereas the other is not. 1122 * 1123 * @param orig - The instruction ID that is not 16-bit 1124 * @param equiv - The instruction ID that is 16-bit 1125 */ 1126static bool is16BitEquivalent(unsigned orig, unsigned equiv) 1127{ 1128 size_t i; 1129 uint16_t idx; 1130 1131 if ((idx = x86_16_bit_eq_lookup[orig]) != 0) { 1132 for (i = idx - 1; i < ARR_SIZE(x86_16_bit_eq_tbl) && x86_16_bit_eq_tbl[i].first == orig; i++) { 1133 if (x86_16_bit_eq_tbl[i].second == equiv) 1134 return true; 1135 } 1136 } 1137 1138 return false; 1139} 1140 1141/* 1142 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 1143 * appropriate for extended and escape opcodes. Determines the attributes and 1144 * context for the instruction before doing so. 1145 * 1146 * @param insn - The instruction whose ID is to be determined. 1147 * @return - 0 if the ModR/M could be read when needed or was not needed; 1148 * nonzero otherwise. 1149 */ 1150static int getID(struct InternalInstruction *insn) 1151{ 1152 uint16_t attrMask; 1153 uint16_t instructionID; 1154 const struct InstructionSpecifier *spec; 1155 1156 // printf(">>> getID()\n"); 1157 attrMask = ATTR_NONE; 1158 1159 if (insn->mode == MODE_64BIT) 1160 attrMask |= ATTR_64BIT; 1161 1162 if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) { 1163 attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX; 1164 1165 if (insn->vectorExtensionType == TYPE_EVEX) { 1166 switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) { 1167 case VEX_PREFIX_66: 1168 attrMask |= ATTR_OPSIZE; 1169 break; 1170 case VEX_PREFIX_F3: 1171 attrMask |= ATTR_XS; 1172 break; 1173 case VEX_PREFIX_F2: 1174 attrMask |= ATTR_XD; 1175 break; 1176 } 1177 1178 if (zFromEVEX4of4(insn->vectorExtensionPrefix[3])) 1179 attrMask |= ATTR_EVEXKZ; 1180 if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) 1181 attrMask |= ATTR_EVEXB; 1182 if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])) 1183 attrMask |= ATTR_EVEXK; 1184 if (lFromEVEX4of4(insn->vectorExtensionPrefix[3])) 1185 attrMask |= ATTR_EVEXL; 1186 if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3])) 1187 attrMask |= ATTR_EVEXL2; 1188 } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 1189 switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) { 1190 case VEX_PREFIX_66: 1191 attrMask |= ATTR_OPSIZE; 1192 break; 1193 case VEX_PREFIX_F3: 1194 attrMask |= ATTR_XS; 1195 break; 1196 case VEX_PREFIX_F2: 1197 attrMask |= ATTR_XD; 1198 break; 1199 } 1200 1201 if (lFromVEX3of3(insn->vectorExtensionPrefix[2])) 1202 attrMask |= ATTR_VEXL; 1203 } else if (insn->vectorExtensionType == TYPE_VEX_2B) { 1204 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 1205 case VEX_PREFIX_66: 1206 attrMask |= ATTR_OPSIZE; 1207 break; 1208 case VEX_PREFIX_F3: 1209 attrMask |= ATTR_XS; 1210 break; 1211 case VEX_PREFIX_F2: 1212 attrMask |= ATTR_XD; 1213 break; 1214 } 1215 1216 if (lFromVEX2of2(insn->vectorExtensionPrefix[1])) 1217 attrMask |= ATTR_VEXL; 1218 } else if (insn->vectorExtensionType == TYPE_XOP) { 1219 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 1220 case VEX_PREFIX_66: 1221 attrMask |= ATTR_OPSIZE; 1222 break; 1223 case VEX_PREFIX_F3: 1224 attrMask |= ATTR_XS; 1225 break; 1226 case VEX_PREFIX_F2: 1227 attrMask |= ATTR_XD; 1228 break; 1229 } 1230 1231 if (lFromXOP3of3(insn->vectorExtensionPrefix[2])) 1232 attrMask |= ATTR_VEXL; 1233 } else { 1234 return -1; 1235 } 1236 } else { 1237 if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) { 1238 attrMask |= ATTR_OPSIZE; 1239 } else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) { 1240 attrMask |= ATTR_ADSIZE; 1241 } else if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) { 1242 attrMask |= ATTR_XS; 1243 } else if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) { 1244 attrMask |= ATTR_XD; 1245 } 1246 } 1247 1248 if (insn->rexPrefix & 0x08) 1249 attrMask |= ATTR_REXW; 1250 1251 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 1252 return -1; 1253 1254 /* Fixing CALL and JMP instruction when in 64bit mode and x66 prefix is used */ 1255 if (insn->mode == MODE_64BIT && insn->isPrefix66 && 1256 (insn->opcode == 0xE8 || insn->opcode == 0xE9)) 1257 { 1258 attrMask ^= ATTR_OPSIZE; 1259 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 1260 return -1; 1261 } 1262 1263 1264 /* 1265 * JCXZ/JECXZ need special handling for 16-bit mode because the meaning 1266 * of the AdSize prefix is inverted w.r.t. 32-bit mode. 1267 */ 1268 if (insn->mode == MODE_16BIT && insn->opcode == 0xE3) { 1269 spec = specifierForUID(instructionID); 1270 1271 /* 1272 * Check for Ii8PCRel instructions. We could alternatively do a 1273 * string-compare on the names, but this is probably cheaper. 1274 */ 1275 if (x86OperandSets[spec->operands][0].type == TYPE_REL8) { 1276 attrMask ^= ATTR_ADSIZE; 1277 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 1278 return -1; 1279 } 1280 } 1281 1282 /* The following clauses compensate for limitations of the tables. */ 1283 if ((insn->mode == MODE_16BIT || insn->isPrefix66) && 1284 !(attrMask & ATTR_OPSIZE)) { 1285 /* 1286 * The instruction tables make no distinction between instructions that 1287 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 1288 * particular spot (i.e., many MMX operations). In general we're 1289 * conservative, but in the specific case where OpSize is present but not 1290 * in the right place we check if there's a 16-bit operation. 1291 */ 1292 1293 const struct InstructionSpecifier *spec; 1294 uint16_t instructionIDWithOpsize; 1295 1296 spec = specifierForUID(instructionID); 1297 1298 if (getIDWithAttrMask(&instructionIDWithOpsize, 1299 insn, attrMask | ATTR_OPSIZE)) { 1300 /* 1301 * ModRM required with OpSize but not present; give up and return version 1302 * without OpSize set 1303 */ 1304 1305 insn->instructionID = instructionID; 1306 insn->spec = spec; 1307 return 0; 1308 } 1309 1310 if (is16BitEquivalent(instructionID, instructionIDWithOpsize) && 1311 (insn->mode == MODE_16BIT) ^ insn->isPrefix66) { 1312 insn->instructionID = instructionIDWithOpsize; 1313 insn->spec = specifierForUID(instructionIDWithOpsize); 1314 } else { 1315 insn->instructionID = instructionID; 1316 insn->spec = spec; 1317 } 1318 return 0; 1319 } 1320 1321 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 1322 insn->rexPrefix & 0x01) { 1323 /* 1324 * NOOP shouldn't decode as NOOP if REX.b is set. Instead 1325 * it should decode as XCHG %r8, %eax. 1326 */ 1327 1328 const struct InstructionSpecifier *spec; 1329 uint16_t instructionIDWithNewOpcode; 1330 const struct InstructionSpecifier *specWithNewOpcode; 1331 1332 spec = specifierForUID(instructionID); 1333 1334 /* Borrow opcode from one of the other XCHGar opcodes */ 1335 insn->opcode = 0x91; 1336 1337 if (getIDWithAttrMask(&instructionIDWithNewOpcode, 1338 insn, 1339 attrMask)) { 1340 insn->opcode = 0x90; 1341 1342 insn->instructionID = instructionID; 1343 insn->spec = spec; 1344 return 0; 1345 } 1346 1347 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); 1348 1349 /* Change back */ 1350 insn->opcode = 0x90; 1351 1352 insn->instructionID = instructionIDWithNewOpcode; 1353 insn->spec = specWithNewOpcode; 1354 1355 return 0; 1356 } 1357 1358 insn->instructionID = instructionID; 1359 insn->spec = specifierForUID(insn->instructionID); 1360 1361 return 0; 1362} 1363 1364/* 1365 * readSIB - Consumes the SIB byte to determine addressing information for an 1366 * instruction. 1367 * 1368 * @param insn - The instruction whose SIB byte is to be read. 1369 * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 1370 */ 1371static int readSIB(struct InternalInstruction *insn) 1372{ 1373 SIBIndex sibIndexBase = SIB_INDEX_NONE; 1374 SIBBase sibBaseBase = SIB_BASE_NONE; 1375 uint8_t index, base; 1376 1377 // dbgprintf(insn, "readSIB()"); 1378 1379 if (insn->consumedSIB) 1380 return 0; 1381 1382 insn->consumedSIB = true; 1383 1384 switch (insn->addressSize) { 1385 case 2: 1386 // dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 1387 return -1; 1388 case 4: 1389 sibIndexBase = SIB_INDEX_EAX; 1390 sibBaseBase = SIB_BASE_EAX; 1391 break; 1392 case 8: 1393 sibIndexBase = SIB_INDEX_RAX; 1394 sibBaseBase = SIB_BASE_RAX; 1395 break; 1396 } 1397 1398 if (consumeByte(insn, &insn->sib)) 1399 return -1; 1400 1401 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 1402 if (insn->vectorExtensionType == TYPE_EVEX) 1403 index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4; 1404 1405 switch (index) { 1406 case 0x4: 1407 insn->sibIndex = SIB_INDEX_NONE; 1408 break; 1409 default: 1410 insn->sibIndex = (SIBIndex)(sibIndexBase + index); 1411 if (insn->sibIndex == SIB_INDEX_sib || 1412 insn->sibIndex == SIB_INDEX_sib64) 1413 insn->sibIndex = SIB_INDEX_NONE; 1414 break; 1415 } 1416 1417 switch (scaleFromSIB(insn->sib)) { 1418 case 0: 1419 insn->sibScale = 1; 1420 break; 1421 case 1: 1422 insn->sibScale = 2; 1423 break; 1424 case 2: 1425 insn->sibScale = 4; 1426 break; 1427 case 3: 1428 insn->sibScale = 8; 1429 break; 1430 } 1431 1432 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 1433 1434 switch (base) { 1435 case 0x5: 1436 case 0xd: 1437 switch (modFromModRM(insn->modRM)) { 1438 case 0x0: 1439 insn->eaDisplacement = EA_DISP_32; 1440 insn->sibBase = SIB_BASE_NONE; 1441 break; 1442 case 0x1: 1443 insn->eaDisplacement = EA_DISP_8; 1444 insn->sibBase = (SIBBase)(sibBaseBase + base); 1445 break; 1446 case 0x2: 1447 insn->eaDisplacement = EA_DISP_32; 1448 insn->sibBase = (SIBBase)(sibBaseBase + base); 1449 break; 1450 case 0x3: 1451 //debug("Cannot have Mod = 0b11 and a SIB byte"); 1452 return -1; 1453 } 1454 break; 1455 default: 1456 insn->sibBase = (SIBBase)(sibBaseBase + base); 1457 break; 1458 } 1459 1460 return 0; 1461} 1462 1463/* 1464 * readDisplacement - Consumes the displacement of an instruction. 1465 * 1466 * @param insn - The instruction whose displacement is to be read. 1467 * @return - 0 if the displacement byte was successfully read; nonzero 1468 * otherwise. 1469 */ 1470static int readDisplacement(struct InternalInstruction *insn) 1471{ 1472 int8_t d8; 1473 int16_t d16; 1474 int32_t d32; 1475 1476 // dbgprintf(insn, "readDisplacement()"); 1477 1478 if (insn->consumedDisplacement) 1479 return 0; 1480 1481 insn->consumedDisplacement = true; 1482 insn->displacementOffset = (uint8_t)(insn->readerCursor - insn->startLocation); 1483 1484 switch (insn->eaDisplacement) { 1485 case EA_DISP_NONE: 1486 insn->consumedDisplacement = false; 1487 break; 1488 case EA_DISP_8: 1489 if (consumeInt8(insn, &d8)) 1490 return -1; 1491 insn->displacement = d8; 1492 break; 1493 case EA_DISP_16: 1494 if (consumeInt16(insn, &d16)) 1495 return -1; 1496 insn->displacement = d16; 1497 break; 1498 case EA_DISP_32: 1499 if (consumeInt32(insn, &d32)) 1500 return -1; 1501 insn->displacement = d32; 1502 break; 1503 } 1504 1505 insn->consumedDisplacement = true; 1506 return 0; 1507} 1508 1509/* 1510 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and 1511 * displacement) for an instruction and interprets it. 1512 * 1513 * @param insn - The instruction whose addressing information is to be read. 1514 * @return - 0 if the information was successfully read; nonzero otherwise. 1515 */ 1516static int readModRM(struct InternalInstruction *insn) 1517{ 1518 uint8_t mod, rm, reg; 1519 1520 // dbgprintf(insn, "readModRM()"); 1521 1522 // already got ModRM byte? 1523 if (insn->consumedModRM) 1524 return 0; 1525 1526 if (consumeByte(insn, &insn->modRM)) 1527 return -1; 1528 1529 // mark that we already got ModRM 1530 insn->consumedModRM = true; 1531 1532 // save original ModRM for later reference 1533 insn->orgModRM = insn->modRM; 1534 1535 // handle MOVcr, MOVdr, MOVrc, MOVrd by pretending they have MRM.mod = 3 1536 if ((insn->firstByte == 0x0f && insn->opcodeType == TWOBYTE) && 1537 (insn->opcode >= 0x20 && insn->opcode <= 0x23 )) 1538 insn->modRM |= 0xC0; 1539 1540 mod = modFromModRM(insn->modRM); 1541 rm = rmFromModRM(insn->modRM); 1542 reg = regFromModRM(insn->modRM); 1543 1544 /* 1545 * This goes by insn->registerSize to pick the correct register, which messes 1546 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in 1547 * fixupReg(). 1548 */ 1549 switch (insn->registerSize) { 1550 case 2: 1551 insn->regBase = MODRM_REG_AX; 1552 insn->eaRegBase = EA_REG_AX; 1553 break; 1554 case 4: 1555 insn->regBase = MODRM_REG_EAX; 1556 insn->eaRegBase = EA_REG_EAX; 1557 break; 1558 case 8: 1559 insn->regBase = MODRM_REG_RAX; 1560 insn->eaRegBase = EA_REG_RAX; 1561 break; 1562 } 1563 1564 reg |= rFromREX(insn->rexPrefix) << 3; 1565 rm |= bFromREX(insn->rexPrefix) << 3; 1566 if (insn->vectorExtensionType == TYPE_EVEX) { 1567 reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; 1568 rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; 1569 } 1570 1571 insn->reg = (Reg)(insn->regBase + reg); 1572 1573 switch (insn->addressSize) { 1574 case 2: 1575 insn->eaBaseBase = EA_BASE_BX_SI; 1576 1577 switch (mod) { 1578 case 0x0: 1579 if (rm == 0x6) { 1580 insn->eaBase = EA_BASE_NONE; 1581 insn->eaDisplacement = EA_DISP_16; 1582 if (readDisplacement(insn)) 1583 return -1; 1584 } else { 1585 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1586 insn->eaDisplacement = EA_DISP_NONE; 1587 } 1588 break; 1589 case 0x1: 1590 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1591 insn->eaDisplacement = EA_DISP_8; 1592 insn->displacementSize = 1; 1593 if (readDisplacement(insn)) 1594 return -1; 1595 break; 1596 case 0x2: 1597 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1598 insn->eaDisplacement = EA_DISP_16; 1599 if (readDisplacement(insn)) 1600 return -1; 1601 break; 1602 case 0x3: 1603 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1604 insn->eaDisplacement = EA_DISP_NONE; 1605 if (readDisplacement(insn)) 1606 return -1; 1607 break; 1608 } 1609 break; 1610 case 4: 1611 case 8: 1612 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 1613 1614 switch (mod) { 1615 case 0x0: 1616 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ 1617 switch (rm) { 1618 case 0x14: 1619 case 0x4: 1620 case 0xc: /* in case REXW.b is set */ 1621 insn->eaBase = (insn->addressSize == 4 ? 1622 EA_BASE_sib : EA_BASE_sib64); 1623 if (readSIB(insn) || readDisplacement(insn)) 1624 return -1; 1625 break; 1626 case 0x5: 1627 case 0xd: 1628 insn->eaBase = EA_BASE_NONE; 1629 insn->eaDisplacement = EA_DISP_32; 1630 if (readDisplacement(insn)) 1631 return -1; 1632 break; 1633 default: 1634 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1635 break; 1636 } 1637 1638 break; 1639 case 0x1: 1640 insn->displacementSize = 1; 1641 /* FALLTHROUGH */ 1642 case 0x2: 1643 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); 1644 switch (rm) { 1645 case 0x14: 1646 case 0x4: 1647 case 0xc: /* in case REXW.b is set */ 1648 insn->eaBase = EA_BASE_sib; 1649 if (readSIB(insn) || readDisplacement(insn)) 1650 return -1; 1651 break; 1652 default: 1653 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1654 if (readDisplacement(insn)) 1655 return -1; 1656 break; 1657 } 1658 break; 1659 case 0x3: 1660 insn->eaDisplacement = EA_DISP_NONE; 1661 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1662 break; 1663 } 1664 break; 1665 } /* switch (insn->addressSize) */ 1666 1667 return 0; 1668} 1669 1670#define GENERIC_FIXUP_FUNC(name, base, prefix) \ 1671 static uint8_t name(struct InternalInstruction *insn, \ 1672 OperandType type, \ 1673 uint8_t index, \ 1674 uint8_t *valid) { \ 1675 *valid = 1; \ 1676 switch (type) { \ 1677 case TYPE_R8: \ 1678 insn->operandSize = 1; \ 1679 break; \ 1680 case TYPE_R16: \ 1681 insn->operandSize = 2; \ 1682 break; \ 1683 case TYPE_R32: \ 1684 insn->operandSize = 4; \ 1685 break; \ 1686 case TYPE_R64: \ 1687 insn->operandSize = 8; \ 1688 break; \ 1689 case TYPE_XMM512: \ 1690 insn->operandSize = 64; \ 1691 break; \ 1692 case TYPE_XMM256: \ 1693 insn->operandSize = 32; \ 1694 break; \ 1695 case TYPE_XMM128: \ 1696 insn->operandSize = 16; \ 1697 break; \ 1698 case TYPE_XMM64: \ 1699 insn->operandSize = 8; \ 1700 break; \ 1701 case TYPE_XMM32: \ 1702 insn->operandSize = 4; \ 1703 break; \ 1704 case TYPE_XMM: \ 1705 insn->operandSize = 2; \ 1706 break; \ 1707 case TYPE_MM64: \ 1708 insn->operandSize = 8; \ 1709 break; \ 1710 case TYPE_MM32: \ 1711 insn->operandSize = 4; \ 1712 break; \ 1713 case TYPE_MM: \ 1714 insn->operandSize = 2; \ 1715 break; \ 1716 case TYPE_CONTROLREG: \ 1717 insn->operandSize = 4; \ 1718 break; \ 1719 default: break; \ 1720 } \ 1721 switch (type) { \ 1722 default: \ 1723 *valid = 0; \ 1724 return 0; \ 1725 case TYPE_Rv: \ 1726 return (uint8_t)(base + index); \ 1727 case TYPE_R8: \ 1728 if (insn->rexPrefix && \ 1729 index >= 4 && index <= 7) { \ 1730 return prefix##_SPL + (index - 4); \ 1731 } else { \ 1732 return prefix##_AL + index; \ 1733 } \ 1734 case TYPE_R16: \ 1735 return prefix##_AX + index; \ 1736 case TYPE_R32: \ 1737 return prefix##_EAX + index; \ 1738 case TYPE_R64: \ 1739 return prefix##_RAX + index; \ 1740 case TYPE_XMM512: \ 1741 return prefix##_ZMM0 + index; \ 1742 case TYPE_XMM256: \ 1743 return prefix##_YMM0 + index; \ 1744 case TYPE_XMM128: \ 1745 case TYPE_XMM64: \ 1746 case TYPE_XMM32: \ 1747 case TYPE_XMM: \ 1748 return prefix##_XMM0 + index; \ 1749 case TYPE_VK1: \ 1750 case TYPE_VK8: \ 1751 case TYPE_VK16: \ 1752 if (index > 7) \ 1753 *valid = 0; \ 1754 return prefix##_K0 + index; \ 1755 case TYPE_MM64: \ 1756 case TYPE_MM32: \ 1757 case TYPE_MM: \ 1758 return prefix##_MM0 + (index & 7); \ 1759 case TYPE_SEGMENTREG: \ 1760 if (index > 5) \ 1761 *valid = 0; \ 1762 return prefix##_ES + index; \ 1763 case TYPE_DEBUGREG: \ 1764 if (index > 7) \ 1765 *valid = 0; \ 1766 return prefix##_DR0 + index; \ 1767 case TYPE_CONTROLREG: \ 1768 return prefix##_CR0 + index; \ 1769 } \ 1770 } 1771 1772/* 1773 * fixup*Value - Consults an operand type to determine the meaning of the 1774 * reg or R/M field. If the operand is an XMM operand, for example, an 1775 * operand would be XMM0 instead of AX, which readModRM() would otherwise 1776 * misinterpret it as. 1777 * 1778 * @param insn - The instruction containing the operand. 1779 * @param type - The operand type. 1780 * @param index - The existing value of the field as reported by readModRM(). 1781 * @param valid - The address of a uint8_t. The target is set to 1 if the 1782 * field is valid for the register class; 0 if not. 1783 * @return - The proper value. 1784 */ 1785GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) 1786GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) 1787 1788/* 1789 * fixupReg - Consults an operand specifier to determine which of the 1790 * fixup*Value functions to use in correcting readModRM()'ss interpretation. 1791 * 1792 * @param insn - See fixup*Value(). 1793 * @param op - The operand specifier. 1794 * @return - 0 if fixup was successful; -1 if the register returned was 1795 * invalid for its class. 1796 */ 1797static int fixupReg(struct InternalInstruction *insn, 1798 const struct OperandSpecifier *op) 1799{ 1800 uint8_t valid; 1801 1802 // dbgprintf(insn, "fixupReg()"); 1803 1804 switch ((OperandEncoding)op->encoding) { 1805 default: 1806 //debug("Expected a REG or R/M encoding in fixupReg"); 1807 return -1; 1808 case ENCODING_VVVV: 1809 insn->vvvv = (Reg)fixupRegValue(insn, 1810 (OperandType)op->type, 1811 insn->vvvv, 1812 &valid); 1813 if (!valid) 1814 return -1; 1815 break; 1816 case ENCODING_REG: 1817 insn->reg = (Reg)fixupRegValue(insn, 1818 (OperandType)op->type, 1819 (uint8_t)(insn->reg - insn->regBase), 1820 &valid); 1821 if (!valid) 1822 return -1; 1823 break; 1824 CASE_ENCODING_RM: 1825 if (insn->eaBase >= insn->eaRegBase) { 1826 insn->eaBase = (EABase)fixupRMValue(insn, 1827 (OperandType)op->type, 1828 (uint8_t)(insn->eaBase - insn->eaRegBase), 1829 &valid); 1830 if (!valid) 1831 return -1; 1832 } 1833 break; 1834 } 1835 1836 return 0; 1837} 1838 1839/* 1840 * readOpcodeRegister - Reads an operand from the opcode field of an 1841 * instruction and interprets it appropriately given the operand width. 1842 * Handles AddRegFrm instructions. 1843 * 1844 * @param insn - the instruction whose opcode field is to be read. 1845 * @param size - The width (in bytes) of the register being specified. 1846 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1847 * RAX. 1848 * @return - 0 on success; nonzero otherwise. 1849 */ 1850static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) 1851{ 1852 // dbgprintf(insn, "readOpcodeRegister()"); 1853 1854 if (size == 0) 1855 size = insn->registerSize; 1856 1857 insn->operandSize = size; 1858 1859 switch (size) { 1860 case 1: 1861 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 1862 | (insn->opcode & 7))); 1863 if (insn->rexPrefix && 1864 insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1865 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1866 insn->opcodeRegister = (Reg)(MODRM_REG_SPL 1867 + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1868 } 1869 1870 break; 1871 case 2: 1872 insn->opcodeRegister = (Reg)(MODRM_REG_AX 1873 + ((bFromREX(insn->rexPrefix) << 3) 1874 | (insn->opcode & 7))); 1875 break; 1876 case 4: 1877 insn->opcodeRegister = (Reg)(MODRM_REG_EAX 1878 + ((bFromREX(insn->rexPrefix) << 3) 1879 | (insn->opcode & 7))); 1880 break; 1881 case 8: 1882 insn->opcodeRegister = (Reg)(MODRM_REG_RAX 1883 + ((bFromREX(insn->rexPrefix) << 3) 1884 | (insn->opcode & 7))); 1885 break; 1886 } 1887 1888 return 0; 1889} 1890 1891/* 1892 * readImmediate - Consumes an immediate operand from an instruction, given the 1893 * desired operand size. 1894 * 1895 * @param insn - The instruction whose operand is to be read. 1896 * @param size - The width (in bytes) of the operand. 1897 * @return - 0 if the immediate was successfully consumed; nonzero 1898 * otherwise. 1899 */ 1900static int readImmediate(struct InternalInstruction *insn, uint8_t size) 1901{ 1902 uint8_t imm8; 1903 uint16_t imm16; 1904 uint32_t imm32; 1905 uint64_t imm64; 1906 1907 // dbgprintf(insn, "readImmediate()"); 1908 1909 if (insn->numImmediatesConsumed == 2) { 1910 //debug("Already consumed two immediates"); 1911 return -1; 1912 } 1913 1914 if (size == 0) 1915 size = insn->immediateSize; 1916 else 1917 insn->immediateSize = size; 1918 insn->immediateOffset = (uint8_t)(insn->readerCursor - insn->startLocation); 1919 1920 switch (size) { 1921 case 1: 1922 if (consumeByte(insn, &imm8)) 1923 return -1; 1924 insn->immediates[insn->numImmediatesConsumed] = imm8; 1925 break; 1926 case 2: 1927 if (consumeUInt16(insn, &imm16)) 1928 return -1; 1929 insn->immediates[insn->numImmediatesConsumed] = imm16; 1930 break; 1931 case 4: 1932 if (consumeUInt32(insn, &imm32)) 1933 return -1; 1934 insn->immediates[insn->numImmediatesConsumed] = imm32; 1935 break; 1936 case 8: 1937 if (consumeUInt64(insn, &imm64)) 1938 return -1; 1939 insn->immediates[insn->numImmediatesConsumed] = imm64; 1940 break; 1941 } 1942 1943 insn->numImmediatesConsumed++; 1944 1945 return 0; 1946} 1947 1948/* 1949 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. 1950 * 1951 * @param insn - The instruction whose operand is to be read. 1952 * @return - 0 if the vvvv was successfully consumed; nonzero 1953 * otherwise. 1954 */ 1955static int readVVVV(struct InternalInstruction *insn) 1956{ 1957 int vvvv; 1958 // dbgprintf(insn, "readVVVV()"); 1959 1960 if (insn->vectorExtensionType == TYPE_EVEX) 1961 vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 | 1962 vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2])); 1963 else if (insn->vectorExtensionType == TYPE_VEX_3B) 1964 vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]); 1965 else if (insn->vectorExtensionType == TYPE_VEX_2B) 1966 vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]); 1967 else if (insn->vectorExtensionType == TYPE_XOP) 1968 vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]); 1969 else 1970 return -1; 1971 1972 if (insn->mode != MODE_64BIT) 1973 vvvv &= 0x7; 1974 1975 insn->vvvv = vvvv; 1976 1977 return 0; 1978} 1979 1980/* 1981 * readMaskRegister - Reads an mask register from the opcode field of an 1982 * instruction. 1983 * 1984 * @param insn - The instruction whose opcode field is to be read. 1985 * @return - 0 on success; nonzero otherwise. 1986 */ 1987static int readMaskRegister(struct InternalInstruction *insn) 1988{ 1989 // dbgprintf(insn, "readMaskRegister()"); 1990 1991 if (insn->vectorExtensionType != TYPE_EVEX) 1992 return -1; 1993 1994 insn->writemask = aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]); 1995 1996 return 0; 1997} 1998 1999/* 2000 * readOperands - Consults the specifier for an instruction and consumes all 2001 * operands for that instruction, interpreting them as it goes. 2002 * 2003 * @param insn - The instruction whose operands are to be read and interpreted. 2004 * @return - 0 if all operands could be read; nonzero otherwise. 2005 */ 2006static int readOperands(struct InternalInstruction *insn) 2007{ 2008 int index; 2009 int hasVVVV, needVVVV; 2010 int sawRegImm = 0; 2011 2012 // printf(">>> readOperands()\n"); 2013 /* If non-zero vvvv specified, need to make sure one of the operands 2014 uses it. */ 2015 hasVVVV = !readVVVV(insn); 2016 needVVVV = hasVVVV && (insn->vvvv != 0); 2017 2018 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 2019 //printf(">>> encoding[%u] = %u\n", index, x86OperandSets[insn->spec->operands][index].encoding); 2020 switch (x86OperandSets[insn->spec->operands][index].encoding) { 2021 case ENCODING_NONE: 2022 case ENCODING_SI: 2023 case ENCODING_DI: 2024 break; 2025 case ENCODING_REG: 2026 CASE_ENCODING_RM: 2027 if (readModRM(insn)) 2028 return -1; 2029 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 2030 return -1; 2031 // Apply the AVX512 compressed displacement scaling factor. 2032 if (x86OperandSets[insn->spec->operands][index].encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) 2033 insn->displacement *= 1 << (x86OperandSets[insn->spec->operands][index].encoding - ENCODING_RM); 2034 break; 2035 case ENCODING_CB: 2036 case ENCODING_CW: 2037 case ENCODING_CD: 2038 case ENCODING_CP: 2039 case ENCODING_CO: 2040 case ENCODING_CT: 2041 // dbgprintf(insn, "We currently don't hande code-offset encodings"); 2042 return -1; 2043 case ENCODING_IB: 2044 if (sawRegImm) { 2045 /* Saw a register immediate so don't read again and instead split the 2046 previous immediate. FIXME: This is a hack. */ 2047 insn->immediates[insn->numImmediatesConsumed] = 2048 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 2049 ++insn->numImmediatesConsumed; 2050 break; 2051 } 2052 if (readImmediate(insn, 1)) 2053 return -1; 2054 if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || 2055 x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) 2056 sawRegImm = 1; 2057 break; 2058 case ENCODING_IW: 2059 if (readImmediate(insn, 2)) 2060 return -1; 2061 break; 2062 case ENCODING_ID: 2063 if (readImmediate(insn, 4)) 2064 return -1; 2065 break; 2066 case ENCODING_IO: 2067 if (readImmediate(insn, 8)) 2068 return -1; 2069 break; 2070 case ENCODING_Iv: 2071 if (readImmediate(insn, insn->immediateSize)) 2072 return -1; 2073 break; 2074 case ENCODING_Ia: 2075 if (readImmediate(insn, insn->addressSize)) 2076 return -1; 2077 break; 2078 case ENCODING_RB: 2079 if (readOpcodeRegister(insn, 1)) 2080 return -1; 2081 break; 2082 case ENCODING_RW: 2083 if (readOpcodeRegister(insn, 2)) 2084 return -1; 2085 break; 2086 case ENCODING_RD: 2087 if (readOpcodeRegister(insn, 4)) 2088 return -1; 2089 break; 2090 case ENCODING_RO: 2091 if (readOpcodeRegister(insn, 8)) 2092 return -1; 2093 break; 2094 case ENCODING_Rv: 2095 if (readOpcodeRegister(insn, 0)) 2096 return -1; 2097 break; 2098 case ENCODING_FP: 2099 break; 2100 case ENCODING_VVVV: 2101 needVVVV = 0; /* Mark that we have found a VVVV operand. */ 2102 if (!hasVVVV) 2103 return -1; 2104 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 2105 return -1; 2106 break; 2107 case ENCODING_WRITEMASK: 2108 if (readMaskRegister(insn)) 2109 return -1; 2110 break; 2111 case ENCODING_DUP: 2112 break; 2113 default: 2114 // dbgprintf(insn, "Encountered an operand with an unknown encoding."); 2115 return -1; 2116 } 2117 } 2118 2119 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ 2120 if (needVVVV) return -1; 2121 2122 return 0; 2123} 2124 2125// return True if instruction is illegal to use with prefixes 2126// This also check & fix the prefixPresent[] when a prefix is irrelevant. 2127static bool checkPrefix(struct InternalInstruction *insn) 2128{ 2129 // LOCK prefix 2130 if (insn->isPrefixf0) { 2131 switch(insn->instructionID) { 2132 default: 2133 // invalid LOCK 2134 return true; 2135 2136 // nop dword [rax] 2137 case X86_NOOPL: 2138 2139 // DEC 2140 case X86_DEC16m: 2141 case X86_DEC32m: 2142 case X86_DEC64_16m: 2143 case X86_DEC64_32m: 2144 case X86_DEC64m: 2145 case X86_DEC8m: 2146 2147 // ADC 2148 case X86_ADC16mi: 2149 case X86_ADC16mi8: 2150 case X86_ADC16mr: 2151 case X86_ADC32mi: 2152 case X86_ADC32mi8: 2153 case X86_ADC32mr: 2154 case X86_ADC64mi32: 2155 case X86_ADC64mi8: 2156 case X86_ADC64mr: 2157 case X86_ADC8mi: 2158 case X86_ADC8mr: 2159 2160 // ADD 2161 case X86_ADD16mi: 2162 case X86_ADD16mi8: 2163 case X86_ADD16mr: 2164 case X86_ADD32mi: 2165 case X86_ADD32mi8: 2166 case X86_ADD32mr: 2167 case X86_ADD64mi32: 2168 case X86_ADD64mi8: 2169 case X86_ADD64mr: 2170 case X86_ADD8mi: 2171 case X86_ADD8mr: 2172 2173 // AND 2174 case X86_AND16mi: 2175 case X86_AND16mi8: 2176 case X86_AND16mr: 2177 case X86_AND32mi: 2178 case X86_AND32mi8: 2179 case X86_AND32mr: 2180 case X86_AND64mi32: 2181 case X86_AND64mi8: 2182 case X86_AND64mr: 2183 case X86_AND8mi: 2184 case X86_AND8mr: 2185 2186 // BTC 2187 case X86_BTC16mi8: 2188 case X86_BTC16mr: 2189 case X86_BTC32mi8: 2190 case X86_BTC32mr: 2191 case X86_BTC64mi8: 2192 case X86_BTC64mr: 2193 2194 // BTR 2195 case X86_BTR16mi8: 2196 case X86_BTR16mr: 2197 case X86_BTR32mi8: 2198 case X86_BTR32mr: 2199 case X86_BTR64mi8: 2200 case X86_BTR64mr: 2201 2202 // BTS 2203 case X86_BTS16mi8: 2204 case X86_BTS16mr: 2205 case X86_BTS32mi8: 2206 case X86_BTS32mr: 2207 case X86_BTS64mi8: 2208 case X86_BTS64mr: 2209 2210 // CMPXCHG 2211 case X86_CMPXCHG16B: 2212 case X86_CMPXCHG16rm: 2213 case X86_CMPXCHG32rm: 2214 case X86_CMPXCHG64rm: 2215 case X86_CMPXCHG8rm: 2216 case X86_CMPXCHG8B: 2217 2218 // INC 2219 case X86_INC16m: 2220 case X86_INC32m: 2221 case X86_INC64_16m: 2222 case X86_INC64_32m: 2223 case X86_INC64m: 2224 case X86_INC8m: 2225 2226 // NEG 2227 case X86_NEG16m: 2228 case X86_NEG32m: 2229 case X86_NEG64m: 2230 case X86_NEG8m: 2231 2232 // NOT 2233 case X86_NOT16m: 2234 case X86_NOT32m: 2235 case X86_NOT64m: 2236 case X86_NOT8m: 2237 2238 // OR 2239 case X86_OR16mi: 2240 case X86_OR16mi8: 2241 case X86_OR16mr: 2242 case X86_OR32mi: 2243 case X86_OR32mi8: 2244 case X86_OR32mr: 2245 case X86_OR32mrLocked: 2246 case X86_OR64mi32: 2247 case X86_OR64mi8: 2248 case X86_OR64mr: 2249 case X86_OR8mi: 2250 case X86_OR8mr: 2251 2252 // SBB 2253 case X86_SBB16mi: 2254 case X86_SBB16mi8: 2255 case X86_SBB16mr: 2256 case X86_SBB32mi: 2257 case X86_SBB32mi8: 2258 case X86_SBB32mr: 2259 case X86_SBB64mi32: 2260 case X86_SBB64mi8: 2261 case X86_SBB64mr: 2262 case X86_SBB8mi: 2263 case X86_SBB8mr: 2264 2265 // SUB 2266 case X86_SUB16mi: 2267 case X86_SUB16mi8: 2268 case X86_SUB16mr: 2269 case X86_SUB32mi: 2270 case X86_SUB32mi8: 2271 case X86_SUB32mr: 2272 case X86_SUB64mi32: 2273 case X86_SUB64mi8: 2274 case X86_SUB64mr: 2275 case X86_SUB8mi: 2276 case X86_SUB8mr: 2277 2278 // XADD 2279 case X86_XADD16rm: 2280 case X86_XADD32rm: 2281 case X86_XADD64rm: 2282 case X86_XADD8rm: 2283 2284 // XCHG 2285 case X86_XCHG16rm: 2286 case X86_XCHG32rm: 2287 case X86_XCHG64rm: 2288 case X86_XCHG8rm: 2289 2290 // XOR 2291 case X86_XOR16mi: 2292 case X86_XOR16mi8: 2293 case X86_XOR16mr: 2294 case X86_XOR32mi: 2295 case X86_XOR32mi8: 2296 case X86_XOR32mr: 2297 case X86_XOR64mi32: 2298 case X86_XOR64mi8: 2299 case X86_XOR64mr: 2300 case X86_XOR8mi: 2301 case X86_XOR8mr: 2302 2303 // this instruction can be used with LOCK prefix 2304 return false; 2305 } 2306 } 2307 2308 // REPNE prefix 2309 if (insn->isPrefixf2) { 2310 // 0xf2 can be a part of instruction encoding, but not really a prefix. 2311 // In such a case, clear it. 2312 if (insn->twoByteEscape == 0x0f) { 2313 insn->prefix0 = 0; 2314 } 2315 } 2316 2317 // no invalid prefixes 2318 return false; 2319} 2320 2321/* 2322 * decodeInstruction - Reads and interprets a full instruction provided by the 2323 * user. 2324 * 2325 * @param insn - A pointer to the instruction to be populated. Must be 2326 * pre-allocated. 2327 * @param reader - The function to be used to read the instruction's bytes. 2328 * @param readerArg - A generic argument to be passed to the reader to store 2329 * any internal state. 2330 * @param startLoc - The address (in the reader's address space) of the first 2331 * byte in the instruction. 2332 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 2333 * decode the instruction in. 2334 * @return - 0 if instruction is valid; nonzero if not. 2335 */ 2336int decodeInstruction(struct InternalInstruction *insn, 2337 byteReader_t reader, 2338 const void *readerArg, 2339 uint64_t startLoc, 2340 DisassemblerMode mode) 2341{ 2342 insn->reader = reader; 2343 insn->readerArg = readerArg; 2344 insn->startLocation = startLoc; 2345 insn->readerCursor = startLoc; 2346 insn->mode = mode; 2347 2348 if (readPrefixes(insn) || 2349 readOpcode(insn) || 2350 getID(insn) || 2351 insn->instructionID == 0 || 2352 checkPrefix(insn) || 2353 readOperands(insn)) 2354 return -1; 2355 2356 insn->length = (size_t)(insn->readerCursor - insn->startLocation); 2357 2358 // instruction length must be <= 15 to be valid 2359 if (insn->length > 15) 2360 return -1; 2361 2362 if (insn->operandSize == 0) 2363 insn->operandSize = insn->registerSize; 2364 2365 insn->operands = &x86OperandSets[insn->spec->operands][0]; 2366 2367 // dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 2368 // startLoc, insn->readerCursor, insn->length); 2369 2370 //if (insn->length > 15) 2371 // dbgprintf(insn, "Instruction exceeds 15-byte limit"); 2372 2373#if 0 2374 printf("\n>>> x86OperandSets = %lu\n", sizeof(x86OperandSets)); 2375 printf(">>> x86DisassemblerInstrSpecifiers = %lu\n", sizeof(x86DisassemblerInstrSpecifiers)); 2376 printf(">>> x86DisassemblerContexts = %lu\n", sizeof(x86DisassemblerContexts)); 2377 printf(">>> modRMTable = %lu\n", sizeof(modRMTable)); 2378 printf(">>> x86DisassemblerOneByteOpcodes = %lu\n", sizeof(x86DisassemblerOneByteOpcodes)); 2379 printf(">>> x86DisassemblerTwoByteOpcodes = %lu\n", sizeof(x86DisassemblerTwoByteOpcodes)); 2380 printf(">>> x86DisassemblerThreeByte38Opcodes = %lu\n", sizeof(x86DisassemblerThreeByte38Opcodes)); 2381 printf(">>> x86DisassemblerThreeByte3AOpcodes = %lu\n", sizeof(x86DisassemblerThreeByte3AOpcodes)); 2382 printf(">>> x86DisassemblerThreeByteA6Opcodes = %lu\n", sizeof(x86DisassemblerThreeByteA6Opcodes)); 2383 printf(">>> x86DisassemblerThreeByteA7Opcodes= %lu\n", sizeof(x86DisassemblerThreeByteA7Opcodes)); 2384 printf(">>> x86DisassemblerXOP8Opcodes = %lu\n", sizeof(x86DisassemblerXOP8Opcodes)); 2385 printf(">>> x86DisassemblerXOP9Opcodes = %lu\n", sizeof(x86DisassemblerXOP9Opcodes)); 2386 printf(">>> x86DisassemblerXOPAOpcodes = %lu\n\n", sizeof(x86DisassemblerXOPAOpcodes)); 2387#endif 2388 2389 return 0; 2390} 2391 2392#endif 2393 2394