1 2/*--------------------------------------------------------------------*/ 3/*--- begin guest_x86_toIR.c ---*/ 4/*--------------------------------------------------------------------*/ 5 6/* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2012 OpenWorks LLP 11 info@open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34*/ 35 36/* Translates x86 code to IR. */ 37 38/* TODO: 39 40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked 41 to ensure a 32-bit value is being written. 42 43 FUCOMI(P): what happens to A and S flags? Currently are forced 44 to zero. 45 46 x87 FP Limitations: 47 48 * all arithmetic done at 64 bits 49 50 * no FP exceptions, except for handling stack over/underflow 51 52 * FP rounding mode observed only for float->int conversions 53 and int->float conversions which could lose accuracy, and 54 for float-to-float rounding. For all other operations, 55 round-to-nearest is used, regardless. 56 57 * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the 58 simulation claims the argument is in-range (-2^63 <= arg <= 2^63) 59 even when it isn't. 60 61 * some of the FCOM cases could do with testing -- not convinced 62 that the args are the right way round. 63 64 * FSAVE does not re-initialise the FPU; it should do 65 66 * FINIT not only initialises the FPU environment, it also 67 zeroes all the FP registers. It should leave the registers 68 unchanged. 69 70 SAHF should cause eflags[1] == 1, and in fact it produces 0. As 71 per Intel docs this bit has no meaning anyway. Since PUSHF is the 72 only way to observe eflags[1], a proper fix would be to make that 73 bit be set by PUSHF. 74 75 The state of %eflags.AC (alignment check, bit 18) is recorded by 76 the simulation (viz, if you set it with popf then a pushf produces 77 the value you set it to), but it is otherwise ignored. In 78 particular, setting it to 1 does NOT cause alignment checking to 79 happen. Programs that set it to 1 and then rely on the resulting 80 SIGBUSs to inform them of misaligned accesses will not work. 81 82 Implementation of sysenter is necessarily partial. sysenter is a 83 kind of system call entry. When doing a sysenter, the return 84 address is not known -- that is something that is beyond Vex's 85 knowledge. So the generated IR forces a return to the scheduler, 86 which can do what it likes to simulate the systenter, but it MUST 87 set this thread's guest_EIP field with the continuation address 88 before resuming execution. If that doesn't happen, the thread will 89 jump to address zero, which is probably fatal. 90 91 This module uses global variables and so is not MT-safe (if that 92 should ever become relevant). 93 94 The delta values are 32-bit ints, not 64-bit ints. That means 95 this module may not work right if run on a 64-bit host. That should 96 be fixed properly, really -- if anyone ever wants to use Vex to 97 translate x86 code for execution on a 64-bit host. 98 99 casLE (implementation of lock-prefixed insns) and rep-prefixed 100 insns: the side-exit back to the start of the insn is done with 101 Ijk_Boring. This is quite wrong, it should be done with 102 Ijk_NoRedir, since otherwise the side exit, which is intended to 103 restart the instruction for whatever reason, could go somewhere 104 entirely else. Doing it right (with Ijk_NoRedir jumps) would make 105 no-redir jumps performance critical, at least for rep-prefixed 106 instructions, since all iterations thereof would involve such a 107 jump. It's not such a big deal with casLE since the side exit is 108 only taken if the CAS fails, that is, the location is contended, 109 which is relatively unlikely. 110 111 XXXX: Nov 2009: handling of SWP on ARM suffers from the same 112 problem. 113 114 Note also, the test for CAS success vs failure is done using 115 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary 116 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it 117 shouldn't definedness-check these comparisons. See 118 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for 119 background/rationale. 120*/ 121 122/* Performance holes: 123 124 - fcom ; fstsw %ax ; sahf 125 sahf does not update the O flag (sigh) and so O needs to 126 be computed. This is done expensively; it would be better 127 to have a calculate_eflags_o helper. 128 129 - emwarns; some FP codes can generate huge numbers of these 130 if the fpucw is changed in an inner loop. It would be 131 better for the guest state to have an emwarn-enable reg 132 which can be set zero or nonzero. If it is zero, emwarns 133 are not flagged, and instead control just flows all the 134 way through bbs as usual. 135*/ 136 137/* "Special" instructions. 138 139 This instruction decoder can decode three special instructions 140 which mean nothing natively (are no-ops as far as regs/mem are 141 concerned) but have meaning for supporting Valgrind. A special 142 instruction is flagged by the 12-byte preamble C1C703 C1C70D C1C71D 143 C1C713 (in the standard interpretation, that means: roll $3, %edi; 144 roll $13, %edi; roll $29, %edi; roll $19, %edi). Following that, 145 one of the following 3 are allowed (standard interpretation in 146 parentheses): 147 148 87DB (xchgl %ebx,%ebx) %EDX = client_request ( %EAX ) 149 87C9 (xchgl %ecx,%ecx) %EAX = guest_NRADDR 150 87D2 (xchgl %edx,%edx) call-noredir *%EAX 151 152 Any other bytes following the 12-byte preamble are illegal and 153 constitute a failure in instruction decoding. This all assumes 154 that the preamble will never occur except in specific code 155 fragments designed for Valgrind to catch. 156 157 No prefixes may precede a "Special" instruction. 158*/ 159 160/* LOCK prefixed instructions. These are translated using IR-level 161 CAS statements (IRCAS) and are believed to preserve atomicity, even 162 from the point of view of some other process racing against a 163 simulated one (presumably they communicate via a shared memory 164 segment). 165 166 Handlers which are aware of LOCK prefixes are: 167 dis_op2_G_E (add, or, adc, sbb, and, sub, xor) 168 dis_cmpxchg_G_E (cmpxchg) 169 dis_Grp1 (add, or, adc, sbb, and, sub, xor) 170 dis_Grp3 (not, neg) 171 dis_Grp4 (inc, dec) 172 dis_Grp5 (inc, dec) 173 dis_Grp8_Imm (bts, btc, btr) 174 dis_bt_G_E (bts, btc, btr) 175 dis_xadd_G_E (xadd) 176*/ 177 178 179#include "libvex_basictypes.h" 180#include "libvex_ir.h" 181#include "libvex.h" 182#include "libvex_guest_x86.h" 183 184#include "main_util.h" 185#include "main_globals.h" 186#include "guest_generic_bb_to_IR.h" 187#include "guest_generic_x87.h" 188#include "guest_x86_defs.h" 189 190 191/*------------------------------------------------------------*/ 192/*--- Globals ---*/ 193/*------------------------------------------------------------*/ 194 195/* These are set at the start of the translation of an insn, right 196 down in disInstr_X86, so that we don't have to pass them around 197 endlessly. They are all constant during the translation of any 198 given insn. */ 199 200/* We need to know this to do sub-register accesses correctly. */ 201static Bool host_is_bigendian; 202 203/* Pointer to the guest code area (points to start of BB, not to the 204 insn being processed). */ 205static UChar* guest_code; 206 207/* The guest address corresponding to guest_code[0]. */ 208static Addr32 guest_EIP_bbstart; 209 210/* The guest address for the instruction currently being 211 translated. */ 212static Addr32 guest_EIP_curr_instr; 213 214/* The IRSB* into which we're generating code. */ 215static IRSB* irsb; 216 217 218/*------------------------------------------------------------*/ 219/*--- Debugging output ---*/ 220/*------------------------------------------------------------*/ 221 222#define DIP(format, args...) \ 223 if (vex_traceflags & VEX_TRACE_FE) \ 224 vex_printf(format, ## args) 225 226#define DIS(buf, format, args...) \ 227 if (vex_traceflags & VEX_TRACE_FE) \ 228 vex_sprintf(buf, format, ## args) 229 230 231/*------------------------------------------------------------*/ 232/*--- Offsets of various parts of the x86 guest state. ---*/ 233/*------------------------------------------------------------*/ 234 235#define OFFB_EAX offsetof(VexGuestX86State,guest_EAX) 236#define OFFB_EBX offsetof(VexGuestX86State,guest_EBX) 237#define OFFB_ECX offsetof(VexGuestX86State,guest_ECX) 238#define OFFB_EDX offsetof(VexGuestX86State,guest_EDX) 239#define OFFB_ESP offsetof(VexGuestX86State,guest_ESP) 240#define OFFB_EBP offsetof(VexGuestX86State,guest_EBP) 241#define OFFB_ESI offsetof(VexGuestX86State,guest_ESI) 242#define OFFB_EDI offsetof(VexGuestX86State,guest_EDI) 243 244#define OFFB_EIP offsetof(VexGuestX86State,guest_EIP) 245 246#define OFFB_CC_OP offsetof(VexGuestX86State,guest_CC_OP) 247#define OFFB_CC_DEP1 offsetof(VexGuestX86State,guest_CC_DEP1) 248#define OFFB_CC_DEP2 offsetof(VexGuestX86State,guest_CC_DEP2) 249#define OFFB_CC_NDEP offsetof(VexGuestX86State,guest_CC_NDEP) 250 251#define OFFB_FPREGS offsetof(VexGuestX86State,guest_FPREG[0]) 252#define OFFB_FPTAGS offsetof(VexGuestX86State,guest_FPTAG[0]) 253#define OFFB_DFLAG offsetof(VexGuestX86State,guest_DFLAG) 254#define OFFB_IDFLAG offsetof(VexGuestX86State,guest_IDFLAG) 255#define OFFB_ACFLAG offsetof(VexGuestX86State,guest_ACFLAG) 256#define OFFB_FTOP offsetof(VexGuestX86State,guest_FTOP) 257#define OFFB_FC3210 offsetof(VexGuestX86State,guest_FC3210) 258#define OFFB_FPROUND offsetof(VexGuestX86State,guest_FPROUND) 259 260#define OFFB_CS offsetof(VexGuestX86State,guest_CS) 261#define OFFB_DS offsetof(VexGuestX86State,guest_DS) 262#define OFFB_ES offsetof(VexGuestX86State,guest_ES) 263#define OFFB_FS offsetof(VexGuestX86State,guest_FS) 264#define OFFB_GS offsetof(VexGuestX86State,guest_GS) 265#define OFFB_SS offsetof(VexGuestX86State,guest_SS) 266#define OFFB_LDT offsetof(VexGuestX86State,guest_LDT) 267#define OFFB_GDT offsetof(VexGuestX86State,guest_GDT) 268 269#define OFFB_SSEROUND offsetof(VexGuestX86State,guest_SSEROUND) 270#define OFFB_XMM0 offsetof(VexGuestX86State,guest_XMM0) 271#define OFFB_XMM1 offsetof(VexGuestX86State,guest_XMM1) 272#define OFFB_XMM2 offsetof(VexGuestX86State,guest_XMM2) 273#define OFFB_XMM3 offsetof(VexGuestX86State,guest_XMM3) 274#define OFFB_XMM4 offsetof(VexGuestX86State,guest_XMM4) 275#define OFFB_XMM5 offsetof(VexGuestX86State,guest_XMM5) 276#define OFFB_XMM6 offsetof(VexGuestX86State,guest_XMM6) 277#define OFFB_XMM7 offsetof(VexGuestX86State,guest_XMM7) 278 279#define OFFB_EMWARN offsetof(VexGuestX86State,guest_EMWARN) 280 281#define OFFB_TISTART offsetof(VexGuestX86State,guest_TISTART) 282#define OFFB_TILEN offsetof(VexGuestX86State,guest_TILEN) 283#define OFFB_NRADDR offsetof(VexGuestX86State,guest_NRADDR) 284 285#define OFFB_IP_AT_SYSCALL offsetof(VexGuestX86State,guest_IP_AT_SYSCALL) 286 287 288/*------------------------------------------------------------*/ 289/*--- Helper bits and pieces for deconstructing the ---*/ 290/*--- x86 insn stream. ---*/ 291/*------------------------------------------------------------*/ 292 293/* This is the Intel register encoding -- integer regs. */ 294#define R_EAX 0 295#define R_ECX 1 296#define R_EDX 2 297#define R_EBX 3 298#define R_ESP 4 299#define R_EBP 5 300#define R_ESI 6 301#define R_EDI 7 302 303#define R_AL (0+R_EAX) 304#define R_AH (4+R_EAX) 305 306/* This is the Intel register encoding -- segment regs. */ 307#define R_ES 0 308#define R_CS 1 309#define R_SS 2 310#define R_DS 3 311#define R_FS 4 312#define R_GS 5 313 314 315/* Add a statement to the list held by "irbb". */ 316static void stmt ( IRStmt* st ) 317{ 318 addStmtToIRSB( irsb, st ); 319} 320 321/* Generate a new temporary of the given type. */ 322static IRTemp newTemp ( IRType ty ) 323{ 324 vassert(isPlausibleIRType(ty)); 325 return newIRTemp( irsb->tyenv, ty ); 326} 327 328/* Various simple conversions */ 329 330static UInt extend_s_8to32( UInt x ) 331{ 332 return (UInt)((((Int)x) << 24) >> 24); 333} 334 335static UInt extend_s_16to32 ( UInt x ) 336{ 337 return (UInt)((((Int)x) << 16) >> 16); 338} 339 340/* Fetch a byte from the guest insn stream. */ 341static UChar getIByte ( Int delta ) 342{ 343 return guest_code[delta]; 344} 345 346/* Extract the reg field from a modRM byte. */ 347static Int gregOfRM ( UChar mod_reg_rm ) 348{ 349 return (Int)( (mod_reg_rm >> 3) & 7 ); 350} 351 352/* Figure out whether the mod and rm parts of a modRM byte refer to a 353 register or memory. If so, the byte will have the form 11XXXYYY, 354 where YYY is the register number. */ 355static Bool epartIsReg ( UChar mod_reg_rm ) 356{ 357 return toBool(0xC0 == (mod_reg_rm & 0xC0)); 358} 359 360/* ... and extract the register number ... */ 361static Int eregOfRM ( UChar mod_reg_rm ) 362{ 363 return (Int)(mod_reg_rm & 0x7); 364} 365 366/* Get a 8/16/32-bit unsigned value out of the insn stream. */ 367 368static UChar getUChar ( Int delta ) 369{ 370 UChar v = guest_code[delta+0]; 371 return toUChar(v); 372} 373 374static UInt getUDisp16 ( Int delta ) 375{ 376 UInt v = guest_code[delta+1]; v <<= 8; 377 v |= guest_code[delta+0]; 378 return v & 0xFFFF; 379} 380 381static UInt getUDisp32 ( Int delta ) 382{ 383 UInt v = guest_code[delta+3]; v <<= 8; 384 v |= guest_code[delta+2]; v <<= 8; 385 v |= guest_code[delta+1]; v <<= 8; 386 v |= guest_code[delta+0]; 387 return v; 388} 389 390static UInt getUDisp ( Int size, Int delta ) 391{ 392 switch (size) { 393 case 4: return getUDisp32(delta); 394 case 2: return getUDisp16(delta); 395 case 1: return (UInt)getUChar(delta); 396 default: vpanic("getUDisp(x86)"); 397 } 398 return 0; /*notreached*/ 399} 400 401 402/* Get a byte value out of the insn stream and sign-extend to 32 403 bits. */ 404static UInt getSDisp8 ( Int delta ) 405{ 406 return extend_s_8to32( (UInt) (guest_code[delta]) ); 407} 408 409static UInt getSDisp16 ( Int delta0 ) 410{ 411 UChar* eip = (UChar*)(&guest_code[delta0]); 412 UInt d = *eip++; 413 d |= ((*eip++) << 8); 414 return extend_s_16to32(d); 415} 416 417static UInt getSDisp ( Int size, Int delta ) 418{ 419 switch (size) { 420 case 4: return getUDisp32(delta); 421 case 2: return getSDisp16(delta); 422 case 1: return getSDisp8(delta); 423 default: vpanic("getSDisp(x86)"); 424 } 425 return 0; /*notreached*/ 426} 427 428 429/*------------------------------------------------------------*/ 430/*--- Helpers for constructing IR. ---*/ 431/*------------------------------------------------------------*/ 432 433/* Create a 1/2/4 byte read of an x86 integer registers. For 16/8 bit 434 register references, we need to take the host endianness into 435 account. Supplied value is 0 .. 7 and in the Intel instruction 436 encoding. */ 437 438static IRType szToITy ( Int n ) 439{ 440 switch (n) { 441 case 1: return Ity_I8; 442 case 2: return Ity_I16; 443 case 4: return Ity_I32; 444 default: vpanic("szToITy(x86)"); 445 } 446} 447 448/* On a little-endian host, less significant bits of the guest 449 registers are at lower addresses. Therefore, if a reference to a 450 register low half has the safe guest state offset as a reference to 451 the full register. 452*/ 453static Int integerGuestRegOffset ( Int sz, UInt archreg ) 454{ 455 vassert(archreg < 8); 456 457 /* Correct for little-endian host only. */ 458 vassert(!host_is_bigendian); 459 460 if (sz == 4 || sz == 2 || (sz == 1 && archreg < 4)) { 461 switch (archreg) { 462 case R_EAX: return OFFB_EAX; 463 case R_EBX: return OFFB_EBX; 464 case R_ECX: return OFFB_ECX; 465 case R_EDX: return OFFB_EDX; 466 case R_ESI: return OFFB_ESI; 467 case R_EDI: return OFFB_EDI; 468 case R_ESP: return OFFB_ESP; 469 case R_EBP: return OFFB_EBP; 470 default: vpanic("integerGuestRegOffset(x86,le)(4,2)"); 471 } 472 } 473 474 vassert(archreg >= 4 && archreg < 8 && sz == 1); 475 switch (archreg-4) { 476 case R_EAX: return 1+ OFFB_EAX; 477 case R_EBX: return 1+ OFFB_EBX; 478 case R_ECX: return 1+ OFFB_ECX; 479 case R_EDX: return 1+ OFFB_EDX; 480 default: vpanic("integerGuestRegOffset(x86,le)(1h)"); 481 } 482 483 /* NOTREACHED */ 484 vpanic("integerGuestRegOffset(x86,le)"); 485} 486 487static Int segmentGuestRegOffset ( UInt sreg ) 488{ 489 switch (sreg) { 490 case R_ES: return OFFB_ES; 491 case R_CS: return OFFB_CS; 492 case R_SS: return OFFB_SS; 493 case R_DS: return OFFB_DS; 494 case R_FS: return OFFB_FS; 495 case R_GS: return OFFB_GS; 496 default: vpanic("segmentGuestRegOffset(x86)"); 497 } 498} 499 500static Int xmmGuestRegOffset ( UInt xmmreg ) 501{ 502 switch (xmmreg) { 503 case 0: return OFFB_XMM0; 504 case 1: return OFFB_XMM1; 505 case 2: return OFFB_XMM2; 506 case 3: return OFFB_XMM3; 507 case 4: return OFFB_XMM4; 508 case 5: return OFFB_XMM5; 509 case 6: return OFFB_XMM6; 510 case 7: return OFFB_XMM7; 511 default: vpanic("xmmGuestRegOffset"); 512 } 513} 514 515/* Lanes of vector registers are always numbered from zero being the 516 least significant lane (rightmost in the register). */ 517 518static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno ) 519{ 520 /* Correct for little-endian host only. */ 521 vassert(!host_is_bigendian); 522 vassert(laneno >= 0 && laneno < 8); 523 return xmmGuestRegOffset( xmmreg ) + 2 * laneno; 524} 525 526static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno ) 527{ 528 /* Correct for little-endian host only. */ 529 vassert(!host_is_bigendian); 530 vassert(laneno >= 0 && laneno < 4); 531 return xmmGuestRegOffset( xmmreg ) + 4 * laneno; 532} 533 534static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno ) 535{ 536 /* Correct for little-endian host only. */ 537 vassert(!host_is_bigendian); 538 vassert(laneno >= 0 && laneno < 2); 539 return xmmGuestRegOffset( xmmreg ) + 8 * laneno; 540} 541 542static IRExpr* getIReg ( Int sz, UInt archreg ) 543{ 544 vassert(sz == 1 || sz == 2 || sz == 4); 545 vassert(archreg < 8); 546 return IRExpr_Get( integerGuestRegOffset(sz,archreg), 547 szToITy(sz) ); 548} 549 550/* Ditto, but write to a reg instead. */ 551static void putIReg ( Int sz, UInt archreg, IRExpr* e ) 552{ 553 IRType ty = typeOfIRExpr(irsb->tyenv, e); 554 switch (sz) { 555 case 1: vassert(ty == Ity_I8); break; 556 case 2: vassert(ty == Ity_I16); break; 557 case 4: vassert(ty == Ity_I32); break; 558 default: vpanic("putIReg(x86)"); 559 } 560 vassert(archreg < 8); 561 stmt( IRStmt_Put(integerGuestRegOffset(sz,archreg), e) ); 562} 563 564static IRExpr* getSReg ( UInt sreg ) 565{ 566 return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 ); 567} 568 569static void putSReg ( UInt sreg, IRExpr* e ) 570{ 571 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16); 572 stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) ); 573} 574 575static IRExpr* getXMMReg ( UInt xmmreg ) 576{ 577 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 ); 578} 579 580static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno ) 581{ 582 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 ); 583} 584 585static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno ) 586{ 587 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 ); 588} 589 590static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno ) 591{ 592 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 ); 593} 594 595static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno ) 596{ 597 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 ); 598} 599 600static void putXMMReg ( UInt xmmreg, IRExpr* e ) 601{ 602 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128); 603 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) ); 604} 605 606static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e ) 607{ 608 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 609 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) ); 610} 611 612static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e ) 613{ 614 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64); 615 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) ); 616} 617 618static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e ) 619{ 620 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32); 621 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) ); 622} 623 624static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e ) 625{ 626 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); 627 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) ); 628} 629 630static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e ) 631{ 632 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16); 633 stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) ); 634} 635 636static void assign ( IRTemp dst, IRExpr* e ) 637{ 638 stmt( IRStmt_WrTmp(dst, e) ); 639} 640 641static void storeLE ( IRExpr* addr, IRExpr* data ) 642{ 643 stmt( IRStmt_Store(Iend_LE, addr, data) ); 644} 645 646static IRExpr* unop ( IROp op, IRExpr* a ) 647{ 648 return IRExpr_Unop(op, a); 649} 650 651static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 652{ 653 return IRExpr_Binop(op, a1, a2); 654} 655 656static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) 657{ 658 return IRExpr_Triop(op, a1, a2, a3); 659} 660 661static IRExpr* mkexpr ( IRTemp tmp ) 662{ 663 return IRExpr_RdTmp(tmp); 664} 665 666static IRExpr* mkU8 ( UInt i ) 667{ 668 vassert(i < 256); 669 return IRExpr_Const(IRConst_U8( (UChar)i )); 670} 671 672static IRExpr* mkU16 ( UInt i ) 673{ 674 vassert(i < 65536); 675 return IRExpr_Const(IRConst_U16( (UShort)i )); 676} 677 678static IRExpr* mkU32 ( UInt i ) 679{ 680 return IRExpr_Const(IRConst_U32(i)); 681} 682 683static IRExpr* mkU64 ( ULong i ) 684{ 685 return IRExpr_Const(IRConst_U64(i)); 686} 687 688static IRExpr* mkU ( IRType ty, UInt i ) 689{ 690 if (ty == Ity_I8) return mkU8(i); 691 if (ty == Ity_I16) return mkU16(i); 692 if (ty == Ity_I32) return mkU32(i); 693 /* If this panics, it usually means you passed a size (1,2,4) 694 value as the IRType, rather than a real IRType. */ 695 vpanic("mkU(x86)"); 696} 697 698static IRExpr* mkV128 ( UShort mask ) 699{ 700 return IRExpr_Const(IRConst_V128(mask)); 701} 702 703static IRExpr* loadLE ( IRType ty, IRExpr* addr ) 704{ 705 return IRExpr_Load(Iend_LE, ty, addr); 706} 707 708static IROp mkSizedOp ( IRType ty, IROp op8 ) 709{ 710 Int adj; 711 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32); 712 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8 713 || op8 == Iop_Mul8 714 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8 715 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8 716 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8 717 || op8 == Iop_CasCmpNE8 718 || op8 == Iop_Not8); 719 adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2); 720 return adj + op8; 721} 722 723static IROp mkWidenOp ( Int szSmall, Int szBig, Bool signd ) 724{ 725 if (szSmall == 1 && szBig == 4) { 726 return signd ? Iop_8Sto32 : Iop_8Uto32; 727 } 728 if (szSmall == 1 && szBig == 2) { 729 return signd ? Iop_8Sto16 : Iop_8Uto16; 730 } 731 if (szSmall == 2 && szBig == 4) { 732 return signd ? Iop_16Sto32 : Iop_16Uto32; 733 } 734 vpanic("mkWidenOp(x86,guest)"); 735} 736 737static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y ) 738{ 739 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1); 740 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1); 741 return unop(Iop_32to1, 742 binop(Iop_And32, 743 unop(Iop_1Uto32,x), 744 unop(Iop_1Uto32,y))); 745} 746 747/* Generate a compare-and-swap operation, operating on memory at 748 'addr'. The expected value is 'expVal' and the new value is 749 'newVal'. If the operation fails, then transfer control (with a 750 no-redir jump (XXX no -- see comment at top of this file)) to 751 'restart_point', which is presumably the address of the guest 752 instruction again -- retrying, essentially. */ 753static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal, 754 Addr32 restart_point ) 755{ 756 IRCAS* cas; 757 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal); 758 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal); 759 IRTemp oldTmp = newTemp(tyE); 760 IRTemp expTmp = newTemp(tyE); 761 vassert(tyE == tyN); 762 vassert(tyE == Ity_I32 || tyE == Ity_I16 || tyE == Ity_I8); 763 assign(expTmp, expVal); 764 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr, 765 NULL, mkexpr(expTmp), NULL, newVal ); 766 stmt( IRStmt_CAS(cas) ); 767 stmt( IRStmt_Exit( 768 binop( mkSizedOp(tyE,Iop_CasCmpNE8), 769 mkexpr(oldTmp), mkexpr(expTmp) ), 770 Ijk_Boring, /*Ijk_NoRedir*/ 771 IRConst_U32( restart_point ), 772 OFFB_EIP 773 )); 774} 775 776 777/*------------------------------------------------------------*/ 778/*--- Helpers for %eflags. ---*/ 779/*------------------------------------------------------------*/ 780 781/* -------------- Evaluating the flags-thunk. -------------- */ 782 783/* Build IR to calculate all the eflags from stored 784 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: 785 Ity_I32. */ 786static IRExpr* mk_x86g_calculate_eflags_all ( void ) 787{ 788 IRExpr** args 789 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32), 790 IRExpr_Get(OFFB_CC_DEP1, Ity_I32), 791 IRExpr_Get(OFFB_CC_DEP2, Ity_I32), 792 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ); 793 IRExpr* call 794 = mkIRExprCCall( 795 Ity_I32, 796 0/*regparm*/, 797 "x86g_calculate_eflags_all", &x86g_calculate_eflags_all, 798 args 799 ); 800 /* Exclude OP and NDEP from definedness checking. We're only 801 interested in DEP1 and DEP2. */ 802 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 803 return call; 804} 805 806/* Build IR to calculate some particular condition from stored 807 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: 808 Ity_Bit. */ 809static IRExpr* mk_x86g_calculate_condition ( X86Condcode cond ) 810{ 811 IRExpr** args 812 = mkIRExprVec_5( mkU32(cond), 813 IRExpr_Get(OFFB_CC_OP, Ity_I32), 814 IRExpr_Get(OFFB_CC_DEP1, Ity_I32), 815 IRExpr_Get(OFFB_CC_DEP2, Ity_I32), 816 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ); 817 IRExpr* call 818 = mkIRExprCCall( 819 Ity_I32, 820 0/*regparm*/, 821 "x86g_calculate_condition", &x86g_calculate_condition, 822 args 823 ); 824 /* Exclude the requested condition, OP and NDEP from definedness 825 checking. We're only interested in DEP1 and DEP2. */ 826 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4); 827 return unop(Iop_32to1, call); 828} 829 830/* Build IR to calculate just the carry flag from stored 831 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I32. */ 832static IRExpr* mk_x86g_calculate_eflags_c ( void ) 833{ 834 IRExpr** args 835 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32), 836 IRExpr_Get(OFFB_CC_DEP1, Ity_I32), 837 IRExpr_Get(OFFB_CC_DEP2, Ity_I32), 838 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ); 839 IRExpr* call 840 = mkIRExprCCall( 841 Ity_I32, 842 3/*regparm*/, 843 "x86g_calculate_eflags_c", &x86g_calculate_eflags_c, 844 args 845 ); 846 /* Exclude OP and NDEP from definedness checking. We're only 847 interested in DEP1 and DEP2. */ 848 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 849 return call; 850} 851 852 853/* -------------- Building the flags-thunk. -------------- */ 854 855/* The machinery in this section builds the flag-thunk following a 856 flag-setting operation. Hence the various setFlags_* functions. 857*/ 858 859static Bool isAddSub ( IROp op8 ) 860{ 861 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8); 862} 863 864static Bool isLogic ( IROp op8 ) 865{ 866 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8); 867} 868 869/* U-widen 8/16/32 bit int expr to 32. */ 870static IRExpr* widenUto32 ( IRExpr* e ) 871{ 872 switch (typeOfIRExpr(irsb->tyenv,e)) { 873 case Ity_I32: return e; 874 case Ity_I16: return unop(Iop_16Uto32,e); 875 case Ity_I8: return unop(Iop_8Uto32,e); 876 default: vpanic("widenUto32"); 877 } 878} 879 880/* S-widen 8/16/32 bit int expr to 32. */ 881static IRExpr* widenSto32 ( IRExpr* e ) 882{ 883 switch (typeOfIRExpr(irsb->tyenv,e)) { 884 case Ity_I32: return e; 885 case Ity_I16: return unop(Iop_16Sto32,e); 886 case Ity_I8: return unop(Iop_8Sto32,e); 887 default: vpanic("widenSto32"); 888 } 889} 890 891/* Narrow 8/16/32 bit int expr to 8/16/32. Clearly only some 892 of these combinations make sense. */ 893static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e ) 894{ 895 IRType src_ty = typeOfIRExpr(irsb->tyenv,e); 896 if (src_ty == dst_ty) 897 return e; 898 if (src_ty == Ity_I32 && dst_ty == Ity_I16) 899 return unop(Iop_32to16, e); 900 if (src_ty == Ity_I32 && dst_ty == Ity_I8) 901 return unop(Iop_32to8, e); 902 903 vex_printf("\nsrc, dst tys are: "); 904 ppIRType(src_ty); 905 vex_printf(", "); 906 ppIRType(dst_ty); 907 vex_printf("\n"); 908 vpanic("narrowTo(x86)"); 909} 910 911 912/* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is 913 auto-sized up to the real op. */ 914 915static 916void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty ) 917{ 918 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2); 919 920 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32); 921 922 switch (op8) { 923 case Iop_Add8: ccOp += X86G_CC_OP_ADDB; break; 924 case Iop_Sub8: ccOp += X86G_CC_OP_SUBB; break; 925 default: ppIROp(op8); 926 vpanic("setFlags_DEP1_DEP2(x86)"); 927 } 928 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) ); 929 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) ); 930 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(dep2))) ); 931 /* Set NDEP even though it isn't used. This makes redundant-PUT 932 elimination of previous stores to this field work better. */ 933 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 934} 935 936 937/* Set the OP and DEP1 fields only, and write zero to DEP2. */ 938 939static 940void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty ) 941{ 942 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2); 943 944 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32); 945 946 switch (op8) { 947 case Iop_Or8: 948 case Iop_And8: 949 case Iop_Xor8: ccOp += X86G_CC_OP_LOGICB; break; 950 default: ppIROp(op8); 951 vpanic("setFlags_DEP1(x86)"); 952 } 953 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) ); 954 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) ); 955 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) ); 956 /* Set NDEP even though it isn't used. This makes redundant-PUT 957 elimination of previous stores to this field work better. */ 958 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 959} 960 961 962/* For shift operations, we put in the result and the undershifted 963 result. Except if the shift amount is zero, the thunk is left 964 unchanged. */ 965 966static void setFlags_DEP1_DEP2_shift ( IROp op32, 967 IRTemp res, 968 IRTemp resUS, 969 IRType ty, 970 IRTemp guard ) 971{ 972 Int ccOp = ty==Ity_I8 ? 2 : (ty==Ity_I16 ? 1 : 0); 973 974 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32); 975 vassert(guard); 976 977 /* Both kinds of right shifts are handled by the same thunk 978 operation. */ 979 switch (op32) { 980 case Iop_Shr32: 981 case Iop_Sar32: ccOp = X86G_CC_OP_SHRL - ccOp; break; 982 case Iop_Shl32: ccOp = X86G_CC_OP_SHLL - ccOp; break; 983 default: ppIROp(op32); 984 vpanic("setFlags_DEP1_DEP2_shift(x86)"); 985 } 986 987 /* DEP1 contains the result, DEP2 contains the undershifted value. */ 988 stmt( IRStmt_Put( OFFB_CC_OP, 989 IRExpr_Mux0X( mkexpr(guard), 990 IRExpr_Get(OFFB_CC_OP,Ity_I32), 991 mkU32(ccOp))) ); 992 stmt( IRStmt_Put( OFFB_CC_DEP1, 993 IRExpr_Mux0X( mkexpr(guard), 994 IRExpr_Get(OFFB_CC_DEP1,Ity_I32), 995 widenUto32(mkexpr(res)))) ); 996 stmt( IRStmt_Put( OFFB_CC_DEP2, 997 IRExpr_Mux0X( mkexpr(guard), 998 IRExpr_Get(OFFB_CC_DEP2,Ity_I32), 999 widenUto32(mkexpr(resUS)))) ); 1000 /* Set NDEP even though it isn't used. This makes redundant-PUT 1001 elimination of previous stores to this field work better. */ 1002 stmt( IRStmt_Put( OFFB_CC_NDEP, 1003 IRExpr_Mux0X( mkexpr(guard), 1004 IRExpr_Get(OFFB_CC_NDEP,Ity_I32), 1005 mkU32(0) ))); 1006} 1007 1008 1009/* For the inc/dec case, we store in DEP1 the result value and in NDEP 1010 the former value of the carry flag, which unfortunately we have to 1011 compute. */ 1012 1013static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty ) 1014{ 1015 Int ccOp = inc ? X86G_CC_OP_INCB : X86G_CC_OP_DECB; 1016 1017 ccOp += ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2); 1018 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32); 1019 1020 /* This has to come first, because calculating the C flag 1021 may require reading all four thunk fields. */ 1022 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_x86g_calculate_eflags_c()) ); 1023 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) ); 1024 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(res))) ); 1025 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) ); 1026} 1027 1028 1029/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the 1030 two arguments. */ 1031 1032static 1033void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, UInt base_op ) 1034{ 1035 switch (ty) { 1036 case Ity_I8: 1037 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+0) ) ); 1038 break; 1039 case Ity_I16: 1040 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+1) ) ); 1041 break; 1042 case Ity_I32: 1043 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+2) ) ); 1044 break; 1045 default: 1046 vpanic("setFlags_MUL(x86)"); 1047 } 1048 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(arg1)) )); 1049 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(arg2)) )); 1050 /* Set NDEP even though it isn't used. This makes redundant-PUT 1051 elimination of previous stores to this field work better. */ 1052 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 1053} 1054 1055 1056/* -------------- Condition codes. -------------- */ 1057 1058/* Condition codes, using the Intel encoding. */ 1059 1060static HChar* name_X86Condcode ( X86Condcode cond ) 1061{ 1062 switch (cond) { 1063 case X86CondO: return "o"; 1064 case X86CondNO: return "no"; 1065 case X86CondB: return "b"; 1066 case X86CondNB: return "nb"; 1067 case X86CondZ: return "z"; 1068 case X86CondNZ: return "nz"; 1069 case X86CondBE: return "be"; 1070 case X86CondNBE: return "nbe"; 1071 case X86CondS: return "s"; 1072 case X86CondNS: return "ns"; 1073 case X86CondP: return "p"; 1074 case X86CondNP: return "np"; 1075 case X86CondL: return "l"; 1076 case X86CondNL: return "nl"; 1077 case X86CondLE: return "le"; 1078 case X86CondNLE: return "nle"; 1079 case X86CondAlways: return "ALWAYS"; 1080 default: vpanic("name_X86Condcode"); 1081 } 1082} 1083 1084static 1085X86Condcode positiveIse_X86Condcode ( X86Condcode cond, 1086 Bool* needInvert ) 1087{ 1088 vassert(cond >= X86CondO && cond <= X86CondNLE); 1089 if (cond & 1) { 1090 *needInvert = True; 1091 return cond-1; 1092 } else { 1093 *needInvert = False; 1094 return cond; 1095 } 1096} 1097 1098 1099/* -------------- Helpers for ADD/SUB with carry. -------------- */ 1100 1101/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags 1102 appropriately. 1103 1104 Optionally, generate a store for the 'tres' value. This can either 1105 be a normal store, or it can be a cas-with-possible-failure style 1106 store: 1107 1108 if taddr is IRTemp_INVALID, then no store is generated. 1109 1110 if taddr is not IRTemp_INVALID, then a store (using taddr as 1111 the address) is generated: 1112 1113 if texpVal is IRTemp_INVALID then a normal store is 1114 generated, and restart_point must be zero (it is irrelevant). 1115 1116 if texpVal is not IRTemp_INVALID then a cas-style store is 1117 generated. texpVal is the expected value, restart_point 1118 is the restart point if the store fails, and texpVal must 1119 have the same type as tres. 1120*/ 1121static void helper_ADC ( Int sz, 1122 IRTemp tres, IRTemp ta1, IRTemp ta2, 1123 /* info about optional store: */ 1124 IRTemp taddr, IRTemp texpVal, Addr32 restart_point ) 1125{ 1126 UInt thunkOp; 1127 IRType ty = szToITy(sz); 1128 IRTemp oldc = newTemp(Ity_I32); 1129 IRTemp oldcn = newTemp(ty); 1130 IROp plus = mkSizedOp(ty, Iop_Add8); 1131 IROp xor = mkSizedOp(ty, Iop_Xor8); 1132 1133 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty); 1134 vassert(sz == 1 || sz == 2 || sz == 4); 1135 thunkOp = sz==4 ? X86G_CC_OP_ADCL 1136 : (sz==2 ? X86G_CC_OP_ADCW : X86G_CC_OP_ADCB); 1137 1138 /* oldc = old carry flag, 0 or 1 */ 1139 assign( oldc, binop(Iop_And32, 1140 mk_x86g_calculate_eflags_c(), 1141 mkU32(1)) ); 1142 1143 assign( oldcn, narrowTo(ty, mkexpr(oldc)) ); 1144 1145 assign( tres, binop(plus, 1146 binop(plus,mkexpr(ta1),mkexpr(ta2)), 1147 mkexpr(oldcn)) ); 1148 1149 /* Possibly generate a store of 'tres' to 'taddr'. See comment at 1150 start of this function. */ 1151 if (taddr != IRTemp_INVALID) { 1152 if (texpVal == IRTemp_INVALID) { 1153 vassert(restart_point == 0); 1154 storeLE( mkexpr(taddr), mkexpr(tres) ); 1155 } else { 1156 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty); 1157 /* .. and hence 'texpVal' has the same type as 'tres'. */ 1158 casLE( mkexpr(taddr), 1159 mkexpr(texpVal), mkexpr(tres), restart_point ); 1160 } 1161 } 1162 1163 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) ); 1164 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1)) )); 1165 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2), 1166 mkexpr(oldcn)) )) ); 1167 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) ); 1168} 1169 1170 1171/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags 1172 appropriately. As with helper_ADC, possibly generate a store of 1173 the result -- see comments on helper_ADC for details. 1174*/ 1175static void helper_SBB ( Int sz, 1176 IRTemp tres, IRTemp ta1, IRTemp ta2, 1177 /* info about optional store: */ 1178 IRTemp taddr, IRTemp texpVal, Addr32 restart_point ) 1179{ 1180 UInt thunkOp; 1181 IRType ty = szToITy(sz); 1182 IRTemp oldc = newTemp(Ity_I32); 1183 IRTemp oldcn = newTemp(ty); 1184 IROp minus = mkSizedOp(ty, Iop_Sub8); 1185 IROp xor = mkSizedOp(ty, Iop_Xor8); 1186 1187 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty); 1188 vassert(sz == 1 || sz == 2 || sz == 4); 1189 thunkOp = sz==4 ? X86G_CC_OP_SBBL 1190 : (sz==2 ? X86G_CC_OP_SBBW : X86G_CC_OP_SBBB); 1191 1192 /* oldc = old carry flag, 0 or 1 */ 1193 assign( oldc, binop(Iop_And32, 1194 mk_x86g_calculate_eflags_c(), 1195 mkU32(1)) ); 1196 1197 assign( oldcn, narrowTo(ty, mkexpr(oldc)) ); 1198 1199 assign( tres, binop(minus, 1200 binop(minus,mkexpr(ta1),mkexpr(ta2)), 1201 mkexpr(oldcn)) ); 1202 1203 /* Possibly generate a store of 'tres' to 'taddr'. See comment at 1204 start of this function. */ 1205 if (taddr != IRTemp_INVALID) { 1206 if (texpVal == IRTemp_INVALID) { 1207 vassert(restart_point == 0); 1208 storeLE( mkexpr(taddr), mkexpr(tres) ); 1209 } else { 1210 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty); 1211 /* .. and hence 'texpVal' has the same type as 'tres'. */ 1212 casLE( mkexpr(taddr), 1213 mkexpr(texpVal), mkexpr(tres), restart_point ); 1214 } 1215 } 1216 1217 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) ); 1218 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1) )) ); 1219 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2), 1220 mkexpr(oldcn)) )) ); 1221 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) ); 1222} 1223 1224 1225/* -------------- Helpers for disassembly printing. -------------- */ 1226 1227static HChar* nameGrp1 ( Int opc_aux ) 1228{ 1229 static HChar* grp1_names[8] 1230 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" }; 1231 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(x86)"); 1232 return grp1_names[opc_aux]; 1233} 1234 1235static HChar* nameGrp2 ( Int opc_aux ) 1236{ 1237 static HChar* grp2_names[8] 1238 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" }; 1239 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(x86)"); 1240 return grp2_names[opc_aux]; 1241} 1242 1243static HChar* nameGrp4 ( Int opc_aux ) 1244{ 1245 static HChar* grp4_names[8] 1246 = { "inc", "dec", "???", "???", "???", "???", "???", "???" }; 1247 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(x86)"); 1248 return grp4_names[opc_aux]; 1249} 1250 1251static HChar* nameGrp5 ( Int opc_aux ) 1252{ 1253 static HChar* grp5_names[8] 1254 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" }; 1255 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(x86)"); 1256 return grp5_names[opc_aux]; 1257} 1258 1259static HChar* nameGrp8 ( Int opc_aux ) 1260{ 1261 static HChar* grp8_names[8] 1262 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" }; 1263 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(x86)"); 1264 return grp8_names[opc_aux]; 1265} 1266 1267static HChar* nameIReg ( Int size, Int reg ) 1268{ 1269 static HChar* ireg32_names[8] 1270 = { "%eax", "%ecx", "%edx", "%ebx", 1271 "%esp", "%ebp", "%esi", "%edi" }; 1272 static HChar* ireg16_names[8] 1273 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" }; 1274 static HChar* ireg8_names[8] 1275 = { "%al", "%cl", "%dl", "%bl", 1276 "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" }; 1277 if (reg < 0 || reg > 7) goto bad; 1278 switch (size) { 1279 case 4: return ireg32_names[reg]; 1280 case 2: return ireg16_names[reg]; 1281 case 1: return ireg8_names[reg]; 1282 } 1283 bad: 1284 vpanic("nameIReg(X86)"); 1285 return NULL; /*notreached*/ 1286} 1287 1288static HChar* nameSReg ( UInt sreg ) 1289{ 1290 switch (sreg) { 1291 case R_ES: return "%es"; 1292 case R_CS: return "%cs"; 1293 case R_SS: return "%ss"; 1294 case R_DS: return "%ds"; 1295 case R_FS: return "%fs"; 1296 case R_GS: return "%gs"; 1297 default: vpanic("nameSReg(x86)"); 1298 } 1299} 1300 1301static HChar* nameMMXReg ( Int mmxreg ) 1302{ 1303 static HChar* mmx_names[8] 1304 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" }; 1305 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(x86,guest)"); 1306 return mmx_names[mmxreg]; 1307} 1308 1309static HChar* nameXMMReg ( Int xmmreg ) 1310{ 1311 static HChar* xmm_names[8] 1312 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3", 1313 "%xmm4", "%xmm5", "%xmm6", "%xmm7" }; 1314 if (xmmreg < 0 || xmmreg > 7) vpanic("name_of_xmm_reg"); 1315 return xmm_names[xmmreg]; 1316} 1317 1318static HChar* nameMMXGran ( Int gran ) 1319{ 1320 switch (gran) { 1321 case 0: return "b"; 1322 case 1: return "w"; 1323 case 2: return "d"; 1324 case 3: return "q"; 1325 default: vpanic("nameMMXGran(x86,guest)"); 1326 } 1327} 1328 1329static HChar nameISize ( Int size ) 1330{ 1331 switch (size) { 1332 case 4: return 'l'; 1333 case 2: return 'w'; 1334 case 1: return 'b'; 1335 default: vpanic("nameISize(x86)"); 1336 } 1337} 1338 1339 1340/*------------------------------------------------------------*/ 1341/*--- JMP helpers ---*/ 1342/*------------------------------------------------------------*/ 1343 1344static void jmp_lit( /*MOD*/DisResult* dres, 1345 IRJumpKind kind, Addr32 d32 ) 1346{ 1347 vassert(dres->whatNext == Dis_Continue); 1348 vassert(dres->len == 0); 1349 vassert(dres->continueAt == 0); 1350 vassert(dres->jk_StopHere == Ijk_INVALID); 1351 dres->whatNext = Dis_StopHere; 1352 dres->jk_StopHere = kind; 1353 stmt( IRStmt_Put( OFFB_EIP, mkU32(d32) ) ); 1354} 1355 1356static void jmp_treg( /*MOD*/DisResult* dres, 1357 IRJumpKind kind, IRTemp t ) 1358{ 1359 vassert(dres->whatNext == Dis_Continue); 1360 vassert(dres->len == 0); 1361 vassert(dres->continueAt == 0); 1362 vassert(dres->jk_StopHere == Ijk_INVALID); 1363 dres->whatNext = Dis_StopHere; 1364 dres->jk_StopHere = kind; 1365 stmt( IRStmt_Put( OFFB_EIP, mkexpr(t) ) ); 1366} 1367 1368static 1369void jcc_01( /*MOD*/DisResult* dres, 1370 X86Condcode cond, Addr32 d32_false, Addr32 d32_true ) 1371{ 1372 Bool invert; 1373 X86Condcode condPos; 1374 vassert(dres->whatNext == Dis_Continue); 1375 vassert(dres->len == 0); 1376 vassert(dres->continueAt == 0); 1377 vassert(dres->jk_StopHere == Ijk_INVALID); 1378 dres->whatNext = Dis_StopHere; 1379 dres->jk_StopHere = Ijk_Boring; 1380 condPos = positiveIse_X86Condcode ( cond, &invert ); 1381 if (invert) { 1382 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos), 1383 Ijk_Boring, 1384 IRConst_U32(d32_false), 1385 OFFB_EIP ) ); 1386 stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_true) ) ); 1387 } else { 1388 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos), 1389 Ijk_Boring, 1390 IRConst_U32(d32_true), 1391 OFFB_EIP ) ); 1392 stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_false) ) ); 1393 } 1394} 1395 1396 1397/*------------------------------------------------------------*/ 1398/*--- Disassembling addressing modes ---*/ 1399/*------------------------------------------------------------*/ 1400 1401static 1402HChar* sorbTxt ( UChar sorb ) 1403{ 1404 switch (sorb) { 1405 case 0: return ""; /* no override */ 1406 case 0x3E: return "%ds"; 1407 case 0x26: return "%es:"; 1408 case 0x64: return "%fs:"; 1409 case 0x65: return "%gs:"; 1410 default: vpanic("sorbTxt(x86,guest)"); 1411 } 1412} 1413 1414 1415/* 'virtual' is an IRExpr* holding a virtual address. Convert it to a 1416 linear address by adding any required segment override as indicated 1417 by sorb. */ 1418static 1419IRExpr* handleSegOverride ( UChar sorb, IRExpr* virtual ) 1420{ 1421 Int sreg; 1422 IRType hWordTy; 1423 IRTemp ldt_ptr, gdt_ptr, seg_selector, r64; 1424 1425 if (sorb == 0) 1426 /* the common case - no override */ 1427 return virtual; 1428 1429 switch (sorb) { 1430 case 0x3E: sreg = R_DS; break; 1431 case 0x26: sreg = R_ES; break; 1432 case 0x64: sreg = R_FS; break; 1433 case 0x65: sreg = R_GS; break; 1434 default: vpanic("handleSegOverride(x86,guest)"); 1435 } 1436 1437 hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64; 1438 1439 seg_selector = newTemp(Ity_I32); 1440 ldt_ptr = newTemp(hWordTy); 1441 gdt_ptr = newTemp(hWordTy); 1442 r64 = newTemp(Ity_I64); 1443 1444 assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) ); 1445 assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy )); 1446 assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy )); 1447 1448 /* 1449 Call this to do the translation and limit checks: 1450 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt, 1451 UInt seg_selector, UInt virtual_addr ) 1452 */ 1453 assign( 1454 r64, 1455 mkIRExprCCall( 1456 Ity_I64, 1457 0/*regparms*/, 1458 "x86g_use_seg_selector", 1459 &x86g_use_seg_selector, 1460 mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr), 1461 mkexpr(seg_selector), virtual) 1462 ) 1463 ); 1464 1465 /* If the high 32 of the result are non-zero, there was a 1466 failure in address translation. In which case, make a 1467 quick exit. 1468 */ 1469 stmt( 1470 IRStmt_Exit( 1471 binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)), 1472 Ijk_MapFail, 1473 IRConst_U32( guest_EIP_curr_instr ), 1474 OFFB_EIP 1475 ) 1476 ); 1477 1478 /* otherwise, here's the translated result. */ 1479 return unop(Iop_64to32, mkexpr(r64)); 1480} 1481 1482 1483/* Generate IR to calculate an address indicated by a ModRM and 1484 following SIB bytes. The expression, and the number of bytes in 1485 the address mode, are returned. Note that this fn should not be 1486 called if the R/M part of the address denotes a register instead of 1487 memory. If print_codegen is true, text of the addressing mode is 1488 placed in buf. 1489 1490 The computed address is stored in a new tempreg, and the 1491 identity of the tempreg is returned. */ 1492 1493static IRTemp disAMode_copy2tmp ( IRExpr* addr32 ) 1494{ 1495 IRTemp tmp = newTemp(Ity_I32); 1496 assign( tmp, addr32 ); 1497 return tmp; 1498} 1499 1500static 1501IRTemp disAMode ( Int* len, UChar sorb, Int delta, HChar* buf ) 1502{ 1503 UChar mod_reg_rm = getIByte(delta); 1504 delta++; 1505 1506 buf[0] = (UChar)0; 1507 1508 /* squeeze out the reg field from mod_reg_rm, since a 256-entry 1509 jump table seems a bit excessive. 1510 */ 1511 mod_reg_rm &= 0xC7; /* is now XX000YYY */ 1512 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3)); 1513 /* is now XX0XXYYY */ 1514 mod_reg_rm &= 0x1F; /* is now 000XXYYY */ 1515 switch (mod_reg_rm) { 1516 1517 /* (%eax) .. (%edi), not including (%esp) or (%ebp). 1518 --> GET %reg, t 1519 */ 1520 case 0x00: case 0x01: case 0x02: case 0x03: 1521 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: 1522 { UChar rm = mod_reg_rm; 1523 DIS(buf, "%s(%s)", sorbTxt(sorb), nameIReg(4,rm)); 1524 *len = 1; 1525 return disAMode_copy2tmp( 1526 handleSegOverride(sorb, getIReg(4,rm))); 1527 } 1528 1529 /* d8(%eax) ... d8(%edi), not including d8(%esp) 1530 --> GET %reg, t ; ADDL d8, t 1531 */ 1532 case 0x08: case 0x09: case 0x0A: case 0x0B: 1533 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: 1534 { UChar rm = toUChar(mod_reg_rm & 7); 1535 UInt d = getSDisp8(delta); 1536 DIS(buf, "%s%d(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm)); 1537 *len = 2; 1538 return disAMode_copy2tmp( 1539 handleSegOverride(sorb, 1540 binop(Iop_Add32,getIReg(4,rm),mkU32(d)))); 1541 } 1542 1543 /* d32(%eax) ... d32(%edi), not including d32(%esp) 1544 --> GET %reg, t ; ADDL d8, t 1545 */ 1546 case 0x10: case 0x11: case 0x12: case 0x13: 1547 /* ! 14 */ case 0x15: case 0x16: case 0x17: 1548 { UChar rm = toUChar(mod_reg_rm & 7); 1549 UInt d = getUDisp32(delta); 1550 DIS(buf, "%s0x%x(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm)); 1551 *len = 5; 1552 return disAMode_copy2tmp( 1553 handleSegOverride(sorb, 1554 binop(Iop_Add32,getIReg(4,rm),mkU32(d)))); 1555 } 1556 1557 /* a register, %eax .. %edi. This shouldn't happen. */ 1558 case 0x18: case 0x19: case 0x1A: case 0x1B: 1559 case 0x1C: case 0x1D: case 0x1E: case 0x1F: 1560 vpanic("disAMode(x86): not an addr!"); 1561 1562 /* a 32-bit literal address 1563 --> MOV d32, tmp 1564 */ 1565 case 0x05: 1566 { UInt d = getUDisp32(delta); 1567 *len = 5; 1568 DIS(buf, "%s(0x%x)", sorbTxt(sorb), d); 1569 return disAMode_copy2tmp( 1570 handleSegOverride(sorb, mkU32(d))); 1571 } 1572 1573 case 0x04: { 1574 /* SIB, with no displacement. Special cases: 1575 -- %esp cannot act as an index value. 1576 If index_r indicates %esp, zero is used for the index. 1577 -- when mod is zero and base indicates EBP, base is instead 1578 a 32-bit literal. 1579 It's all madness, I tell you. Extract %index, %base and 1580 scale from the SIB byte. The value denoted is then: 1581 | %index == %ESP && %base == %EBP 1582 = d32 following SIB byte 1583 | %index == %ESP && %base != %EBP 1584 = %base 1585 | %index != %ESP && %base == %EBP 1586 = d32 following SIB byte + (%index << scale) 1587 | %index != %ESP && %base != %ESP 1588 = %base + (%index << scale) 1589 1590 What happens to the souls of CPU architects who dream up such 1591 horrendous schemes, do you suppose? 1592 */ 1593 UChar sib = getIByte(delta); 1594 UChar scale = toUChar((sib >> 6) & 3); 1595 UChar index_r = toUChar((sib >> 3) & 7); 1596 UChar base_r = toUChar(sib & 7); 1597 delta++; 1598 1599 if (index_r != R_ESP && base_r != R_EBP) { 1600 DIS(buf, "%s(%s,%s,%d)", sorbTxt(sorb), 1601 nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale); 1602 *len = 2; 1603 return 1604 disAMode_copy2tmp( 1605 handleSegOverride(sorb, 1606 binop(Iop_Add32, 1607 getIReg(4,base_r), 1608 binop(Iop_Shl32, getIReg(4,index_r), 1609 mkU8(scale))))); 1610 } 1611 1612 if (index_r != R_ESP && base_r == R_EBP) { 1613 UInt d = getUDisp32(delta); 1614 DIS(buf, "%s0x%x(,%s,%d)", sorbTxt(sorb), d, 1615 nameIReg(4,index_r), 1<<scale); 1616 *len = 6; 1617 return 1618 disAMode_copy2tmp( 1619 handleSegOverride(sorb, 1620 binop(Iop_Add32, 1621 binop(Iop_Shl32, getIReg(4,index_r), mkU8(scale)), 1622 mkU32(d)))); 1623 } 1624 1625 if (index_r == R_ESP && base_r != R_EBP) { 1626 DIS(buf, "%s(%s,,)", sorbTxt(sorb), nameIReg(4,base_r)); 1627 *len = 2; 1628 return disAMode_copy2tmp( 1629 handleSegOverride(sorb, getIReg(4,base_r))); 1630 } 1631 1632 if (index_r == R_ESP && base_r == R_EBP) { 1633 UInt d = getUDisp32(delta); 1634 DIS(buf, "%s0x%x(,,)", sorbTxt(sorb), d); 1635 *len = 6; 1636 return disAMode_copy2tmp( 1637 handleSegOverride(sorb, mkU32(d))); 1638 } 1639 /*NOTREACHED*/ 1640 vassert(0); 1641 } 1642 1643 /* SIB, with 8-bit displacement. Special cases: 1644 -- %esp cannot act as an index value. 1645 If index_r indicates %esp, zero is used for the index. 1646 Denoted value is: 1647 | %index == %ESP 1648 = d8 + %base 1649 | %index != %ESP 1650 = d8 + %base + (%index << scale) 1651 */ 1652 case 0x0C: { 1653 UChar sib = getIByte(delta); 1654 UChar scale = toUChar((sib >> 6) & 3); 1655 UChar index_r = toUChar((sib >> 3) & 7); 1656 UChar base_r = toUChar(sib & 7); 1657 UInt d = getSDisp8(delta+1); 1658 1659 if (index_r == R_ESP) { 1660 DIS(buf, "%s%d(%s,,)", sorbTxt(sorb), 1661 (Int)d, nameIReg(4,base_r)); 1662 *len = 3; 1663 return disAMode_copy2tmp( 1664 handleSegOverride(sorb, 1665 binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) )); 1666 } else { 1667 DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d, 1668 nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale); 1669 *len = 3; 1670 return 1671 disAMode_copy2tmp( 1672 handleSegOverride(sorb, 1673 binop(Iop_Add32, 1674 binop(Iop_Add32, 1675 getIReg(4,base_r), 1676 binop(Iop_Shl32, 1677 getIReg(4,index_r), mkU8(scale))), 1678 mkU32(d)))); 1679 } 1680 /*NOTREACHED*/ 1681 vassert(0); 1682 } 1683 1684 /* SIB, with 32-bit displacement. Special cases: 1685 -- %esp cannot act as an index value. 1686 If index_r indicates %esp, zero is used for the index. 1687 Denoted value is: 1688 | %index == %ESP 1689 = d32 + %base 1690 | %index != %ESP 1691 = d32 + %base + (%index << scale) 1692 */ 1693 case 0x14: { 1694 UChar sib = getIByte(delta); 1695 UChar scale = toUChar((sib >> 6) & 3); 1696 UChar index_r = toUChar((sib >> 3) & 7); 1697 UChar base_r = toUChar(sib & 7); 1698 UInt d = getUDisp32(delta+1); 1699 1700 if (index_r == R_ESP) { 1701 DIS(buf, "%s%d(%s,,)", sorbTxt(sorb), 1702 (Int)d, nameIReg(4,base_r)); 1703 *len = 6; 1704 return disAMode_copy2tmp( 1705 handleSegOverride(sorb, 1706 binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) )); 1707 } else { 1708 DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d, 1709 nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale); 1710 *len = 6; 1711 return 1712 disAMode_copy2tmp( 1713 handleSegOverride(sorb, 1714 binop(Iop_Add32, 1715 binop(Iop_Add32, 1716 getIReg(4,base_r), 1717 binop(Iop_Shl32, 1718 getIReg(4,index_r), mkU8(scale))), 1719 mkU32(d)))); 1720 } 1721 /*NOTREACHED*/ 1722 vassert(0); 1723 } 1724 1725 default: 1726 vpanic("disAMode(x86)"); 1727 return 0; /*notreached*/ 1728 } 1729} 1730 1731 1732/* Figure out the number of (insn-stream) bytes constituting the amode 1733 beginning at delta. Is useful for getting hold of literals beyond 1734 the end of the amode before it has been disassembled. */ 1735 1736static UInt lengthAMode ( Int delta ) 1737{ 1738 UChar mod_reg_rm = getIByte(delta); delta++; 1739 1740 /* squeeze out the reg field from mod_reg_rm, since a 256-entry 1741 jump table seems a bit excessive. 1742 */ 1743 mod_reg_rm &= 0xC7; /* is now XX000YYY */ 1744 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3)); 1745 /* is now XX0XXYYY */ 1746 mod_reg_rm &= 0x1F; /* is now 000XXYYY */ 1747 switch (mod_reg_rm) { 1748 1749 /* (%eax) .. (%edi), not including (%esp) or (%ebp). */ 1750 case 0x00: case 0x01: case 0x02: case 0x03: 1751 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: 1752 return 1; 1753 1754 /* d8(%eax) ... d8(%edi), not including d8(%esp). */ 1755 case 0x08: case 0x09: case 0x0A: case 0x0B: 1756 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: 1757 return 2; 1758 1759 /* d32(%eax) ... d32(%edi), not including d32(%esp). */ 1760 case 0x10: case 0x11: case 0x12: case 0x13: 1761 /* ! 14 */ case 0x15: case 0x16: case 0x17: 1762 return 5; 1763 1764 /* a register, %eax .. %edi. (Not an addr, but still handled.) */ 1765 case 0x18: case 0x19: case 0x1A: case 0x1B: 1766 case 0x1C: case 0x1D: case 0x1E: case 0x1F: 1767 return 1; 1768 1769 /* a 32-bit literal address. */ 1770 case 0x05: return 5; 1771 1772 /* SIB, no displacement. */ 1773 case 0x04: { 1774 UChar sib = getIByte(delta); 1775 UChar base_r = toUChar(sib & 7); 1776 if (base_r == R_EBP) return 6; else return 2; 1777 } 1778 /* SIB, with 8-bit displacement. */ 1779 case 0x0C: return 3; 1780 1781 /* SIB, with 32-bit displacement. */ 1782 case 0x14: return 6; 1783 1784 default: 1785 vpanic("lengthAMode"); 1786 return 0; /*notreached*/ 1787 } 1788} 1789 1790/*------------------------------------------------------------*/ 1791/*--- Disassembling common idioms ---*/ 1792/*------------------------------------------------------------*/ 1793 1794/* Handle binary integer instructions of the form 1795 op E, G meaning 1796 op reg-or-mem, reg 1797 Is passed the a ptr to the modRM byte, the actual operation, and the 1798 data size. Returns the address advanced completely over this 1799 instruction. 1800 1801 E(src) is reg-or-mem 1802 G(dst) is reg. 1803 1804 If E is reg, --> GET %G, tmp 1805 OP %E, tmp 1806 PUT tmp, %G 1807 1808 If E is mem and OP is not reversible, 1809 --> (getAddr E) -> tmpa 1810 LD (tmpa), tmpa 1811 GET %G, tmp2 1812 OP tmpa, tmp2 1813 PUT tmp2, %G 1814 1815 If E is mem and OP is reversible 1816 --> (getAddr E) -> tmpa 1817 LD (tmpa), tmpa 1818 OP %G, tmpa 1819 PUT tmpa, %G 1820*/ 1821static 1822UInt dis_op2_E_G ( UChar sorb, 1823 Bool addSubCarry, 1824 IROp op8, 1825 Bool keep, 1826 Int size, 1827 Int delta0, 1828 HChar* t_x86opc ) 1829{ 1830 HChar dis_buf[50]; 1831 Int len; 1832 IRType ty = szToITy(size); 1833 IRTemp dst1 = newTemp(ty); 1834 IRTemp src = newTemp(ty); 1835 IRTemp dst0 = newTemp(ty); 1836 UChar rm = getUChar(delta0); 1837 IRTemp addr = IRTemp_INVALID; 1838 1839 /* addSubCarry == True indicates the intended operation is 1840 add-with-carry or subtract-with-borrow. */ 1841 if (addSubCarry) { 1842 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8); 1843 vassert(keep); 1844 } 1845 1846 if (epartIsReg(rm)) { 1847 /* Specially handle XOR reg,reg, because that doesn't really 1848 depend on reg, and doing the obvious thing potentially 1849 generates a spurious value check failure due to the bogus 1850 dependency. Ditto SBB reg,reg. */ 1851 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) 1852 && gregOfRM(rm) == eregOfRM(rm)) { 1853 putIReg(size, gregOfRM(rm), mkU(ty,0)); 1854 } 1855 assign( dst0, getIReg(size,gregOfRM(rm)) ); 1856 assign( src, getIReg(size,eregOfRM(rm)) ); 1857 1858 if (addSubCarry && op8 == Iop_Add8) { 1859 helper_ADC( size, dst1, dst0, src, 1860 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 1861 putIReg(size, gregOfRM(rm), mkexpr(dst1)); 1862 } else 1863 if (addSubCarry && op8 == Iop_Sub8) { 1864 helper_SBB( size, dst1, dst0, src, 1865 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 1866 putIReg(size, gregOfRM(rm), mkexpr(dst1)); 1867 } else { 1868 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 1869 if (isAddSub(op8)) 1870 setFlags_DEP1_DEP2(op8, dst0, src, ty); 1871 else 1872 setFlags_DEP1(op8, dst1, ty); 1873 if (keep) 1874 putIReg(size, gregOfRM(rm), mkexpr(dst1)); 1875 } 1876 1877 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size), 1878 nameIReg(size,eregOfRM(rm)), 1879 nameIReg(size,gregOfRM(rm))); 1880 return 1+delta0; 1881 } else { 1882 /* E refers to memory */ 1883 addr = disAMode ( &len, sorb, delta0, dis_buf); 1884 assign( dst0, getIReg(size,gregOfRM(rm)) ); 1885 assign( src, loadLE(szToITy(size), mkexpr(addr)) ); 1886 1887 if (addSubCarry && op8 == Iop_Add8) { 1888 helper_ADC( size, dst1, dst0, src, 1889 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 1890 putIReg(size, gregOfRM(rm), mkexpr(dst1)); 1891 } else 1892 if (addSubCarry && op8 == Iop_Sub8) { 1893 helper_SBB( size, dst1, dst0, src, 1894 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 1895 putIReg(size, gregOfRM(rm), mkexpr(dst1)); 1896 } else { 1897 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 1898 if (isAddSub(op8)) 1899 setFlags_DEP1_DEP2(op8, dst0, src, ty); 1900 else 1901 setFlags_DEP1(op8, dst1, ty); 1902 if (keep) 1903 putIReg(size, gregOfRM(rm), mkexpr(dst1)); 1904 } 1905 1906 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size), 1907 dis_buf,nameIReg(size,gregOfRM(rm))); 1908 return len+delta0; 1909 } 1910} 1911 1912 1913 1914/* Handle binary integer instructions of the form 1915 op G, E meaning 1916 op reg, reg-or-mem 1917 Is passed the a ptr to the modRM byte, the actual operation, and the 1918 data size. Returns the address advanced completely over this 1919 instruction. 1920 1921 G(src) is reg. 1922 E(dst) is reg-or-mem 1923 1924 If E is reg, --> GET %E, tmp 1925 OP %G, tmp 1926 PUT tmp, %E 1927 1928 If E is mem, --> (getAddr E) -> tmpa 1929 LD (tmpa), tmpv 1930 OP %G, tmpv 1931 ST tmpv, (tmpa) 1932*/ 1933static 1934UInt dis_op2_G_E ( UChar sorb, 1935 Bool locked, 1936 Bool addSubCarry, 1937 IROp op8, 1938 Bool keep, 1939 Int size, 1940 Int delta0, 1941 HChar* t_x86opc ) 1942{ 1943 HChar dis_buf[50]; 1944 Int len; 1945 IRType ty = szToITy(size); 1946 IRTemp dst1 = newTemp(ty); 1947 IRTemp src = newTemp(ty); 1948 IRTemp dst0 = newTemp(ty); 1949 UChar rm = getIByte(delta0); 1950 IRTemp addr = IRTemp_INVALID; 1951 1952 /* addSubCarry == True indicates the intended operation is 1953 add-with-carry or subtract-with-borrow. */ 1954 if (addSubCarry) { 1955 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8); 1956 vassert(keep); 1957 } 1958 1959 if (epartIsReg(rm)) { 1960 /* Specially handle XOR reg,reg, because that doesn't really 1961 depend on reg, and doing the obvious thing potentially 1962 generates a spurious value check failure due to the bogus 1963 dependency. Ditto SBB reg,reg.*/ 1964 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) 1965 && gregOfRM(rm) == eregOfRM(rm)) { 1966 putIReg(size, eregOfRM(rm), mkU(ty,0)); 1967 } 1968 assign(dst0, getIReg(size,eregOfRM(rm))); 1969 assign(src, getIReg(size,gregOfRM(rm))); 1970 1971 if (addSubCarry && op8 == Iop_Add8) { 1972 helper_ADC( size, dst1, dst0, src, 1973 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 1974 putIReg(size, eregOfRM(rm), mkexpr(dst1)); 1975 } else 1976 if (addSubCarry && op8 == Iop_Sub8) { 1977 helper_SBB( size, dst1, dst0, src, 1978 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 1979 putIReg(size, eregOfRM(rm), mkexpr(dst1)); 1980 } else { 1981 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 1982 if (isAddSub(op8)) 1983 setFlags_DEP1_DEP2(op8, dst0, src, ty); 1984 else 1985 setFlags_DEP1(op8, dst1, ty); 1986 if (keep) 1987 putIReg(size, eregOfRM(rm), mkexpr(dst1)); 1988 } 1989 1990 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size), 1991 nameIReg(size,gregOfRM(rm)), 1992 nameIReg(size,eregOfRM(rm))); 1993 return 1+delta0; 1994 } 1995 1996 /* E refers to memory */ 1997 { 1998 addr = disAMode ( &len, sorb, delta0, dis_buf); 1999 assign(dst0, loadLE(ty,mkexpr(addr))); 2000 assign(src, getIReg(size,gregOfRM(rm))); 2001 2002 if (addSubCarry && op8 == Iop_Add8) { 2003 if (locked) { 2004 /* cas-style store */ 2005 helper_ADC( size, dst1, dst0, src, 2006 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 2007 } else { 2008 /* normal store */ 2009 helper_ADC( size, dst1, dst0, src, 2010 /*store*/addr, IRTemp_INVALID, 0 ); 2011 } 2012 } else 2013 if (addSubCarry && op8 == Iop_Sub8) { 2014 if (locked) { 2015 /* cas-style store */ 2016 helper_SBB( size, dst1, dst0, src, 2017 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 2018 } else { 2019 /* normal store */ 2020 helper_SBB( size, dst1, dst0, src, 2021 /*store*/addr, IRTemp_INVALID, 0 ); 2022 } 2023 } else { 2024 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2025 if (keep) { 2026 if (locked) { 2027 if (0) vex_printf("locked case\n" ); 2028 casLE( mkexpr(addr), 2029 mkexpr(dst0)/*expval*/, 2030 mkexpr(dst1)/*newval*/, guest_EIP_curr_instr ); 2031 } else { 2032 if (0) vex_printf("nonlocked case\n"); 2033 storeLE(mkexpr(addr), mkexpr(dst1)); 2034 } 2035 } 2036 if (isAddSub(op8)) 2037 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2038 else 2039 setFlags_DEP1(op8, dst1, ty); 2040 } 2041 2042 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size), 2043 nameIReg(size,gregOfRM(rm)), dis_buf); 2044 return len+delta0; 2045 } 2046} 2047 2048 2049/* Handle move instructions of the form 2050 mov E, G meaning 2051 mov reg-or-mem, reg 2052 Is passed the a ptr to the modRM byte, and the data size. Returns 2053 the address advanced completely over this instruction. 2054 2055 E(src) is reg-or-mem 2056 G(dst) is reg. 2057 2058 If E is reg, --> GET %E, tmpv 2059 PUT tmpv, %G 2060 2061 If E is mem --> (getAddr E) -> tmpa 2062 LD (tmpa), tmpb 2063 PUT tmpb, %G 2064*/ 2065static 2066UInt dis_mov_E_G ( UChar sorb, 2067 Int size, 2068 Int delta0 ) 2069{ 2070 Int len; 2071 UChar rm = getIByte(delta0); 2072 HChar dis_buf[50]; 2073 2074 if (epartIsReg(rm)) { 2075 putIReg(size, gregOfRM(rm), getIReg(size, eregOfRM(rm))); 2076 DIP("mov%c %s,%s\n", nameISize(size), 2077 nameIReg(size,eregOfRM(rm)), 2078 nameIReg(size,gregOfRM(rm))); 2079 return 1+delta0; 2080 } 2081 2082 /* E refers to memory */ 2083 { 2084 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); 2085 putIReg(size, gregOfRM(rm), loadLE(szToITy(size), mkexpr(addr))); 2086 DIP("mov%c %s,%s\n", nameISize(size), 2087 dis_buf,nameIReg(size,gregOfRM(rm))); 2088 return delta0+len; 2089 } 2090} 2091 2092 2093/* Handle move instructions of the form 2094 mov G, E meaning 2095 mov reg, reg-or-mem 2096 Is passed the a ptr to the modRM byte, and the data size. Returns 2097 the address advanced completely over this instruction. 2098 2099 G(src) is reg. 2100 E(dst) is reg-or-mem 2101 2102 If E is reg, --> GET %G, tmp 2103 PUT tmp, %E 2104 2105 If E is mem, --> (getAddr E) -> tmpa 2106 GET %G, tmpv 2107 ST tmpv, (tmpa) 2108*/ 2109static 2110UInt dis_mov_G_E ( UChar sorb, 2111 Int size, 2112 Int delta0 ) 2113{ 2114 Int len; 2115 UChar rm = getIByte(delta0); 2116 HChar dis_buf[50]; 2117 2118 if (epartIsReg(rm)) { 2119 putIReg(size, eregOfRM(rm), getIReg(size, gregOfRM(rm))); 2120 DIP("mov%c %s,%s\n", nameISize(size), 2121 nameIReg(size,gregOfRM(rm)), 2122 nameIReg(size,eregOfRM(rm))); 2123 return 1+delta0; 2124 } 2125 2126 /* E refers to memory */ 2127 { 2128 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf); 2129 storeLE( mkexpr(addr), getIReg(size, gregOfRM(rm)) ); 2130 DIP("mov%c %s,%s\n", nameISize(size), 2131 nameIReg(size,gregOfRM(rm)), dis_buf); 2132 return len+delta0; 2133 } 2134} 2135 2136 2137/* op $immediate, AL/AX/EAX. */ 2138static 2139UInt dis_op_imm_A ( Int size, 2140 Bool carrying, 2141 IROp op8, 2142 Bool keep, 2143 Int delta, 2144 HChar* t_x86opc ) 2145{ 2146 IRType ty = szToITy(size); 2147 IRTemp dst0 = newTemp(ty); 2148 IRTemp src = newTemp(ty); 2149 IRTemp dst1 = newTemp(ty); 2150 UInt lit = getUDisp(size,delta); 2151 assign(dst0, getIReg(size,R_EAX)); 2152 assign(src, mkU(ty,lit)); 2153 2154 if (isAddSub(op8) && !carrying) { 2155 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2156 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2157 } 2158 else 2159 if (isLogic(op8)) { 2160 vassert(!carrying); 2161 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2162 setFlags_DEP1(op8, dst1, ty); 2163 } 2164 else 2165 if (op8 == Iop_Add8 && carrying) { 2166 helper_ADC( size, dst1, dst0, src, 2167 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2168 } 2169 else 2170 if (op8 == Iop_Sub8 && carrying) { 2171 helper_SBB( size, dst1, dst0, src, 2172 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2173 } 2174 else 2175 vpanic("dis_op_imm_A(x86,guest)"); 2176 2177 if (keep) 2178 putIReg(size, R_EAX, mkexpr(dst1)); 2179 2180 DIP("%s%c $0x%x, %s\n", t_x86opc, nameISize(size), 2181 lit, nameIReg(size,R_EAX)); 2182 return delta+size; 2183} 2184 2185 2186/* Sign- and Zero-extending moves. */ 2187static 2188UInt dis_movx_E_G ( UChar sorb, 2189 Int delta, Int szs, Int szd, Bool sign_extend ) 2190{ 2191 UChar rm = getIByte(delta); 2192 if (epartIsReg(rm)) { 2193 if (szd == szs) { 2194 // mutant case. See #250799 2195 putIReg(szd, gregOfRM(rm), 2196 getIReg(szs,eregOfRM(rm))); 2197 } else { 2198 // normal case 2199 putIReg(szd, gregOfRM(rm), 2200 unop(mkWidenOp(szs,szd,sign_extend), 2201 getIReg(szs,eregOfRM(rm)))); 2202 } 2203 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 2204 nameISize(szs), nameISize(szd), 2205 nameIReg(szs,eregOfRM(rm)), 2206 nameIReg(szd,gregOfRM(rm))); 2207 return 1+delta; 2208 } 2209 2210 /* E refers to memory */ 2211 { 2212 Int len; 2213 HChar dis_buf[50]; 2214 IRTemp addr = disAMode ( &len, sorb, delta, dis_buf ); 2215 if (szd == szs) { 2216 // mutant case. See #250799 2217 putIReg(szd, gregOfRM(rm), 2218 loadLE(szToITy(szs),mkexpr(addr))); 2219 } else { 2220 // normal case 2221 putIReg(szd, gregOfRM(rm), 2222 unop(mkWidenOp(szs,szd,sign_extend), 2223 loadLE(szToITy(szs),mkexpr(addr)))); 2224 } 2225 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 2226 nameISize(szs), nameISize(szd), 2227 dis_buf, nameIReg(szd,gregOfRM(rm))); 2228 return len+delta; 2229 } 2230} 2231 2232 2233/* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 / 2234 16 / 8 bit quantity in the given IRTemp. */ 2235static 2236void codegen_div ( Int sz, IRTemp t, Bool signed_divide ) 2237{ 2238 IROp op = signed_divide ? Iop_DivModS64to32 : Iop_DivModU64to32; 2239 IRTemp src64 = newTemp(Ity_I64); 2240 IRTemp dst64 = newTemp(Ity_I64); 2241 switch (sz) { 2242 case 4: 2243 assign( src64, binop(Iop_32HLto64, 2244 getIReg(4,R_EDX), getIReg(4,R_EAX)) ); 2245 assign( dst64, binop(op, mkexpr(src64), mkexpr(t)) ); 2246 putIReg( 4, R_EAX, unop(Iop_64to32,mkexpr(dst64)) ); 2247 putIReg( 4, R_EDX, unop(Iop_64HIto32,mkexpr(dst64)) ); 2248 break; 2249 case 2: { 2250 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 2251 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 2252 assign( src64, unop(widen3264, 2253 binop(Iop_16HLto32, 2254 getIReg(2,R_EDX), getIReg(2,R_EAX))) ); 2255 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) ); 2256 putIReg( 2, R_EAX, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) ); 2257 putIReg( 2, R_EDX, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) ); 2258 break; 2259 } 2260 case 1: { 2261 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 2262 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 2263 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16; 2264 assign( src64, unop(widen3264, unop(widen1632, getIReg(2,R_EAX))) ); 2265 assign( dst64, 2266 binop(op, mkexpr(src64), 2267 unop(widen1632, unop(widen816, mkexpr(t)))) ); 2268 putIReg( 1, R_AL, unop(Iop_16to8, unop(Iop_32to16, 2269 unop(Iop_64to32,mkexpr(dst64)))) ); 2270 putIReg( 1, R_AH, unop(Iop_16to8, unop(Iop_32to16, 2271 unop(Iop_64HIto32,mkexpr(dst64)))) ); 2272 break; 2273 } 2274 default: vpanic("codegen_div(x86)"); 2275 } 2276} 2277 2278 2279static 2280UInt dis_Grp1 ( UChar sorb, Bool locked, 2281 Int delta, UChar modrm, 2282 Int am_sz, Int d_sz, Int sz, UInt d32 ) 2283{ 2284 Int len; 2285 HChar dis_buf[50]; 2286 IRType ty = szToITy(sz); 2287 IRTemp dst1 = newTemp(ty); 2288 IRTemp src = newTemp(ty); 2289 IRTemp dst0 = newTemp(ty); 2290 IRTemp addr = IRTemp_INVALID; 2291 IROp op8 = Iop_INVALID; 2292 UInt mask = sz==1 ? 0xFF : (sz==2 ? 0xFFFF : 0xFFFFFFFF); 2293 2294 switch (gregOfRM(modrm)) { 2295 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break; 2296 case 2: break; // ADC 2297 case 3: break; // SBB 2298 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break; 2299 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break; 2300 /*NOTREACHED*/ 2301 default: vpanic("dis_Grp1: unhandled case"); 2302 } 2303 2304 if (epartIsReg(modrm)) { 2305 vassert(am_sz == 1); 2306 2307 assign(dst0, getIReg(sz,eregOfRM(modrm))); 2308 assign(src, mkU(ty,d32 & mask)); 2309 2310 if (gregOfRM(modrm) == 2 /* ADC */) { 2311 helper_ADC( sz, dst1, dst0, src, 2312 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2313 } else 2314 if (gregOfRM(modrm) == 3 /* SBB */) { 2315 helper_SBB( sz, dst1, dst0, src, 2316 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2317 } else { 2318 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2319 if (isAddSub(op8)) 2320 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2321 else 2322 setFlags_DEP1(op8, dst1, ty); 2323 } 2324 2325 if (gregOfRM(modrm) < 7) 2326 putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); 2327 2328 delta += (am_sz + d_sz); 2329 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), d32, 2330 nameIReg(sz,eregOfRM(modrm))); 2331 } else { 2332 addr = disAMode ( &len, sorb, delta, dis_buf); 2333 2334 assign(dst0, loadLE(ty,mkexpr(addr))); 2335 assign(src, mkU(ty,d32 & mask)); 2336 2337 if (gregOfRM(modrm) == 2 /* ADC */) { 2338 if (locked) { 2339 /* cas-style store */ 2340 helper_ADC( sz, dst1, dst0, src, 2341 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 2342 } else { 2343 /* normal store */ 2344 helper_ADC( sz, dst1, dst0, src, 2345 /*store*/addr, IRTemp_INVALID, 0 ); 2346 } 2347 } else 2348 if (gregOfRM(modrm) == 3 /* SBB */) { 2349 if (locked) { 2350 /* cas-style store */ 2351 helper_SBB( sz, dst1, dst0, src, 2352 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 2353 } else { 2354 /* normal store */ 2355 helper_SBB( sz, dst1, dst0, src, 2356 /*store*/addr, IRTemp_INVALID, 0 ); 2357 } 2358 } else { 2359 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2360 if (gregOfRM(modrm) < 7) { 2361 if (locked) { 2362 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/, 2363 mkexpr(dst1)/*newVal*/, 2364 guest_EIP_curr_instr ); 2365 } else { 2366 storeLE(mkexpr(addr), mkexpr(dst1)); 2367 } 2368 } 2369 if (isAddSub(op8)) 2370 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2371 else 2372 setFlags_DEP1(op8, dst1, ty); 2373 } 2374 2375 delta += (len+d_sz); 2376 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), 2377 d32, dis_buf); 2378 } 2379 return delta; 2380} 2381 2382 2383/* Group 2 extended opcodes. shift_expr must be an 8-bit typed 2384 expression. */ 2385 2386static 2387UInt dis_Grp2 ( UChar sorb, 2388 Int delta, UChar modrm, 2389 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr, 2390 HChar* shift_expr_txt, Bool* decode_OK ) 2391{ 2392 /* delta on entry points at the modrm byte. */ 2393 HChar dis_buf[50]; 2394 Int len; 2395 Bool isShift, isRotate, isRotateC; 2396 IRType ty = szToITy(sz); 2397 IRTemp dst0 = newTemp(ty); 2398 IRTemp dst1 = newTemp(ty); 2399 IRTemp addr = IRTemp_INVALID; 2400 2401 *decode_OK = True; 2402 2403 vassert(sz == 1 || sz == 2 || sz == 4); 2404 2405 /* Put value to shift/rotate in dst0. */ 2406 if (epartIsReg(modrm)) { 2407 assign(dst0, getIReg(sz, eregOfRM(modrm))); 2408 delta += (am_sz + d_sz); 2409 } else { 2410 addr = disAMode ( &len, sorb, delta, dis_buf); 2411 assign(dst0, loadLE(ty,mkexpr(addr))); 2412 delta += len + d_sz; 2413 } 2414 2415 isShift = False; 2416 switch (gregOfRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; } 2417 2418 isRotate = False; 2419 switch (gregOfRM(modrm)) { case 0: case 1: isRotate = True; } 2420 2421 isRotateC = False; 2422 switch (gregOfRM(modrm)) { case 2: case 3: isRotateC = True; } 2423 2424 if (!isShift && !isRotate && !isRotateC) { 2425 /*NOTREACHED*/ 2426 vpanic("dis_Grp2(Reg): unhandled case(x86)"); 2427 } 2428 2429 if (isRotateC) { 2430 /* call a helper; these insns are so ridiculous they do not 2431 deserve better */ 2432 Bool left = toBool(gregOfRM(modrm) == 2); 2433 IRTemp r64 = newTemp(Ity_I64); 2434 IRExpr** args 2435 = mkIRExprVec_4( widenUto32(mkexpr(dst0)), /* thing to rotate */ 2436 widenUto32(shift_expr), /* rotate amount */ 2437 widenUto32(mk_x86g_calculate_eflags_all()), 2438 mkU32(sz) ); 2439 assign( r64, mkIRExprCCall( 2440 Ity_I64, 2441 0/*regparm*/, 2442 left ? "x86g_calculate_RCL" : "x86g_calculate_RCR", 2443 left ? &x86g_calculate_RCL : &x86g_calculate_RCR, 2444 args 2445 ) 2446 ); 2447 /* new eflags in hi half r64; new value in lo half r64 */ 2448 assign( dst1, narrowTo(ty, unop(Iop_64to32, mkexpr(r64))) ); 2449 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 2450 stmt( IRStmt_Put( OFFB_CC_DEP1, unop(Iop_64HIto32, mkexpr(r64)) )); 2451 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 2452 /* Set NDEP even though it isn't used. This makes redundant-PUT 2453 elimination of previous stores to this field work better. */ 2454 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 2455 } 2456 2457 if (isShift) { 2458 2459 IRTemp pre32 = newTemp(Ity_I32); 2460 IRTemp res32 = newTemp(Ity_I32); 2461 IRTemp res32ss = newTemp(Ity_I32); 2462 IRTemp shift_amt = newTemp(Ity_I8); 2463 IROp op32; 2464 2465 switch (gregOfRM(modrm)) { 2466 case 4: op32 = Iop_Shl32; break; 2467 case 5: op32 = Iop_Shr32; break; 2468 case 6: op32 = Iop_Shl32; break; 2469 case 7: op32 = Iop_Sar32; break; 2470 /*NOTREACHED*/ 2471 default: vpanic("dis_Grp2:shift"); break; 2472 } 2473 2474 /* Widen the value to be shifted to 32 bits, do the shift, and 2475 narrow back down. This seems surprisingly long-winded, but 2476 unfortunately the Intel semantics requires that 8/16-bit 2477 shifts give defined results for shift values all the way up 2478 to 31, and this seems the simplest way to do it. It has the 2479 advantage that the only IR level shifts generated are of 32 2480 bit values, and the shift amount is guaranteed to be in the 2481 range 0 .. 31, thereby observing the IR semantics requiring 2482 all shift values to be in the range 0 .. 2^word_size-1. */ 2483 2484 /* shift_amt = shift_expr & 31, regardless of operation size */ 2485 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(31)) ); 2486 2487 /* suitably widen the value to be shifted to 32 bits. */ 2488 assign( pre32, op32==Iop_Sar32 ? widenSto32(mkexpr(dst0)) 2489 : widenUto32(mkexpr(dst0)) ); 2490 2491 /* res32 = pre32 `shift` shift_amt */ 2492 assign( res32, binop(op32, mkexpr(pre32), mkexpr(shift_amt)) ); 2493 2494 /* res32ss = pre32 `shift` ((shift_amt - 1) & 31) */ 2495 assign( res32ss, 2496 binop(op32, 2497 mkexpr(pre32), 2498 binop(Iop_And8, 2499 binop(Iop_Sub8, 2500 mkexpr(shift_amt), mkU8(1)), 2501 mkU8(31))) ); 2502 2503 /* Build the flags thunk. */ 2504 setFlags_DEP1_DEP2_shift(op32, res32, res32ss, ty, shift_amt); 2505 2506 /* Narrow the result back down. */ 2507 assign( dst1, narrowTo(ty, mkexpr(res32)) ); 2508 2509 } /* if (isShift) */ 2510 2511 else 2512 if (isRotate) { 2513 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2); 2514 Bool left = toBool(gregOfRM(modrm) == 0); 2515 IRTemp rot_amt = newTemp(Ity_I8); 2516 IRTemp rot_amt32 = newTemp(Ity_I8); 2517 IRTemp oldFlags = newTemp(Ity_I32); 2518 2519 /* rot_amt = shift_expr & mask */ 2520 /* By masking the rotate amount thusly, the IR-level Shl/Shr 2521 expressions never shift beyond the word size and thus remain 2522 well defined. */ 2523 assign(rot_amt32, binop(Iop_And8, shift_expr, mkU8(31))); 2524 2525 if (ty == Ity_I32) 2526 assign(rot_amt, mkexpr(rot_amt32)); 2527 else 2528 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt32), mkU8(8*sz-1))); 2529 2530 if (left) { 2531 2532 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */ 2533 assign(dst1, 2534 binop( mkSizedOp(ty,Iop_Or8), 2535 binop( mkSizedOp(ty,Iop_Shl8), 2536 mkexpr(dst0), 2537 mkexpr(rot_amt) 2538 ), 2539 binop( mkSizedOp(ty,Iop_Shr8), 2540 mkexpr(dst0), 2541 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 2542 ) 2543 ) 2544 ); 2545 ccOp += X86G_CC_OP_ROLB; 2546 2547 } else { /* right */ 2548 2549 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */ 2550 assign(dst1, 2551 binop( mkSizedOp(ty,Iop_Or8), 2552 binop( mkSizedOp(ty,Iop_Shr8), 2553 mkexpr(dst0), 2554 mkexpr(rot_amt) 2555 ), 2556 binop( mkSizedOp(ty,Iop_Shl8), 2557 mkexpr(dst0), 2558 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 2559 ) 2560 ) 2561 ); 2562 ccOp += X86G_CC_OP_RORB; 2563 2564 } 2565 2566 /* dst1 now holds the rotated value. Build flag thunk. We 2567 need the resulting value for this, and the previous flags. 2568 Except don't set it if the rotate count is zero. */ 2569 2570 assign(oldFlags, mk_x86g_calculate_eflags_all()); 2571 2572 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */ 2573 stmt( IRStmt_Put( OFFB_CC_OP, 2574 IRExpr_Mux0X( mkexpr(rot_amt32), 2575 IRExpr_Get(OFFB_CC_OP,Ity_I32), 2576 mkU32(ccOp))) ); 2577 stmt( IRStmt_Put( OFFB_CC_DEP1, 2578 IRExpr_Mux0X( mkexpr(rot_amt32), 2579 IRExpr_Get(OFFB_CC_DEP1,Ity_I32), 2580 widenUto32(mkexpr(dst1)))) ); 2581 stmt( IRStmt_Put( OFFB_CC_DEP2, 2582 IRExpr_Mux0X( mkexpr(rot_amt32), 2583 IRExpr_Get(OFFB_CC_DEP2,Ity_I32), 2584 mkU32(0))) ); 2585 stmt( IRStmt_Put( OFFB_CC_NDEP, 2586 IRExpr_Mux0X( mkexpr(rot_amt32), 2587 IRExpr_Get(OFFB_CC_NDEP,Ity_I32), 2588 mkexpr(oldFlags))) ); 2589 } /* if (isRotate) */ 2590 2591 /* Save result, and finish up. */ 2592 if (epartIsReg(modrm)) { 2593 putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); 2594 if (vex_traceflags & VEX_TRACE_FE) { 2595 vex_printf("%s%c ", 2596 nameGrp2(gregOfRM(modrm)), nameISize(sz) ); 2597 if (shift_expr_txt) 2598 vex_printf("%s", shift_expr_txt); 2599 else 2600 ppIRExpr(shift_expr); 2601 vex_printf(", %s\n", nameIReg(sz,eregOfRM(modrm))); 2602 } 2603 } else { 2604 storeLE(mkexpr(addr), mkexpr(dst1)); 2605 if (vex_traceflags & VEX_TRACE_FE) { 2606 vex_printf("%s%c ", 2607 nameGrp2(gregOfRM(modrm)), nameISize(sz) ); 2608 if (shift_expr_txt) 2609 vex_printf("%s", shift_expr_txt); 2610 else 2611 ppIRExpr(shift_expr); 2612 vex_printf(", %s\n", dis_buf); 2613 } 2614 } 2615 return delta; 2616} 2617 2618 2619/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */ 2620static 2621UInt dis_Grp8_Imm ( UChar sorb, 2622 Bool locked, 2623 Int delta, UChar modrm, 2624 Int am_sz, Int sz, UInt src_val, 2625 Bool* decode_OK ) 2626{ 2627 /* src_val denotes a d8. 2628 And delta on entry points at the modrm byte. */ 2629 2630 IRType ty = szToITy(sz); 2631 IRTemp t2 = newTemp(Ity_I32); 2632 IRTemp t2m = newTemp(Ity_I32); 2633 IRTemp t_addr = IRTemp_INVALID; 2634 HChar dis_buf[50]; 2635 UInt mask; 2636 2637 /* we're optimists :-) */ 2638 *decode_OK = True; 2639 2640 /* Limit src_val -- the bit offset -- to something within a word. 2641 The Intel docs say that literal offsets larger than a word are 2642 masked in this way. */ 2643 switch (sz) { 2644 case 2: src_val &= 15; break; 2645 case 4: src_val &= 31; break; 2646 default: *decode_OK = False; return delta; 2647 } 2648 2649 /* Invent a mask suitable for the operation. */ 2650 switch (gregOfRM(modrm)) { 2651 case 4: /* BT */ mask = 0; break; 2652 case 5: /* BTS */ mask = 1 << src_val; break; 2653 case 6: /* BTR */ mask = ~(1 << src_val); break; 2654 case 7: /* BTC */ mask = 1 << src_val; break; 2655 /* If this needs to be extended, probably simplest to make a 2656 new function to handle the other cases (0 .. 3). The 2657 Intel docs do however not indicate any use for 0 .. 3, so 2658 we don't expect this to happen. */ 2659 default: *decode_OK = False; return delta; 2660 } 2661 2662 /* Fetch the value to be tested and modified into t2, which is 2663 32-bits wide regardless of sz. */ 2664 if (epartIsReg(modrm)) { 2665 vassert(am_sz == 1); 2666 assign( t2, widenUto32(getIReg(sz, eregOfRM(modrm))) ); 2667 delta += (am_sz + 1); 2668 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz), 2669 src_val, nameIReg(sz,eregOfRM(modrm))); 2670 } else { 2671 Int len; 2672 t_addr = disAMode ( &len, sorb, delta, dis_buf); 2673 delta += (len+1); 2674 assign( t2, widenUto32(loadLE(ty, mkexpr(t_addr))) ); 2675 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz), 2676 src_val, dis_buf); 2677 } 2678 2679 /* Compute the new value into t2m, if non-BT. */ 2680 switch (gregOfRM(modrm)) { 2681 case 4: /* BT */ 2682 break; 2683 case 5: /* BTS */ 2684 assign( t2m, binop(Iop_Or32, mkU32(mask), mkexpr(t2)) ); 2685 break; 2686 case 6: /* BTR */ 2687 assign( t2m, binop(Iop_And32, mkU32(mask), mkexpr(t2)) ); 2688 break; 2689 case 7: /* BTC */ 2690 assign( t2m, binop(Iop_Xor32, mkU32(mask), mkexpr(t2)) ); 2691 break; 2692 default: 2693 /*NOTREACHED*/ /*the previous switch guards this*/ 2694 vassert(0); 2695 } 2696 2697 /* Write the result back, if non-BT. If the CAS fails then we 2698 side-exit from the trace at this point, and so the flag state is 2699 not affected. This is of course as required. */ 2700 if (gregOfRM(modrm) != 4 /* BT */) { 2701 if (epartIsReg(modrm)) { 2702 putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m))); 2703 } else { 2704 if (locked) { 2705 casLE( mkexpr(t_addr), 2706 narrowTo(ty, mkexpr(t2))/*expd*/, 2707 narrowTo(ty, mkexpr(t2m))/*new*/, 2708 guest_EIP_curr_instr ); 2709 } else { 2710 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m))); 2711 } 2712 } 2713 } 2714 2715 /* Copy relevant bit from t2 into the carry flag. */ 2716 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 2717 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 2718 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 2719 stmt( IRStmt_Put( 2720 OFFB_CC_DEP1, 2721 binop(Iop_And32, 2722 binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)), 2723 mkU32(1)) 2724 )); 2725 /* Set NDEP even though it isn't used. This makes redundant-PUT 2726 elimination of previous stores to this field work better. */ 2727 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 2728 2729 return delta; 2730} 2731 2732 2733/* Signed/unsigned widening multiply. Generate IR to multiply the 2734 value in EAX/AX/AL by the given IRTemp, and park the result in 2735 EDX:EAX/DX:AX/AX. 2736*/ 2737static void codegen_mulL_A_D ( Int sz, Bool syned, 2738 IRTemp tmp, HChar* tmp_txt ) 2739{ 2740 IRType ty = szToITy(sz); 2741 IRTemp t1 = newTemp(ty); 2742 2743 assign( t1, getIReg(sz, R_EAX) ); 2744 2745 switch (ty) { 2746 case Ity_I32: { 2747 IRTemp res64 = newTemp(Ity_I64); 2748 IRTemp resHi = newTemp(Ity_I32); 2749 IRTemp resLo = newTemp(Ity_I32); 2750 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32; 2751 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB; 2752 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp ); 2753 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 2754 assign( resHi, unop(Iop_64HIto32,mkexpr(res64))); 2755 assign( resLo, unop(Iop_64to32,mkexpr(res64))); 2756 putIReg(4, R_EDX, mkexpr(resHi)); 2757 putIReg(4, R_EAX, mkexpr(resLo)); 2758 break; 2759 } 2760 case Ity_I16: { 2761 IRTemp res32 = newTemp(Ity_I32); 2762 IRTemp resHi = newTemp(Ity_I16); 2763 IRTemp resLo = newTemp(Ity_I16); 2764 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16; 2765 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB; 2766 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp ); 2767 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 2768 assign( resHi, unop(Iop_32HIto16,mkexpr(res32))); 2769 assign( resLo, unop(Iop_32to16,mkexpr(res32))); 2770 putIReg(2, R_EDX, mkexpr(resHi)); 2771 putIReg(2, R_EAX, mkexpr(resLo)); 2772 break; 2773 } 2774 case Ity_I8: { 2775 IRTemp res16 = newTemp(Ity_I16); 2776 IRTemp resHi = newTemp(Ity_I8); 2777 IRTemp resLo = newTemp(Ity_I8); 2778 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8; 2779 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB; 2780 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp ); 2781 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 2782 assign( resHi, unop(Iop_16HIto8,mkexpr(res16))); 2783 assign( resLo, unop(Iop_16to8,mkexpr(res16))); 2784 putIReg(2, R_EAX, mkexpr(res16)); 2785 break; 2786 } 2787 default: 2788 vpanic("codegen_mulL_A_D(x86)"); 2789 } 2790 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt); 2791} 2792 2793 2794/* Group 3 extended opcodes. */ 2795static 2796UInt dis_Grp3 ( UChar sorb, Bool locked, Int sz, Int delta, Bool* decode_OK ) 2797{ 2798 UInt d32; 2799 UChar modrm; 2800 HChar dis_buf[50]; 2801 Int len; 2802 IRTemp addr; 2803 IRType ty = szToITy(sz); 2804 IRTemp t1 = newTemp(ty); 2805 IRTemp dst1, src, dst0; 2806 2807 *decode_OK = True; /* may change this later */ 2808 2809 modrm = getIByte(delta); 2810 2811 if (locked && (gregOfRM(modrm) != 2 && gregOfRM(modrm) != 3)) { 2812 /* LOCK prefix only allowed with not and neg subopcodes */ 2813 *decode_OK = False; 2814 return delta; 2815 } 2816 2817 if (epartIsReg(modrm)) { 2818 switch (gregOfRM(modrm)) { 2819 case 0: { /* TEST */ 2820 delta++; d32 = getUDisp(sz, delta); delta += sz; 2821 dst1 = newTemp(ty); 2822 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 2823 getIReg(sz,eregOfRM(modrm)), 2824 mkU(ty,d32))); 2825 setFlags_DEP1( Iop_And8, dst1, ty ); 2826 DIP("test%c $0x%x, %s\n", nameISize(sz), d32, 2827 nameIReg(sz, eregOfRM(modrm))); 2828 break; 2829 } 2830 case 1: /* UNDEFINED */ 2831 /* The Intel docs imply this insn is undefined and binutils 2832 agrees. Unfortunately Core 2 will run it (with who 2833 knows what result?) sandpile.org reckons it's an alias 2834 for case 0. We play safe. */ 2835 *decode_OK = False; 2836 break; 2837 case 2: /* NOT */ 2838 delta++; 2839 putIReg(sz, eregOfRM(modrm), 2840 unop(mkSizedOp(ty,Iop_Not8), 2841 getIReg(sz, eregOfRM(modrm)))); 2842 DIP("not%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2843 break; 2844 case 3: /* NEG */ 2845 delta++; 2846 dst0 = newTemp(ty); 2847 src = newTemp(ty); 2848 dst1 = newTemp(ty); 2849 assign(dst0, mkU(ty,0)); 2850 assign(src, getIReg(sz,eregOfRM(modrm))); 2851 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src))); 2852 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 2853 putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); 2854 DIP("neg%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2855 break; 2856 case 4: /* MUL (unsigned widening) */ 2857 delta++; 2858 src = newTemp(ty); 2859 assign(src, getIReg(sz,eregOfRM(modrm))); 2860 codegen_mulL_A_D ( sz, False, src, nameIReg(sz,eregOfRM(modrm)) ); 2861 break; 2862 case 5: /* IMUL (signed widening) */ 2863 delta++; 2864 src = newTemp(ty); 2865 assign(src, getIReg(sz,eregOfRM(modrm))); 2866 codegen_mulL_A_D ( sz, True, src, nameIReg(sz,eregOfRM(modrm)) ); 2867 break; 2868 case 6: /* DIV */ 2869 delta++; 2870 assign( t1, getIReg(sz, eregOfRM(modrm)) ); 2871 codegen_div ( sz, t1, False ); 2872 DIP("div%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2873 break; 2874 case 7: /* IDIV */ 2875 delta++; 2876 assign( t1, getIReg(sz, eregOfRM(modrm)) ); 2877 codegen_div ( sz, t1, True ); 2878 DIP("idiv%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2879 break; 2880 default: 2881 /* This can't happen - gregOfRM should return 0 .. 7 only */ 2882 vpanic("Grp3(x86)"); 2883 } 2884 } else { 2885 addr = disAMode ( &len, sorb, delta, dis_buf ); 2886 t1 = newTemp(ty); 2887 delta += len; 2888 assign(t1, loadLE(ty,mkexpr(addr))); 2889 switch (gregOfRM(modrm)) { 2890 case 0: { /* TEST */ 2891 d32 = getUDisp(sz, delta); delta += sz; 2892 dst1 = newTemp(ty); 2893 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 2894 mkexpr(t1), mkU(ty,d32))); 2895 setFlags_DEP1( Iop_And8, dst1, ty ); 2896 DIP("test%c $0x%x, %s\n", nameISize(sz), d32, dis_buf); 2897 break; 2898 } 2899 case 1: /* UNDEFINED */ 2900 /* See comment above on R case */ 2901 *decode_OK = False; 2902 break; 2903 case 2: /* NOT */ 2904 dst1 = newTemp(ty); 2905 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1))); 2906 if (locked) { 2907 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 2908 guest_EIP_curr_instr ); 2909 } else { 2910 storeLE( mkexpr(addr), mkexpr(dst1) ); 2911 } 2912 DIP("not%c %s\n", nameISize(sz), dis_buf); 2913 break; 2914 case 3: /* NEG */ 2915 dst0 = newTemp(ty); 2916 src = newTemp(ty); 2917 dst1 = newTemp(ty); 2918 assign(dst0, mkU(ty,0)); 2919 assign(src, mkexpr(t1)); 2920 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), 2921 mkexpr(dst0), mkexpr(src))); 2922 if (locked) { 2923 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 2924 guest_EIP_curr_instr ); 2925 } else { 2926 storeLE( mkexpr(addr), mkexpr(dst1) ); 2927 } 2928 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 2929 DIP("neg%c %s\n", nameISize(sz), dis_buf); 2930 break; 2931 case 4: /* MUL */ 2932 codegen_mulL_A_D ( sz, False, t1, dis_buf ); 2933 break; 2934 case 5: /* IMUL */ 2935 codegen_mulL_A_D ( sz, True, t1, dis_buf ); 2936 break; 2937 case 6: /* DIV */ 2938 codegen_div ( sz, t1, False ); 2939 DIP("div%c %s\n", nameISize(sz), dis_buf); 2940 break; 2941 case 7: /* IDIV */ 2942 codegen_div ( sz, t1, True ); 2943 DIP("idiv%c %s\n", nameISize(sz), dis_buf); 2944 break; 2945 default: 2946 /* This can't happen - gregOfRM should return 0 .. 7 only */ 2947 vpanic("Grp3(x86)"); 2948 } 2949 } 2950 return delta; 2951} 2952 2953 2954/* Group 4 extended opcodes. */ 2955static 2956UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK ) 2957{ 2958 Int alen; 2959 UChar modrm; 2960 HChar dis_buf[50]; 2961 IRType ty = Ity_I8; 2962 IRTemp t1 = newTemp(ty); 2963 IRTemp t2 = newTemp(ty); 2964 2965 *decode_OK = True; 2966 2967 modrm = getIByte(delta); 2968 2969 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) { 2970 /* LOCK prefix only allowed with inc and dec subopcodes */ 2971 *decode_OK = False; 2972 return delta; 2973 } 2974 2975 if (epartIsReg(modrm)) { 2976 assign(t1, getIReg(1, eregOfRM(modrm))); 2977 switch (gregOfRM(modrm)) { 2978 case 0: /* INC */ 2979 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 2980 putIReg(1, eregOfRM(modrm), mkexpr(t2)); 2981 setFlags_INC_DEC( True, t2, ty ); 2982 break; 2983 case 1: /* DEC */ 2984 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 2985 putIReg(1, eregOfRM(modrm), mkexpr(t2)); 2986 setFlags_INC_DEC( False, t2, ty ); 2987 break; 2988 default: 2989 *decode_OK = False; 2990 return delta; 2991 } 2992 delta++; 2993 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), 2994 nameIReg(1, eregOfRM(modrm))); 2995 } else { 2996 IRTemp addr = disAMode ( &alen, sorb, delta, dis_buf ); 2997 assign( t1, loadLE(ty, mkexpr(addr)) ); 2998 switch (gregOfRM(modrm)) { 2999 case 0: /* INC */ 3000 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 3001 if (locked) { 3002 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 3003 guest_EIP_curr_instr ); 3004 } else { 3005 storeLE( mkexpr(addr), mkexpr(t2) ); 3006 } 3007 setFlags_INC_DEC( True, t2, ty ); 3008 break; 3009 case 1: /* DEC */ 3010 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 3011 if (locked) { 3012 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 3013 guest_EIP_curr_instr ); 3014 } else { 3015 storeLE( mkexpr(addr), mkexpr(t2) ); 3016 } 3017 setFlags_INC_DEC( False, t2, ty ); 3018 break; 3019 default: 3020 *decode_OK = False; 3021 return delta; 3022 } 3023 delta += alen; 3024 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), dis_buf); 3025 } 3026 return delta; 3027} 3028 3029 3030/* Group 5 extended opcodes. */ 3031static 3032UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta, 3033 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK ) 3034{ 3035 Int len; 3036 UChar modrm; 3037 HChar dis_buf[50]; 3038 IRTemp addr = IRTemp_INVALID; 3039 IRType ty = szToITy(sz); 3040 IRTemp t1 = newTemp(ty); 3041 IRTemp t2 = IRTemp_INVALID; 3042 3043 *decode_OK = True; 3044 3045 modrm = getIByte(delta); 3046 3047 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) { 3048 /* LOCK prefix only allowed with inc and dec subopcodes */ 3049 *decode_OK = False; 3050 return delta; 3051 } 3052 3053 if (epartIsReg(modrm)) { 3054 assign(t1, getIReg(sz,eregOfRM(modrm))); 3055 switch (gregOfRM(modrm)) { 3056 case 0: /* INC */ 3057 vassert(sz == 2 || sz == 4); 3058 t2 = newTemp(ty); 3059 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 3060 mkexpr(t1), mkU(ty,1))); 3061 setFlags_INC_DEC( True, t2, ty ); 3062 putIReg(sz,eregOfRM(modrm),mkexpr(t2)); 3063 break; 3064 case 1: /* DEC */ 3065 vassert(sz == 2 || sz == 4); 3066 t2 = newTemp(ty); 3067 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 3068 mkexpr(t1), mkU(ty,1))); 3069 setFlags_INC_DEC( False, t2, ty ); 3070 putIReg(sz,eregOfRM(modrm),mkexpr(t2)); 3071 break; 3072 case 2: /* call Ev */ 3073 vassert(sz == 4); 3074 t2 = newTemp(Ity_I32); 3075 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); 3076 putIReg(4, R_ESP, mkexpr(t2)); 3077 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1)); 3078 jmp_treg(dres, Ijk_Call, t1); 3079 vassert(dres->whatNext == Dis_StopHere); 3080 break; 3081 case 4: /* jmp Ev */ 3082 vassert(sz == 4); 3083 jmp_treg(dres, Ijk_Boring, t1); 3084 vassert(dres->whatNext == Dis_StopHere); 3085 break; 3086 case 6: /* PUSH Ev */ 3087 vassert(sz == 4 || sz == 2); 3088 t2 = newTemp(Ity_I32); 3089 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); 3090 putIReg(4, R_ESP, mkexpr(t2) ); 3091 storeLE( mkexpr(t2), mkexpr(t1) ); 3092 break; 3093 default: 3094 *decode_OK = False; 3095 return delta; 3096 } 3097 delta++; 3098 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)), 3099 nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 3100 } else { 3101 addr = disAMode ( &len, sorb, delta, dis_buf ); 3102 assign(t1, loadLE(ty,mkexpr(addr))); 3103 switch (gregOfRM(modrm)) { 3104 case 0: /* INC */ 3105 t2 = newTemp(ty); 3106 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 3107 mkexpr(t1), mkU(ty,1))); 3108 if (locked) { 3109 casLE( mkexpr(addr), 3110 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr ); 3111 } else { 3112 storeLE(mkexpr(addr),mkexpr(t2)); 3113 } 3114 setFlags_INC_DEC( True, t2, ty ); 3115 break; 3116 case 1: /* DEC */ 3117 t2 = newTemp(ty); 3118 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 3119 mkexpr(t1), mkU(ty,1))); 3120 if (locked) { 3121 casLE( mkexpr(addr), 3122 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr ); 3123 } else { 3124 storeLE(mkexpr(addr),mkexpr(t2)); 3125 } 3126 setFlags_INC_DEC( False, t2, ty ); 3127 break; 3128 case 2: /* call Ev */ 3129 vassert(sz == 4); 3130 t2 = newTemp(Ity_I32); 3131 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); 3132 putIReg(4, R_ESP, mkexpr(t2)); 3133 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len)); 3134 jmp_treg(dres, Ijk_Call, t1); 3135 vassert(dres->whatNext == Dis_StopHere); 3136 break; 3137 case 4: /* JMP Ev */ 3138 vassert(sz == 4); 3139 jmp_treg(dres, Ijk_Boring, t1); 3140 vassert(dres->whatNext == Dis_StopHere); 3141 break; 3142 case 6: /* PUSH Ev */ 3143 vassert(sz == 4 || sz == 2); 3144 t2 = newTemp(Ity_I32); 3145 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); 3146 putIReg(4, R_ESP, mkexpr(t2) ); 3147 storeLE( mkexpr(t2), mkexpr(t1) ); 3148 break; 3149 default: 3150 *decode_OK = False; 3151 return delta; 3152 } 3153 delta += len; 3154 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)), 3155 nameISize(sz), dis_buf); 3156 } 3157 return delta; 3158} 3159 3160 3161/*------------------------------------------------------------*/ 3162/*--- Disassembling string ops (including REP prefixes) ---*/ 3163/*------------------------------------------------------------*/ 3164 3165/* Code shared by all the string ops */ 3166static 3167void dis_string_op_increment(Int sz, Int t_inc) 3168{ 3169 if (sz == 4 || sz == 2) { 3170 assign( t_inc, 3171 binop(Iop_Shl32, IRExpr_Get( OFFB_DFLAG, Ity_I32 ), 3172 mkU8(sz/2) ) ); 3173 } else { 3174 assign( t_inc, 3175 IRExpr_Get( OFFB_DFLAG, Ity_I32 ) ); 3176 } 3177} 3178 3179static 3180void dis_string_op( void (*dis_OP)( Int, IRTemp ), 3181 Int sz, HChar* name, UChar sorb ) 3182{ 3183 IRTemp t_inc = newTemp(Ity_I32); 3184 vassert(sorb == 0); /* hmm. so what was the point of passing it in? */ 3185 dis_string_op_increment(sz, t_inc); 3186 dis_OP( sz, t_inc ); 3187 DIP("%s%c\n", name, nameISize(sz)); 3188} 3189 3190static 3191void dis_MOVS ( Int sz, IRTemp t_inc ) 3192{ 3193 IRType ty = szToITy(sz); 3194 IRTemp td = newTemp(Ity_I32); /* EDI */ 3195 IRTemp ts = newTemp(Ity_I32); /* ESI */ 3196 3197 assign( td, getIReg(4, R_EDI) ); 3198 assign( ts, getIReg(4, R_ESI) ); 3199 3200 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) ); 3201 3202 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) ); 3203 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) ); 3204} 3205 3206static 3207void dis_LODS ( Int sz, IRTemp t_inc ) 3208{ 3209 IRType ty = szToITy(sz); 3210 IRTemp ts = newTemp(Ity_I32); /* ESI */ 3211 3212 assign( ts, getIReg(4, R_ESI) ); 3213 3214 putIReg( sz, R_EAX, loadLE(ty, mkexpr(ts)) ); 3215 3216 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) ); 3217} 3218 3219static 3220void dis_STOS ( Int sz, IRTemp t_inc ) 3221{ 3222 IRType ty = szToITy(sz); 3223 IRTemp ta = newTemp(ty); /* EAX */ 3224 IRTemp td = newTemp(Ity_I32); /* EDI */ 3225 3226 assign( ta, getIReg(sz, R_EAX) ); 3227 assign( td, getIReg(4, R_EDI) ); 3228 3229 storeLE( mkexpr(td), mkexpr(ta) ); 3230 3231 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) ); 3232} 3233 3234static 3235void dis_CMPS ( Int sz, IRTemp t_inc ) 3236{ 3237 IRType ty = szToITy(sz); 3238 IRTemp tdv = newTemp(ty); /* (EDI) */ 3239 IRTemp tsv = newTemp(ty); /* (ESI) */ 3240 IRTemp td = newTemp(Ity_I32); /* EDI */ 3241 IRTemp ts = newTemp(Ity_I32); /* ESI */ 3242 3243 assign( td, getIReg(4, R_EDI) ); 3244 assign( ts, getIReg(4, R_ESI) ); 3245 3246 assign( tdv, loadLE(ty,mkexpr(td)) ); 3247 assign( tsv, loadLE(ty,mkexpr(ts)) ); 3248 3249 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty ); 3250 3251 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) ); 3252 putIReg(4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) ); 3253} 3254 3255static 3256void dis_SCAS ( Int sz, IRTemp t_inc ) 3257{ 3258 IRType ty = szToITy(sz); 3259 IRTemp ta = newTemp(ty); /* EAX */ 3260 IRTemp td = newTemp(Ity_I32); /* EDI */ 3261 IRTemp tdv = newTemp(ty); /* (EDI) */ 3262 3263 assign( ta, getIReg(sz, R_EAX) ); 3264 assign( td, getIReg(4, R_EDI) ); 3265 3266 assign( tdv, loadLE(ty,mkexpr(td)) ); 3267 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty ); 3268 3269 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) ); 3270} 3271 3272 3273/* Wrap the appropriate string op inside a REP/REPE/REPNE. 3274 We assume the insn is the last one in the basic block, and so emit a jump 3275 to the next insn, rather than just falling through. */ 3276static 3277void dis_REP_op ( /*MOD*/DisResult* dres, 3278 X86Condcode cond, 3279 void (*dis_OP)(Int, IRTemp), 3280 Int sz, Addr32 eip, Addr32 eip_next, HChar* name ) 3281{ 3282 IRTemp t_inc = newTemp(Ity_I32); 3283 IRTemp tc = newTemp(Ity_I32); /* ECX */ 3284 3285 assign( tc, getIReg(4,R_ECX) ); 3286 3287 stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)), 3288 Ijk_Boring, 3289 IRConst_U32(eip_next), OFFB_EIP ) ); 3290 3291 putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) ); 3292 3293 dis_string_op_increment(sz, t_inc); 3294 dis_OP (sz, t_inc); 3295 3296 if (cond == X86CondAlways) { 3297 jmp_lit(dres, Ijk_Boring, eip); 3298 vassert(dres->whatNext == Dis_StopHere); 3299 } else { 3300 stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond), 3301 Ijk_Boring, 3302 IRConst_U32(eip), OFFB_EIP ) ); 3303 jmp_lit(dres, Ijk_Boring, eip_next); 3304 vassert(dres->whatNext == Dis_StopHere); 3305 } 3306 DIP("%s%c\n", name, nameISize(sz)); 3307} 3308 3309 3310/*------------------------------------------------------------*/ 3311/*--- Arithmetic, etc. ---*/ 3312/*------------------------------------------------------------*/ 3313 3314/* IMUL E, G. Supplied eip points to the modR/M byte. */ 3315static 3316UInt dis_mul_E_G ( UChar sorb, 3317 Int size, 3318 Int delta0 ) 3319{ 3320 Int alen; 3321 HChar dis_buf[50]; 3322 UChar rm = getIByte(delta0); 3323 IRType ty = szToITy(size); 3324 IRTemp te = newTemp(ty); 3325 IRTemp tg = newTemp(ty); 3326 IRTemp resLo = newTemp(ty); 3327 3328 assign( tg, getIReg(size, gregOfRM(rm)) ); 3329 if (epartIsReg(rm)) { 3330 assign( te, getIReg(size, eregOfRM(rm)) ); 3331 } else { 3332 IRTemp addr = disAMode( &alen, sorb, delta0, dis_buf ); 3333 assign( te, loadLE(ty,mkexpr(addr)) ); 3334 } 3335 3336 setFlags_MUL ( ty, te, tg, X86G_CC_OP_SMULB ); 3337 3338 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) ); 3339 3340 putIReg(size, gregOfRM(rm), mkexpr(resLo) ); 3341 3342 if (epartIsReg(rm)) { 3343 DIP("imul%c %s, %s\n", nameISize(size), 3344 nameIReg(size,eregOfRM(rm)), 3345 nameIReg(size,gregOfRM(rm))); 3346 return 1+delta0; 3347 } else { 3348 DIP("imul%c %s, %s\n", nameISize(size), 3349 dis_buf, nameIReg(size,gregOfRM(rm))); 3350 return alen+delta0; 3351 } 3352} 3353 3354 3355/* IMUL I * E -> G. Supplied eip points to the modR/M byte. */ 3356static 3357UInt dis_imul_I_E_G ( UChar sorb, 3358 Int size, 3359 Int delta, 3360 Int litsize ) 3361{ 3362 Int d32, alen; 3363 HChar dis_buf[50]; 3364 UChar rm = getIByte(delta); 3365 IRType ty = szToITy(size); 3366 IRTemp te = newTemp(ty); 3367 IRTemp tl = newTemp(ty); 3368 IRTemp resLo = newTemp(ty); 3369 3370 vassert(size == 1 || size == 2 || size == 4); 3371 3372 if (epartIsReg(rm)) { 3373 assign(te, getIReg(size, eregOfRM(rm))); 3374 delta++; 3375 } else { 3376 IRTemp addr = disAMode( &alen, sorb, delta, dis_buf ); 3377 assign(te, loadLE(ty, mkexpr(addr))); 3378 delta += alen; 3379 } 3380 d32 = getSDisp(litsize,delta); 3381 delta += litsize; 3382 3383 if (size == 1) d32 &= 0xFF; 3384 if (size == 2) d32 &= 0xFFFF; 3385 3386 assign(tl, mkU(ty,d32)); 3387 3388 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) )); 3389 3390 setFlags_MUL ( ty, te, tl, X86G_CC_OP_SMULB ); 3391 3392 putIReg(size, gregOfRM(rm), mkexpr(resLo)); 3393 3394 DIP("imul %d, %s, %s\n", d32, 3395 ( epartIsReg(rm) ? nameIReg(size,eregOfRM(rm)) : dis_buf ), 3396 nameIReg(size,gregOfRM(rm)) ); 3397 return delta; 3398} 3399 3400 3401/* Generate an IR sequence to do a count-leading-zeroes operation on 3402 the supplied IRTemp, and return a new IRTemp holding the result. 3403 'ty' may be Ity_I16 or Ity_I32 only. In the case where the 3404 argument is zero, return the number of bits in the word (the 3405 natural semantics). */ 3406static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) 3407{ 3408 vassert(ty == Ity_I32 || ty == Ity_I16); 3409 3410 IRTemp src32 = newTemp(Ity_I32); 3411 assign(src32, widenUto32( mkexpr(src) )); 3412 3413 IRTemp src32x = newTemp(Ity_I32); 3414 assign(src32x, 3415 binop(Iop_Shl32, mkexpr(src32), 3416 mkU8(32 - 8 * sizeofIRType(ty)))); 3417 3418 // Clz32 has undefined semantics when its input is zero, so 3419 // special-case around that. 3420 IRTemp res32 = newTemp(Ity_I32); 3421 assign(res32, 3422 IRExpr_Mux0X( 3423 unop(Iop_1Uto8, 3424 binop(Iop_CmpEQ32, mkexpr(src32x), mkU32(0))), 3425 unop(Iop_Clz32, mkexpr(src32x)), 3426 mkU32(8 * sizeofIRType(ty)) 3427 )); 3428 3429 IRTemp res = newTemp(ty); 3430 assign(res, narrowTo(ty, mkexpr(res32))); 3431 return res; 3432} 3433 3434 3435/*------------------------------------------------------------*/ 3436/*--- ---*/ 3437/*--- x87 FLOATING POINT INSTRUCTIONS ---*/ 3438/*--- ---*/ 3439/*------------------------------------------------------------*/ 3440 3441/* --- Helper functions for dealing with the register stack. --- */ 3442 3443/* --- Set the emulation-warning pseudo-register. --- */ 3444 3445static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ ) 3446{ 3447 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 3448 stmt( IRStmt_Put( OFFB_EMWARN, e ) ); 3449} 3450 3451/* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */ 3452 3453static IRExpr* mkQNaN64 ( void ) 3454{ 3455 /* QNaN is 0 2047 1 0(51times) 3456 == 0b 11111111111b 1 0(51times) 3457 == 0x7FF8 0000 0000 0000 3458 */ 3459 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL)); 3460} 3461 3462/* --------- Get/put the top-of-stack pointer. --------- */ 3463 3464static IRExpr* get_ftop ( void ) 3465{ 3466 return IRExpr_Get( OFFB_FTOP, Ity_I32 ); 3467} 3468 3469static void put_ftop ( IRExpr* e ) 3470{ 3471 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 3472 stmt( IRStmt_Put( OFFB_FTOP, e ) ); 3473} 3474 3475/* --------- Get/put the C3210 bits. --------- */ 3476 3477static IRExpr* get_C3210 ( void ) 3478{ 3479 return IRExpr_Get( OFFB_FC3210, Ity_I32 ); 3480} 3481 3482static void put_C3210 ( IRExpr* e ) 3483{ 3484 stmt( IRStmt_Put( OFFB_FC3210, e ) ); 3485} 3486 3487/* --------- Get/put the FPU rounding mode. --------- */ 3488static IRExpr* /* :: Ity_I32 */ get_fpround ( void ) 3489{ 3490 return IRExpr_Get( OFFB_FPROUND, Ity_I32 ); 3491} 3492 3493static void put_fpround ( IRExpr* /* :: Ity_I32 */ e ) 3494{ 3495 stmt( IRStmt_Put( OFFB_FPROUND, e ) ); 3496} 3497 3498 3499/* --------- Synthesise a 2-bit FPU rounding mode. --------- */ 3500/* Produces a value in 0 .. 3, which is encoded as per the type 3501 IRRoundingMode. Since the guest_FPROUND value is also encoded as 3502 per IRRoundingMode, we merely need to get it and mask it for 3503 safety. 3504*/ 3505static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void ) 3506{ 3507 return binop( Iop_And32, get_fpround(), mkU32(3) ); 3508} 3509 3510static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) 3511{ 3512 return mkU32(Irrm_NEAREST); 3513} 3514 3515 3516/* --------- Get/set FP register tag bytes. --------- */ 3517 3518/* Given i, and some expression e, generate 'ST_TAG(i) = e'. */ 3519 3520static void put_ST_TAG ( Int i, IRExpr* value ) 3521{ 3522 IRRegArray* descr; 3523 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8); 3524 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 3525 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) ); 3526} 3527 3528/* Given i, generate an expression yielding 'ST_TAG(i)'. This will be 3529 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */ 3530 3531static IRExpr* get_ST_TAG ( Int i ) 3532{ 3533 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 3534 return IRExpr_GetI( descr, get_ftop(), i ); 3535} 3536 3537 3538/* --------- Get/set FP registers. --------- */ 3539 3540/* Given i, and some expression e, emit 'ST(i) = e' and set the 3541 register's tag to indicate the register is full. The previous 3542 state of the register is not checked. */ 3543 3544static void put_ST_UNCHECKED ( Int i, IRExpr* value ) 3545{ 3546 IRRegArray* descr; 3547 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64); 3548 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 3549 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) ); 3550 /* Mark the register as in-use. */ 3551 put_ST_TAG(i, mkU8(1)); 3552} 3553 3554/* Given i, and some expression e, emit 3555 ST(i) = is_full(i) ? NaN : e 3556 and set the tag accordingly. 3557*/ 3558 3559static void put_ST ( Int i, IRExpr* value ) 3560{ 3561 put_ST_UNCHECKED( i, 3562 IRExpr_Mux0X( get_ST_TAG(i), 3563 /* 0 means empty */ 3564 value, 3565 /* non-0 means full */ 3566 mkQNaN64() 3567 ) 3568 ); 3569} 3570 3571 3572/* Given i, generate an expression yielding 'ST(i)'. */ 3573 3574static IRExpr* get_ST_UNCHECKED ( Int i ) 3575{ 3576 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 3577 return IRExpr_GetI( descr, get_ftop(), i ); 3578} 3579 3580 3581/* Given i, generate an expression yielding 3582 is_full(i) ? ST(i) : NaN 3583*/ 3584 3585static IRExpr* get_ST ( Int i ) 3586{ 3587 return 3588 IRExpr_Mux0X( get_ST_TAG(i), 3589 /* 0 means empty */ 3590 mkQNaN64(), 3591 /* non-0 means full */ 3592 get_ST_UNCHECKED(i)); 3593} 3594 3595 3596/* Adjust FTOP downwards by one register. */ 3597 3598static void fp_push ( void ) 3599{ 3600 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); 3601} 3602 3603/* Adjust FTOP upwards by one register, and mark the vacated register 3604 as empty. */ 3605 3606static void fp_pop ( void ) 3607{ 3608 put_ST_TAG(0, mkU8(0)); 3609 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 3610} 3611 3612/* Clear the C2 bit of the FPU status register, for 3613 sin/cos/tan/sincos. */ 3614 3615static void clear_C2 ( void ) 3616{ 3617 put_C3210( binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)) ); 3618} 3619 3620/* Invent a plausible-looking FPU status word value: 3621 ((ftop & 7) << 11) | (c3210 & 0x4700) 3622 */ 3623static IRExpr* get_FPU_sw ( void ) 3624{ 3625 return 3626 unop(Iop_32to16, 3627 binop(Iop_Or32, 3628 binop(Iop_Shl32, 3629 binop(Iop_And32, get_ftop(), mkU32(7)), 3630 mkU8(11)), 3631 binop(Iop_And32, get_C3210(), mkU32(0x4700)) 3632 )); 3633} 3634 3635 3636/* ------------------------------------------------------- */ 3637/* Given all that stack-mangling junk, we can now go ahead 3638 and describe FP instructions. 3639*/ 3640 3641/* ST(0) = ST(0) `op` mem64/32(addr) 3642 Need to check ST(0)'s tag on read, but not on write. 3643*/ 3644static 3645void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, 3646 IROp op, Bool dbl ) 3647{ 3648 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 3649 if (dbl) { 3650 put_ST_UNCHECKED(0, 3651 triop( op, 3652 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3653 get_ST(0), 3654 loadLE(Ity_F64,mkexpr(addr)) 3655 )); 3656 } else { 3657 put_ST_UNCHECKED(0, 3658 triop( op, 3659 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3660 get_ST(0), 3661 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))) 3662 )); 3663 } 3664} 3665 3666 3667/* ST(0) = mem64/32(addr) `op` ST(0) 3668 Need to check ST(0)'s tag on read, but not on write. 3669*/ 3670static 3671void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, 3672 IROp op, Bool dbl ) 3673{ 3674 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 3675 if (dbl) { 3676 put_ST_UNCHECKED(0, 3677 triop( op, 3678 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3679 loadLE(Ity_F64,mkexpr(addr)), 3680 get_ST(0) 3681 )); 3682 } else { 3683 put_ST_UNCHECKED(0, 3684 triop( op, 3685 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3686 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))), 3687 get_ST(0) 3688 )); 3689 } 3690} 3691 3692 3693/* ST(dst) = ST(dst) `op` ST(src). 3694 Check dst and src tags when reading but not on write. 3695*/ 3696static 3697void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 3698 Bool pop_after ) 3699{ 3700 DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"", 3701 (Int)st_src, (Int)st_dst ); 3702 put_ST_UNCHECKED( 3703 st_dst, 3704 triop( op, 3705 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3706 get_ST(st_dst), 3707 get_ST(st_src) ) 3708 ); 3709 if (pop_after) 3710 fp_pop(); 3711} 3712 3713/* ST(dst) = ST(src) `op` ST(dst). 3714 Check dst and src tags when reading but not on write. 3715*/ 3716static 3717void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 3718 Bool pop_after ) 3719{ 3720 DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"", 3721 (Int)st_src, (Int)st_dst ); 3722 put_ST_UNCHECKED( 3723 st_dst, 3724 triop( op, 3725 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3726 get_ST(st_src), 3727 get_ST(st_dst) ) 3728 ); 3729 if (pop_after) 3730 fp_pop(); 3731} 3732 3733/* %eflags(Z,P,C) = UCOMI( st(0), st(i) ) */ 3734static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after ) 3735{ 3736 DIP("fucomi%s %%st(0),%%st(%d)\n", pop_after ? "p" : "", (Int)i ); 3737 /* This is a bit of a hack (and isn't really right). It sets 3738 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel 3739 documentation implies A and S are unchanged. 3740 */ 3741 /* It's also fishy in that it is used both for COMIP and 3742 UCOMIP, and they aren't the same (although similar). */ 3743 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 3744 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 3745 stmt( IRStmt_Put( OFFB_CC_DEP1, 3746 binop( Iop_And32, 3747 binop(Iop_CmpF64, get_ST(0), get_ST(i)), 3748 mkU32(0x45) 3749 ))); 3750 /* Set NDEP even though it isn't used. This makes redundant-PUT 3751 elimination of previous stores to this field work better. */ 3752 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 3753 if (pop_after) 3754 fp_pop(); 3755} 3756 3757 3758static 3759UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta ) 3760{ 3761 Int len; 3762 UInt r_src, r_dst; 3763 HChar dis_buf[50]; 3764 IRTemp t1, t2; 3765 3766 /* On entry, delta points at the second byte of the insn (the modrm 3767 byte).*/ 3768 UChar first_opcode = getIByte(delta-1); 3769 UChar modrm = getIByte(delta+0); 3770 3771 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */ 3772 3773 if (first_opcode == 0xD8) { 3774 if (modrm < 0xC0) { 3775 3776 /* bits 5,4,3 are an opcode extension, and the modRM also 3777 specifies an address. */ 3778 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 3779 delta += len; 3780 3781 switch (gregOfRM(modrm)) { 3782 3783 case 0: /* FADD single-real */ 3784 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False ); 3785 break; 3786 3787 case 1: /* FMUL single-real */ 3788 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False ); 3789 break; 3790 3791 case 2: /* FCOM single-real */ 3792 DIP("fcoms %s\n", dis_buf); 3793 /* This forces C1 to zero, which isn't right. */ 3794 put_C3210( 3795 binop( Iop_And32, 3796 binop(Iop_Shl32, 3797 binop(Iop_CmpF64, 3798 get_ST(0), 3799 unop(Iop_F32toF64, 3800 loadLE(Ity_F32,mkexpr(addr)))), 3801 mkU8(8)), 3802 mkU32(0x4500) 3803 )); 3804 break; 3805 3806 case 3: /* FCOMP single-real */ 3807 DIP("fcomps %s\n", dis_buf); 3808 /* This forces C1 to zero, which isn't right. */ 3809 put_C3210( 3810 binop( Iop_And32, 3811 binop(Iop_Shl32, 3812 binop(Iop_CmpF64, 3813 get_ST(0), 3814 unop(Iop_F32toF64, 3815 loadLE(Ity_F32,mkexpr(addr)))), 3816 mkU8(8)), 3817 mkU32(0x4500) 3818 )); 3819 fp_pop(); 3820 break; 3821 3822 case 4: /* FSUB single-real */ 3823 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False ); 3824 break; 3825 3826 case 5: /* FSUBR single-real */ 3827 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False ); 3828 break; 3829 3830 case 6: /* FDIV single-real */ 3831 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False ); 3832 break; 3833 3834 case 7: /* FDIVR single-real */ 3835 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False ); 3836 break; 3837 3838 default: 3839 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 3840 vex_printf("first_opcode == 0xD8\n"); 3841 goto decode_fail; 3842 } 3843 } else { 3844 delta++; 3845 switch (modrm) { 3846 3847 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */ 3848 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False ); 3849 break; 3850 3851 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */ 3852 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False ); 3853 break; 3854 3855 /* Dunno if this is right */ 3856 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */ 3857 r_dst = (UInt)modrm - 0xD0; 3858 DIP("fcom %%st(0),%%st(%d)\n", (Int)r_dst); 3859 /* This forces C1 to zero, which isn't right. */ 3860 put_C3210( 3861 binop( Iop_And32, 3862 binop(Iop_Shl32, 3863 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 3864 mkU8(8)), 3865 mkU32(0x4500) 3866 )); 3867 break; 3868 3869 /* Dunno if this is right */ 3870 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */ 3871 r_dst = (UInt)modrm - 0xD8; 3872 DIP("fcomp %%st(0),%%st(%d)\n", (Int)r_dst); 3873 /* This forces C1 to zero, which isn't right. */ 3874 put_C3210( 3875 binop( Iop_And32, 3876 binop(Iop_Shl32, 3877 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 3878 mkU8(8)), 3879 mkU32(0x4500) 3880 )); 3881 fp_pop(); 3882 break; 3883 3884 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */ 3885 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False ); 3886 break; 3887 3888 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */ 3889 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False ); 3890 break; 3891 3892 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */ 3893 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False ); 3894 break; 3895 3896 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */ 3897 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False ); 3898 break; 3899 3900 default: 3901 goto decode_fail; 3902 } 3903 } 3904 } 3905 3906 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */ 3907 else 3908 if (first_opcode == 0xD9) { 3909 if (modrm < 0xC0) { 3910 3911 /* bits 5,4,3 are an opcode extension, and the modRM also 3912 specifies an address. */ 3913 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 3914 delta += len; 3915 3916 switch (gregOfRM(modrm)) { 3917 3918 case 0: /* FLD single-real */ 3919 DIP("flds %s\n", dis_buf); 3920 fp_push(); 3921 put_ST(0, unop(Iop_F32toF64, 3922 loadLE(Ity_F32, mkexpr(addr)))); 3923 break; 3924 3925 case 2: /* FST single-real */ 3926 DIP("fsts %s\n", dis_buf); 3927 storeLE(mkexpr(addr), 3928 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 3929 break; 3930 3931 case 3: /* FSTP single-real */ 3932 DIP("fstps %s\n", dis_buf); 3933 storeLE(mkexpr(addr), 3934 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 3935 fp_pop(); 3936 break; 3937 3938 case 4: { /* FLDENV m28 */ 3939 /* Uses dirty helper: 3940 VexEmWarn x86g_do_FLDENV ( VexGuestX86State*, HWord ) */ 3941 IRTemp ew = newTemp(Ity_I32); 3942 IRDirty* d = unsafeIRDirty_0_N ( 3943 0/*regparms*/, 3944 "x86g_dirtyhelper_FLDENV", 3945 &x86g_dirtyhelper_FLDENV, 3946 mkIRExprVec_1( mkexpr(addr) ) 3947 ); 3948 d->needsBBP = True; 3949 d->tmp = ew; 3950 /* declare we're reading memory */ 3951 d->mFx = Ifx_Read; 3952 d->mAddr = mkexpr(addr); 3953 d->mSize = 28; 3954 3955 /* declare we're writing guest state */ 3956 d->nFxState = 4; 3957 vex_bzero(&d->fxState, sizeof(d->fxState)); 3958 3959 d->fxState[0].fx = Ifx_Write; 3960 d->fxState[0].offset = OFFB_FTOP; 3961 d->fxState[0].size = sizeof(UInt); 3962 3963 d->fxState[1].fx = Ifx_Write; 3964 d->fxState[1].offset = OFFB_FPTAGS; 3965 d->fxState[1].size = 8 * sizeof(UChar); 3966 3967 d->fxState[2].fx = Ifx_Write; 3968 d->fxState[2].offset = OFFB_FPROUND; 3969 d->fxState[2].size = sizeof(UInt); 3970 3971 d->fxState[3].fx = Ifx_Write; 3972 d->fxState[3].offset = OFFB_FC3210; 3973 d->fxState[3].size = sizeof(UInt); 3974 3975 stmt( IRStmt_Dirty(d) ); 3976 3977 /* ew contains any emulation warning we may need to 3978 issue. If needed, side-exit to the next insn, 3979 reporting the warning, so that Valgrind's dispatcher 3980 sees the warning. */ 3981 put_emwarn( mkexpr(ew) ); 3982 stmt( 3983 IRStmt_Exit( 3984 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 3985 Ijk_EmWarn, 3986 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta), 3987 OFFB_EIP 3988 ) 3989 ); 3990 3991 DIP("fldenv %s\n", dis_buf); 3992 break; 3993 } 3994 3995 case 5: {/* FLDCW */ 3996 /* The only thing we observe in the control word is the 3997 rounding mode. Therefore, pass the 16-bit value 3998 (x87 native-format control word) to a clean helper, 3999 getting back a 64-bit value, the lower half of which 4000 is the FPROUND value to store, and the upper half of 4001 which is the emulation-warning token which may be 4002 generated. 4003 */ 4004 /* ULong x86h_check_fldcw ( UInt ); */ 4005 IRTemp t64 = newTemp(Ity_I64); 4006 IRTemp ew = newTemp(Ity_I32); 4007 DIP("fldcw %s\n", dis_buf); 4008 assign( t64, mkIRExprCCall( 4009 Ity_I64, 0/*regparms*/, 4010 "x86g_check_fldcw", 4011 &x86g_check_fldcw, 4012 mkIRExprVec_1( 4013 unop( Iop_16Uto32, 4014 loadLE(Ity_I16, mkexpr(addr))) 4015 ) 4016 ) 4017 ); 4018 4019 put_fpround( unop(Iop_64to32, mkexpr(t64)) ); 4020 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 4021 put_emwarn( mkexpr(ew) ); 4022 /* Finally, if an emulation warning was reported, 4023 side-exit to the next insn, reporting the warning, 4024 so that Valgrind's dispatcher sees the warning. */ 4025 stmt( 4026 IRStmt_Exit( 4027 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 4028 Ijk_EmWarn, 4029 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta), 4030 OFFB_EIP 4031 ) 4032 ); 4033 break; 4034 } 4035 4036 case 6: { /* FNSTENV m28 */ 4037 /* Uses dirty helper: 4038 void x86g_do_FSTENV ( VexGuestX86State*, HWord ) */ 4039 IRDirty* d = unsafeIRDirty_0_N ( 4040 0/*regparms*/, 4041 "x86g_dirtyhelper_FSTENV", 4042 &x86g_dirtyhelper_FSTENV, 4043 mkIRExprVec_1( mkexpr(addr) ) 4044 ); 4045 d->needsBBP = True; 4046 /* declare we're writing memory */ 4047 d->mFx = Ifx_Write; 4048 d->mAddr = mkexpr(addr); 4049 d->mSize = 28; 4050 4051 /* declare we're reading guest state */ 4052 d->nFxState = 4; 4053 vex_bzero(&d->fxState, sizeof(d->fxState)); 4054 4055 d->fxState[0].fx = Ifx_Read; 4056 d->fxState[0].offset = OFFB_FTOP; 4057 d->fxState[0].size = sizeof(UInt); 4058 4059 d->fxState[1].fx = Ifx_Read; 4060 d->fxState[1].offset = OFFB_FPTAGS; 4061 d->fxState[1].size = 8 * sizeof(UChar); 4062 4063 d->fxState[2].fx = Ifx_Read; 4064 d->fxState[2].offset = OFFB_FPROUND; 4065 d->fxState[2].size = sizeof(UInt); 4066 4067 d->fxState[3].fx = Ifx_Read; 4068 d->fxState[3].offset = OFFB_FC3210; 4069 d->fxState[3].size = sizeof(UInt); 4070 4071 stmt( IRStmt_Dirty(d) ); 4072 4073 DIP("fnstenv %s\n", dis_buf); 4074 break; 4075 } 4076 4077 case 7: /* FNSTCW */ 4078 /* Fake up a native x87 FPU control word. The only 4079 thing it depends on is FPROUND[1:0], so call a clean 4080 helper to cook it up. */ 4081 /* UInt x86h_create_fpucw ( UInt fpround ) */ 4082 DIP("fnstcw %s\n", dis_buf); 4083 storeLE( 4084 mkexpr(addr), 4085 unop( Iop_32to16, 4086 mkIRExprCCall( 4087 Ity_I32, 0/*regp*/, 4088 "x86g_create_fpucw", &x86g_create_fpucw, 4089 mkIRExprVec_1( get_fpround() ) 4090 ) 4091 ) 4092 ); 4093 break; 4094 4095 default: 4096 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 4097 vex_printf("first_opcode == 0xD9\n"); 4098 goto decode_fail; 4099 } 4100 4101 } else { 4102 delta++; 4103 switch (modrm) { 4104 4105 case 0xC0 ... 0xC7: /* FLD %st(?) */ 4106 r_src = (UInt)modrm - 0xC0; 4107 DIP("fld %%st(%d)\n", (Int)r_src); 4108 t1 = newTemp(Ity_F64); 4109 assign(t1, get_ST(r_src)); 4110 fp_push(); 4111 put_ST(0, mkexpr(t1)); 4112 break; 4113 4114 case 0xC8 ... 0xCF: /* FXCH %st(?) */ 4115 r_src = (UInt)modrm - 0xC8; 4116 DIP("fxch %%st(%d)\n", (Int)r_src); 4117 t1 = newTemp(Ity_F64); 4118 t2 = newTemp(Ity_F64); 4119 assign(t1, get_ST(0)); 4120 assign(t2, get_ST(r_src)); 4121 put_ST_UNCHECKED(0, mkexpr(t2)); 4122 put_ST_UNCHECKED(r_src, mkexpr(t1)); 4123 break; 4124 4125 case 0xE0: /* FCHS */ 4126 DIP("fchs\n"); 4127 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0))); 4128 break; 4129 4130 case 0xE1: /* FABS */ 4131 DIP("fabs\n"); 4132 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0))); 4133 break; 4134 4135 case 0xE4: /* FTST */ 4136 DIP("ftst\n"); 4137 /* This forces C1 to zero, which isn't right. */ 4138 /* Well, in fact the Intel docs say (bizarrely): "C1 is 4139 set to 0 if stack underflow occurred; otherwise, set 4140 to 0" which is pretty nonsensical. I guess it's a 4141 typo. */ 4142 put_C3210( 4143 binop( Iop_And32, 4144 binop(Iop_Shl32, 4145 binop(Iop_CmpF64, 4146 get_ST(0), 4147 IRExpr_Const(IRConst_F64i(0x0ULL))), 4148 mkU8(8)), 4149 mkU32(0x4500) 4150 )); 4151 break; 4152 4153 case 0xE5: { /* FXAM */ 4154 /* This is an interesting one. It examines %st(0), 4155 regardless of whether the tag says it's empty or not. 4156 Here, just pass both the tag (in our format) and the 4157 value (as a double, actually a ULong) to a helper 4158 function. */ 4159 IRExpr** args 4160 = mkIRExprVec_2( unop(Iop_8Uto32, get_ST_TAG(0)), 4161 unop(Iop_ReinterpF64asI64, 4162 get_ST_UNCHECKED(0)) ); 4163 put_C3210(mkIRExprCCall( 4164 Ity_I32, 4165 0/*regparm*/, 4166 "x86g_calculate_FXAM", &x86g_calculate_FXAM, 4167 args 4168 )); 4169 DIP("fxam\n"); 4170 break; 4171 } 4172 4173 case 0xE8: /* FLD1 */ 4174 DIP("fld1\n"); 4175 fp_push(); 4176 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */ 4177 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL))); 4178 break; 4179 4180 case 0xE9: /* FLDL2T */ 4181 DIP("fldl2t\n"); 4182 fp_push(); 4183 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */ 4184 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL))); 4185 break; 4186 4187 case 0xEA: /* FLDL2E */ 4188 DIP("fldl2e\n"); 4189 fp_push(); 4190 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */ 4191 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL))); 4192 break; 4193 4194 case 0xEB: /* FLDPI */ 4195 DIP("fldpi\n"); 4196 fp_push(); 4197 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */ 4198 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL))); 4199 break; 4200 4201 case 0xEC: /* FLDLG2 */ 4202 DIP("fldlg2\n"); 4203 fp_push(); 4204 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */ 4205 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL))); 4206 break; 4207 4208 case 0xED: /* FLDLN2 */ 4209 DIP("fldln2\n"); 4210 fp_push(); 4211 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */ 4212 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL))); 4213 break; 4214 4215 case 0xEE: /* FLDZ */ 4216 DIP("fldz\n"); 4217 fp_push(); 4218 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */ 4219 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL))); 4220 break; 4221 4222 case 0xF0: /* F2XM1 */ 4223 DIP("f2xm1\n"); 4224 put_ST_UNCHECKED(0, 4225 binop(Iop_2xm1F64, 4226 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4227 get_ST(0))); 4228 break; 4229 4230 case 0xF1: /* FYL2X */ 4231 DIP("fyl2x\n"); 4232 put_ST_UNCHECKED(1, 4233 triop(Iop_Yl2xF64, 4234 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4235 get_ST(1), 4236 get_ST(0))); 4237 fp_pop(); 4238 break; 4239 4240 case 0xF2: /* FPTAN */ 4241 DIP("ftan\n"); 4242 put_ST_UNCHECKED(0, 4243 binop(Iop_TanF64, 4244 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4245 get_ST(0))); 4246 fp_push(); 4247 put_ST(0, IRExpr_Const(IRConst_F64(1.0))); 4248 clear_C2(); /* HACK */ 4249 break; 4250 4251 case 0xF3: /* FPATAN */ 4252 DIP("fpatan\n"); 4253 put_ST_UNCHECKED(1, 4254 triop(Iop_AtanF64, 4255 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4256 get_ST(1), 4257 get_ST(0))); 4258 fp_pop(); 4259 break; 4260 4261 case 0xF4: { /* FXTRACT */ 4262 IRTemp argF = newTemp(Ity_F64); 4263 IRTemp sigF = newTemp(Ity_F64); 4264 IRTemp expF = newTemp(Ity_F64); 4265 IRTemp argI = newTemp(Ity_I64); 4266 IRTemp sigI = newTemp(Ity_I64); 4267 IRTemp expI = newTemp(Ity_I64); 4268 DIP("fxtract\n"); 4269 assign( argF, get_ST(0) ); 4270 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF))); 4271 assign( sigI, 4272 mkIRExprCCall( 4273 Ity_I64, 0/*regparms*/, 4274 "x86amd64g_calculate_FXTRACT", 4275 &x86amd64g_calculate_FXTRACT, 4276 mkIRExprVec_2( mkexpr(argI), 4277 mkIRExpr_HWord(0)/*sig*/ )) 4278 ); 4279 assign( expI, 4280 mkIRExprCCall( 4281 Ity_I64, 0/*regparms*/, 4282 "x86amd64g_calculate_FXTRACT", 4283 &x86amd64g_calculate_FXTRACT, 4284 mkIRExprVec_2( mkexpr(argI), 4285 mkIRExpr_HWord(1)/*exp*/ )) 4286 ); 4287 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) ); 4288 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) ); 4289 /* exponent */ 4290 put_ST_UNCHECKED(0, mkexpr(expF) ); 4291 fp_push(); 4292 /* significand */ 4293 put_ST(0, mkexpr(sigF) ); 4294 break; 4295 } 4296 4297 case 0xF5: { /* FPREM1 -- IEEE compliant */ 4298 IRTemp a1 = newTemp(Ity_F64); 4299 IRTemp a2 = newTemp(Ity_F64); 4300 DIP("fprem1\n"); 4301 /* Do FPREM1 twice, once to get the remainder, and once 4302 to get the C3210 flag values. */ 4303 assign( a1, get_ST(0) ); 4304 assign( a2, get_ST(1) ); 4305 put_ST_UNCHECKED(0, 4306 triop(Iop_PRem1F64, 4307 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4308 mkexpr(a1), 4309 mkexpr(a2))); 4310 put_C3210( 4311 triop(Iop_PRem1C3210F64, 4312 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4313 mkexpr(a1), 4314 mkexpr(a2)) ); 4315 break; 4316 } 4317 4318 case 0xF7: /* FINCSTP */ 4319 DIP("fprem\n"); 4320 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 4321 break; 4322 4323 case 0xF8: { /* FPREM -- not IEEE compliant */ 4324 IRTemp a1 = newTemp(Ity_F64); 4325 IRTemp a2 = newTemp(Ity_F64); 4326 DIP("fprem\n"); 4327 /* Do FPREM twice, once to get the remainder, and once 4328 to get the C3210 flag values. */ 4329 assign( a1, get_ST(0) ); 4330 assign( a2, get_ST(1) ); 4331 put_ST_UNCHECKED(0, 4332 triop(Iop_PRemF64, 4333 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4334 mkexpr(a1), 4335 mkexpr(a2))); 4336 put_C3210( 4337 triop(Iop_PRemC3210F64, 4338 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4339 mkexpr(a1), 4340 mkexpr(a2)) ); 4341 break; 4342 } 4343 4344 case 0xF9: /* FYL2XP1 */ 4345 DIP("fyl2xp1\n"); 4346 put_ST_UNCHECKED(1, 4347 triop(Iop_Yl2xp1F64, 4348 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4349 get_ST(1), 4350 get_ST(0))); 4351 fp_pop(); 4352 break; 4353 4354 case 0xFA: /* FSQRT */ 4355 DIP("fsqrt\n"); 4356 put_ST_UNCHECKED(0, 4357 binop(Iop_SqrtF64, 4358 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4359 get_ST(0))); 4360 break; 4361 4362 case 0xFB: { /* FSINCOS */ 4363 IRTemp a1 = newTemp(Ity_F64); 4364 assign( a1, get_ST(0) ); 4365 DIP("fsincos\n"); 4366 put_ST_UNCHECKED(0, 4367 binop(Iop_SinF64, 4368 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4369 mkexpr(a1))); 4370 fp_push(); 4371 put_ST(0, 4372 binop(Iop_CosF64, 4373 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4374 mkexpr(a1))); 4375 clear_C2(); /* HACK */ 4376 break; 4377 } 4378 4379 case 0xFC: /* FRNDINT */ 4380 DIP("frndint\n"); 4381 put_ST_UNCHECKED(0, 4382 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) ); 4383 break; 4384 4385 case 0xFD: /* FSCALE */ 4386 DIP("fscale\n"); 4387 put_ST_UNCHECKED(0, 4388 triop(Iop_ScaleF64, 4389 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4390 get_ST(0), 4391 get_ST(1))); 4392 break; 4393 4394 case 0xFE: /* FSIN */ 4395 DIP("fsin\n"); 4396 put_ST_UNCHECKED(0, 4397 binop(Iop_SinF64, 4398 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4399 get_ST(0))); 4400 clear_C2(); /* HACK */ 4401 break; 4402 4403 case 0xFF: /* FCOS */ 4404 DIP("fcos\n"); 4405 put_ST_UNCHECKED(0, 4406 binop(Iop_CosF64, 4407 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4408 get_ST(0))); 4409 clear_C2(); /* HACK */ 4410 break; 4411 4412 default: 4413 goto decode_fail; 4414 } 4415 } 4416 } 4417 4418 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */ 4419 else 4420 if (first_opcode == 0xDA) { 4421 4422 if (modrm < 0xC0) { 4423 4424 /* bits 5,4,3 are an opcode extension, and the modRM also 4425 specifies an address. */ 4426 IROp fop; 4427 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 4428 delta += len; 4429 switch (gregOfRM(modrm)) { 4430 4431 case 0: /* FIADD m32int */ /* ST(0) += m32int */ 4432 DIP("fiaddl %s\n", dis_buf); 4433 fop = Iop_AddF64; 4434 goto do_fop_m32; 4435 4436 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */ 4437 DIP("fimull %s\n", dis_buf); 4438 fop = Iop_MulF64; 4439 goto do_fop_m32; 4440 4441 case 2: /* FICOM m32int */ 4442 DIP("ficoml %s\n", dis_buf); 4443 /* This forces C1 to zero, which isn't right. */ 4444 put_C3210( 4445 binop( Iop_And32, 4446 binop(Iop_Shl32, 4447 binop(Iop_CmpF64, 4448 get_ST(0), 4449 unop(Iop_I32StoF64, 4450 loadLE(Ity_I32,mkexpr(addr)))), 4451 mkU8(8)), 4452 mkU32(0x4500) 4453 )); 4454 break; 4455 4456 case 3: /* FICOMP m32int */ 4457 DIP("ficompl %s\n", dis_buf); 4458 /* This forces C1 to zero, which isn't right. */ 4459 put_C3210( 4460 binop( Iop_And32, 4461 binop(Iop_Shl32, 4462 binop(Iop_CmpF64, 4463 get_ST(0), 4464 unop(Iop_I32StoF64, 4465 loadLE(Ity_I32,mkexpr(addr)))), 4466 mkU8(8)), 4467 mkU32(0x4500) 4468 )); 4469 fp_pop(); 4470 break; 4471 4472 case 4: /* FISUB m32int */ /* ST(0) -= m32int */ 4473 DIP("fisubl %s\n", dis_buf); 4474 fop = Iop_SubF64; 4475 goto do_fop_m32; 4476 4477 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */ 4478 DIP("fisubrl %s\n", dis_buf); 4479 fop = Iop_SubF64; 4480 goto do_foprev_m32; 4481 4482 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */ 4483 DIP("fidivl %s\n", dis_buf); 4484 fop = Iop_DivF64; 4485 goto do_fop_m32; 4486 4487 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */ 4488 DIP("fidivrl %s\n", dis_buf); 4489 fop = Iop_DivF64; 4490 goto do_foprev_m32; 4491 4492 do_fop_m32: 4493 put_ST_UNCHECKED(0, 4494 triop(fop, 4495 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4496 get_ST(0), 4497 unop(Iop_I32StoF64, 4498 loadLE(Ity_I32, mkexpr(addr))))); 4499 break; 4500 4501 do_foprev_m32: 4502 put_ST_UNCHECKED(0, 4503 triop(fop, 4504 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4505 unop(Iop_I32StoF64, 4506 loadLE(Ity_I32, mkexpr(addr))), 4507 get_ST(0))); 4508 break; 4509 4510 default: 4511 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 4512 vex_printf("first_opcode == 0xDA\n"); 4513 goto decode_fail; 4514 } 4515 4516 } else { 4517 4518 delta++; 4519 switch (modrm) { 4520 4521 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */ 4522 r_src = (UInt)modrm - 0xC0; 4523 DIP("fcmovb %%st(%d), %%st(0)\n", (Int)r_src); 4524 put_ST_UNCHECKED(0, 4525 IRExpr_Mux0X( 4526 unop(Iop_1Uto8, 4527 mk_x86g_calculate_condition(X86CondB)), 4528 get_ST(0), get_ST(r_src)) ); 4529 break; 4530 4531 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */ 4532 r_src = (UInt)modrm - 0xC8; 4533 DIP("fcmovz %%st(%d), %%st(0)\n", (Int)r_src); 4534 put_ST_UNCHECKED(0, 4535 IRExpr_Mux0X( 4536 unop(Iop_1Uto8, 4537 mk_x86g_calculate_condition(X86CondZ)), 4538 get_ST(0), get_ST(r_src)) ); 4539 break; 4540 4541 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */ 4542 r_src = (UInt)modrm - 0xD0; 4543 DIP("fcmovbe %%st(%d), %%st(0)\n", (Int)r_src); 4544 put_ST_UNCHECKED(0, 4545 IRExpr_Mux0X( 4546 unop(Iop_1Uto8, 4547 mk_x86g_calculate_condition(X86CondBE)), 4548 get_ST(0), get_ST(r_src)) ); 4549 break; 4550 4551 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */ 4552 r_src = (UInt)modrm - 0xD8; 4553 DIP("fcmovu %%st(%d), %%st(0)\n", (Int)r_src); 4554 put_ST_UNCHECKED(0, 4555 IRExpr_Mux0X( 4556 unop(Iop_1Uto8, 4557 mk_x86g_calculate_condition(X86CondP)), 4558 get_ST(0), get_ST(r_src)) ); 4559 break; 4560 4561 case 0xE9: /* FUCOMPP %st(0),%st(1) */ 4562 DIP("fucompp %%st(0),%%st(1)\n"); 4563 /* This forces C1 to zero, which isn't right. */ 4564 put_C3210( 4565 binop( Iop_And32, 4566 binop(Iop_Shl32, 4567 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 4568 mkU8(8)), 4569 mkU32(0x4500) 4570 )); 4571 fp_pop(); 4572 fp_pop(); 4573 break; 4574 4575 default: 4576 goto decode_fail; 4577 } 4578 4579 } 4580 } 4581 4582 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */ 4583 else 4584 if (first_opcode == 0xDB) { 4585 if (modrm < 0xC0) { 4586 4587 /* bits 5,4,3 are an opcode extension, and the modRM also 4588 specifies an address. */ 4589 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 4590 delta += len; 4591 4592 switch (gregOfRM(modrm)) { 4593 4594 case 0: /* FILD m32int */ 4595 DIP("fildl %s\n", dis_buf); 4596 fp_push(); 4597 put_ST(0, unop(Iop_I32StoF64, 4598 loadLE(Ity_I32, mkexpr(addr)))); 4599 break; 4600 4601 case 1: /* FISTTPL m32 (SSE3) */ 4602 DIP("fisttpl %s\n", dis_buf); 4603 storeLE( mkexpr(addr), 4604 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ); 4605 fp_pop(); 4606 break; 4607 4608 case 2: /* FIST m32 */ 4609 DIP("fistl %s\n", dis_buf); 4610 storeLE( mkexpr(addr), 4611 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 4612 break; 4613 4614 case 3: /* FISTP m32 */ 4615 DIP("fistpl %s\n", dis_buf); 4616 storeLE( mkexpr(addr), 4617 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 4618 fp_pop(); 4619 break; 4620 4621 case 5: { /* FLD extended-real */ 4622 /* Uses dirty helper: 4623 ULong x86g_loadF80le ( UInt ) 4624 addr holds the address. First, do a dirty call to 4625 get hold of the data. */ 4626 IRTemp val = newTemp(Ity_I64); 4627 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) ); 4628 4629 IRDirty* d = unsafeIRDirty_1_N ( 4630 val, 4631 0/*regparms*/, 4632 "x86g_dirtyhelper_loadF80le", 4633 &x86g_dirtyhelper_loadF80le, 4634 args 4635 ); 4636 /* declare that we're reading memory */ 4637 d->mFx = Ifx_Read; 4638 d->mAddr = mkexpr(addr); 4639 d->mSize = 10; 4640 4641 /* execute the dirty call, dumping the result in val. */ 4642 stmt( IRStmt_Dirty(d) ); 4643 fp_push(); 4644 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val))); 4645 4646 DIP("fldt %s\n", dis_buf); 4647 break; 4648 } 4649 4650 case 7: { /* FSTP extended-real */ 4651 /* Uses dirty helper: void x86g_storeF80le ( UInt, ULong ) */ 4652 IRExpr** args 4653 = mkIRExprVec_2( mkexpr(addr), 4654 unop(Iop_ReinterpF64asI64, get_ST(0)) ); 4655 4656 IRDirty* d = unsafeIRDirty_0_N ( 4657 0/*regparms*/, 4658 "x86g_dirtyhelper_storeF80le", 4659 &x86g_dirtyhelper_storeF80le, 4660 args 4661 ); 4662 /* declare we're writing memory */ 4663 d->mFx = Ifx_Write; 4664 d->mAddr = mkexpr(addr); 4665 d->mSize = 10; 4666 4667 /* execute the dirty call. */ 4668 stmt( IRStmt_Dirty(d) ); 4669 fp_pop(); 4670 4671 DIP("fstpt\n %s", dis_buf); 4672 break; 4673 } 4674 4675 default: 4676 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 4677 vex_printf("first_opcode == 0xDB\n"); 4678 goto decode_fail; 4679 } 4680 4681 } else { 4682 4683 delta++; 4684 switch (modrm) { 4685 4686 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */ 4687 r_src = (UInt)modrm - 0xC0; 4688 DIP("fcmovnb %%st(%d), %%st(0)\n", (Int)r_src); 4689 put_ST_UNCHECKED(0, 4690 IRExpr_Mux0X( 4691 unop(Iop_1Uto8, 4692 mk_x86g_calculate_condition(X86CondNB)), 4693 get_ST(0), get_ST(r_src)) ); 4694 break; 4695 4696 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */ 4697 r_src = (UInt)modrm - 0xC8; 4698 DIP("fcmovnz %%st(%d), %%st(0)\n", (Int)r_src); 4699 put_ST_UNCHECKED(0, 4700 IRExpr_Mux0X( 4701 unop(Iop_1Uto8, 4702 mk_x86g_calculate_condition(X86CondNZ)), 4703 get_ST(0), get_ST(r_src)) ); 4704 break; 4705 4706 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */ 4707 r_src = (UInt)modrm - 0xD0; 4708 DIP("fcmovnbe %%st(%d), %%st(0)\n", (Int)r_src); 4709 put_ST_UNCHECKED(0, 4710 IRExpr_Mux0X( 4711 unop(Iop_1Uto8, 4712 mk_x86g_calculate_condition(X86CondNBE)), 4713 get_ST(0), get_ST(r_src)) ); 4714 break; 4715 4716 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */ 4717 r_src = (UInt)modrm - 0xD8; 4718 DIP("fcmovnu %%st(%d), %%st(0)\n", (Int)r_src); 4719 put_ST_UNCHECKED(0, 4720 IRExpr_Mux0X( 4721 unop(Iop_1Uto8, 4722 mk_x86g_calculate_condition(X86CondNP)), 4723 get_ST(0), get_ST(r_src)) ); 4724 break; 4725 4726 case 0xE2: 4727 DIP("fnclex\n"); 4728 break; 4729 4730 case 0xE3: { 4731 /* Uses dirty helper: 4732 void x86g_do_FINIT ( VexGuestX86State* ) */ 4733 IRDirty* d = unsafeIRDirty_0_N ( 4734 0/*regparms*/, 4735 "x86g_dirtyhelper_FINIT", 4736 &x86g_dirtyhelper_FINIT, 4737 mkIRExprVec_0() 4738 ); 4739 d->needsBBP = True; 4740 4741 /* declare we're writing guest state */ 4742 d->nFxState = 5; 4743 vex_bzero(&d->fxState, sizeof(d->fxState)); 4744 4745 d->fxState[0].fx = Ifx_Write; 4746 d->fxState[0].offset = OFFB_FTOP; 4747 d->fxState[0].size = sizeof(UInt); 4748 4749 d->fxState[1].fx = Ifx_Write; 4750 d->fxState[1].offset = OFFB_FPREGS; 4751 d->fxState[1].size = 8 * sizeof(ULong); 4752 4753 d->fxState[2].fx = Ifx_Write; 4754 d->fxState[2].offset = OFFB_FPTAGS; 4755 d->fxState[2].size = 8 * sizeof(UChar); 4756 4757 d->fxState[3].fx = Ifx_Write; 4758 d->fxState[3].offset = OFFB_FPROUND; 4759 d->fxState[3].size = sizeof(UInt); 4760 4761 d->fxState[4].fx = Ifx_Write; 4762 d->fxState[4].offset = OFFB_FC3210; 4763 d->fxState[4].size = sizeof(UInt); 4764 4765 stmt( IRStmt_Dirty(d) ); 4766 4767 DIP("fninit\n"); 4768 break; 4769 } 4770 4771 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */ 4772 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False ); 4773 break; 4774 4775 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */ 4776 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False ); 4777 break; 4778 4779 default: 4780 goto decode_fail; 4781 } 4782 } 4783 } 4784 4785 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */ 4786 else 4787 if (first_opcode == 0xDC) { 4788 if (modrm < 0xC0) { 4789 4790 /* bits 5,4,3 are an opcode extension, and the modRM also 4791 specifies an address. */ 4792 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 4793 delta += len; 4794 4795 switch (gregOfRM(modrm)) { 4796 4797 case 0: /* FADD double-real */ 4798 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True ); 4799 break; 4800 4801 case 1: /* FMUL double-real */ 4802 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True ); 4803 break; 4804 4805 case 2: /* FCOM double-real */ 4806 DIP("fcoml %s\n", dis_buf); 4807 /* This forces C1 to zero, which isn't right. */ 4808 put_C3210( 4809 binop( Iop_And32, 4810 binop(Iop_Shl32, 4811 binop(Iop_CmpF64, 4812 get_ST(0), 4813 loadLE(Ity_F64,mkexpr(addr))), 4814 mkU8(8)), 4815 mkU32(0x4500) 4816 )); 4817 break; 4818 4819 case 3: /* FCOMP double-real */ 4820 DIP("fcompl %s\n", dis_buf); 4821 /* This forces C1 to zero, which isn't right. */ 4822 put_C3210( 4823 binop( Iop_And32, 4824 binop(Iop_Shl32, 4825 binop(Iop_CmpF64, 4826 get_ST(0), 4827 loadLE(Ity_F64,mkexpr(addr))), 4828 mkU8(8)), 4829 mkU32(0x4500) 4830 )); 4831 fp_pop(); 4832 break; 4833 4834 case 4: /* FSUB double-real */ 4835 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True ); 4836 break; 4837 4838 case 5: /* FSUBR double-real */ 4839 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True ); 4840 break; 4841 4842 case 6: /* FDIV double-real */ 4843 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True ); 4844 break; 4845 4846 case 7: /* FDIVR double-real */ 4847 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True ); 4848 break; 4849 4850 default: 4851 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 4852 vex_printf("first_opcode == 0xDC\n"); 4853 goto decode_fail; 4854 } 4855 4856 } else { 4857 4858 delta++; 4859 switch (modrm) { 4860 4861 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */ 4862 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False ); 4863 break; 4864 4865 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */ 4866 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False ); 4867 break; 4868 4869 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */ 4870 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False ); 4871 break; 4872 4873 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */ 4874 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False ); 4875 break; 4876 4877 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */ 4878 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False ); 4879 break; 4880 4881 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */ 4882 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False ); 4883 break; 4884 4885 default: 4886 goto decode_fail; 4887 } 4888 4889 } 4890 } 4891 4892 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */ 4893 else 4894 if (first_opcode == 0xDD) { 4895 4896 if (modrm < 0xC0) { 4897 4898 /* bits 5,4,3 are an opcode extension, and the modRM also 4899 specifies an address. */ 4900 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 4901 delta += len; 4902 4903 switch (gregOfRM(modrm)) { 4904 4905 case 0: /* FLD double-real */ 4906 DIP("fldl %s\n", dis_buf); 4907 fp_push(); 4908 put_ST(0, loadLE(Ity_F64, mkexpr(addr))); 4909 break; 4910 4911 case 1: /* FISTTPQ m64 (SSE3) */ 4912 DIP("fistppll %s\n", dis_buf); 4913 storeLE( mkexpr(addr), 4914 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) ); 4915 fp_pop(); 4916 break; 4917 4918 case 2: /* FST double-real */ 4919 DIP("fstl %s\n", dis_buf); 4920 storeLE(mkexpr(addr), get_ST(0)); 4921 break; 4922 4923 case 3: /* FSTP double-real */ 4924 DIP("fstpl %s\n", dis_buf); 4925 storeLE(mkexpr(addr), get_ST(0)); 4926 fp_pop(); 4927 break; 4928 4929 case 4: { /* FRSTOR m108 */ 4930 /* Uses dirty helper: 4931 VexEmWarn x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */ 4932 IRTemp ew = newTemp(Ity_I32); 4933 IRDirty* d = unsafeIRDirty_0_N ( 4934 0/*regparms*/, 4935 "x86g_dirtyhelper_FRSTOR", 4936 &x86g_dirtyhelper_FRSTOR, 4937 mkIRExprVec_1( mkexpr(addr) ) 4938 ); 4939 d->needsBBP = True; 4940 d->tmp = ew; 4941 /* declare we're reading memory */ 4942 d->mFx = Ifx_Read; 4943 d->mAddr = mkexpr(addr); 4944 d->mSize = 108; 4945 4946 /* declare we're writing guest state */ 4947 d->nFxState = 5; 4948 vex_bzero(&d->fxState, sizeof(d->fxState)); 4949 4950 d->fxState[0].fx = Ifx_Write; 4951 d->fxState[0].offset = OFFB_FTOP; 4952 d->fxState[0].size = sizeof(UInt); 4953 4954 d->fxState[1].fx = Ifx_Write; 4955 d->fxState[1].offset = OFFB_FPREGS; 4956 d->fxState[1].size = 8 * sizeof(ULong); 4957 4958 d->fxState[2].fx = Ifx_Write; 4959 d->fxState[2].offset = OFFB_FPTAGS; 4960 d->fxState[2].size = 8 * sizeof(UChar); 4961 4962 d->fxState[3].fx = Ifx_Write; 4963 d->fxState[3].offset = OFFB_FPROUND; 4964 d->fxState[3].size = sizeof(UInt); 4965 4966 d->fxState[4].fx = Ifx_Write; 4967 d->fxState[4].offset = OFFB_FC3210; 4968 d->fxState[4].size = sizeof(UInt); 4969 4970 stmt( IRStmt_Dirty(d) ); 4971 4972 /* ew contains any emulation warning we may need to 4973 issue. If needed, side-exit to the next insn, 4974 reporting the warning, so that Valgrind's dispatcher 4975 sees the warning. */ 4976 put_emwarn( mkexpr(ew) ); 4977 stmt( 4978 IRStmt_Exit( 4979 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 4980 Ijk_EmWarn, 4981 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta), 4982 OFFB_EIP 4983 ) 4984 ); 4985 4986 DIP("frstor %s\n", dis_buf); 4987 break; 4988 } 4989 4990 case 6: { /* FNSAVE m108 */ 4991 /* Uses dirty helper: 4992 void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */ 4993 IRDirty* d = unsafeIRDirty_0_N ( 4994 0/*regparms*/, 4995 "x86g_dirtyhelper_FSAVE", 4996 &x86g_dirtyhelper_FSAVE, 4997 mkIRExprVec_1( mkexpr(addr) ) 4998 ); 4999 d->needsBBP = True; 5000 /* declare we're writing memory */ 5001 d->mFx = Ifx_Write; 5002 d->mAddr = mkexpr(addr); 5003 d->mSize = 108; 5004 5005 /* declare we're reading guest state */ 5006 d->nFxState = 5; 5007 vex_bzero(&d->fxState, sizeof(d->fxState)); 5008 5009 d->fxState[0].fx = Ifx_Read; 5010 d->fxState[0].offset = OFFB_FTOP; 5011 d->fxState[0].size = sizeof(UInt); 5012 5013 d->fxState[1].fx = Ifx_Read; 5014 d->fxState[1].offset = OFFB_FPREGS; 5015 d->fxState[1].size = 8 * sizeof(ULong); 5016 5017 d->fxState[2].fx = Ifx_Read; 5018 d->fxState[2].offset = OFFB_FPTAGS; 5019 d->fxState[2].size = 8 * sizeof(UChar); 5020 5021 d->fxState[3].fx = Ifx_Read; 5022 d->fxState[3].offset = OFFB_FPROUND; 5023 d->fxState[3].size = sizeof(UInt); 5024 5025 d->fxState[4].fx = Ifx_Read; 5026 d->fxState[4].offset = OFFB_FC3210; 5027 d->fxState[4].size = sizeof(UInt); 5028 5029 stmt( IRStmt_Dirty(d) ); 5030 5031 DIP("fnsave %s\n", dis_buf); 5032 break; 5033 } 5034 5035 case 7: { /* FNSTSW m16 */ 5036 IRExpr* sw = get_FPU_sw(); 5037 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16); 5038 storeLE( mkexpr(addr), sw ); 5039 DIP("fnstsw %s\n", dis_buf); 5040 break; 5041 } 5042 5043 default: 5044 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 5045 vex_printf("first_opcode == 0xDD\n"); 5046 goto decode_fail; 5047 } 5048 } else { 5049 delta++; 5050 switch (modrm) { 5051 5052 case 0xC0 ... 0xC7: /* FFREE %st(?) */ 5053 r_dst = (UInt)modrm - 0xC0; 5054 DIP("ffree %%st(%d)\n", (Int)r_dst); 5055 put_ST_TAG ( r_dst, mkU8(0) ); 5056 break; 5057 5058 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */ 5059 r_dst = (UInt)modrm - 0xD0; 5060 DIP("fst %%st(0),%%st(%d)\n", (Int)r_dst); 5061 /* P4 manual says: "If the destination operand is a 5062 non-empty register, the invalid-operation exception 5063 is not generated. Hence put_ST_UNCHECKED. */ 5064 put_ST_UNCHECKED(r_dst, get_ST(0)); 5065 break; 5066 5067 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */ 5068 r_dst = (UInt)modrm - 0xD8; 5069 DIP("fstp %%st(0),%%st(%d)\n", (Int)r_dst); 5070 /* P4 manual says: "If the destination operand is a 5071 non-empty register, the invalid-operation exception 5072 is not generated. Hence put_ST_UNCHECKED. */ 5073 put_ST_UNCHECKED(r_dst, get_ST(0)); 5074 fp_pop(); 5075 break; 5076 5077 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */ 5078 r_dst = (UInt)modrm - 0xE0; 5079 DIP("fucom %%st(0),%%st(%d)\n", (Int)r_dst); 5080 /* This forces C1 to zero, which isn't right. */ 5081 put_C3210( 5082 binop( Iop_And32, 5083 binop(Iop_Shl32, 5084 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5085 mkU8(8)), 5086 mkU32(0x4500) 5087 )); 5088 break; 5089 5090 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */ 5091 r_dst = (UInt)modrm - 0xE8; 5092 DIP("fucomp %%st(0),%%st(%d)\n", (Int)r_dst); 5093 /* This forces C1 to zero, which isn't right. */ 5094 put_C3210( 5095 binop( Iop_And32, 5096 binop(Iop_Shl32, 5097 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5098 mkU8(8)), 5099 mkU32(0x4500) 5100 )); 5101 fp_pop(); 5102 break; 5103 5104 default: 5105 goto decode_fail; 5106 } 5107 } 5108 } 5109 5110 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */ 5111 else 5112 if (first_opcode == 0xDE) { 5113 5114 if (modrm < 0xC0) { 5115 5116 /* bits 5,4,3 are an opcode extension, and the modRM also 5117 specifies an address. */ 5118 IROp fop; 5119 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5120 delta += len; 5121 5122 switch (gregOfRM(modrm)) { 5123 5124 case 0: /* FIADD m16int */ /* ST(0) += m16int */ 5125 DIP("fiaddw %s\n", dis_buf); 5126 fop = Iop_AddF64; 5127 goto do_fop_m16; 5128 5129 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */ 5130 DIP("fimulw %s\n", dis_buf); 5131 fop = Iop_MulF64; 5132 goto do_fop_m16; 5133 5134 case 2: /* FICOM m16int */ 5135 DIP("ficomw %s\n", dis_buf); 5136 /* This forces C1 to zero, which isn't right. */ 5137 put_C3210( 5138 binop( Iop_And32, 5139 binop(Iop_Shl32, 5140 binop(Iop_CmpF64, 5141 get_ST(0), 5142 unop(Iop_I32StoF64, 5143 unop(Iop_16Sto32, 5144 loadLE(Ity_I16,mkexpr(addr))))), 5145 mkU8(8)), 5146 mkU32(0x4500) 5147 )); 5148 break; 5149 5150 case 3: /* FICOMP m16int */ 5151 DIP("ficompw %s\n", dis_buf); 5152 /* This forces C1 to zero, which isn't right. */ 5153 put_C3210( 5154 binop( Iop_And32, 5155 binop(Iop_Shl32, 5156 binop(Iop_CmpF64, 5157 get_ST(0), 5158 unop(Iop_I32StoF64, 5159 unop(Iop_16Sto32, 5160 loadLE(Ity_I16,mkexpr(addr))))), 5161 mkU8(8)), 5162 mkU32(0x4500) 5163 )); 5164 fp_pop(); 5165 break; 5166 5167 case 4: /* FISUB m16int */ /* ST(0) -= m16int */ 5168 DIP("fisubw %s\n", dis_buf); 5169 fop = Iop_SubF64; 5170 goto do_fop_m16; 5171 5172 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */ 5173 DIP("fisubrw %s\n", dis_buf); 5174 fop = Iop_SubF64; 5175 goto do_foprev_m16; 5176 5177 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */ 5178 DIP("fisubw %s\n", dis_buf); 5179 fop = Iop_DivF64; 5180 goto do_fop_m16; 5181 5182 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */ 5183 DIP("fidivrw %s\n", dis_buf); 5184 fop = Iop_DivF64; 5185 goto do_foprev_m16; 5186 5187 do_fop_m16: 5188 put_ST_UNCHECKED(0, 5189 triop(fop, 5190 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5191 get_ST(0), 5192 unop(Iop_I32StoF64, 5193 unop(Iop_16Sto32, 5194 loadLE(Ity_I16, mkexpr(addr)))))); 5195 break; 5196 5197 do_foprev_m16: 5198 put_ST_UNCHECKED(0, 5199 triop(fop, 5200 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5201 unop(Iop_I32StoF64, 5202 unop(Iop_16Sto32, 5203 loadLE(Ity_I16, mkexpr(addr)))), 5204 get_ST(0))); 5205 break; 5206 5207 default: 5208 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 5209 vex_printf("first_opcode == 0xDE\n"); 5210 goto decode_fail; 5211 } 5212 5213 } else { 5214 5215 delta++; 5216 switch (modrm) { 5217 5218 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */ 5219 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True ); 5220 break; 5221 5222 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */ 5223 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True ); 5224 break; 5225 5226 case 0xD9: /* FCOMPP %st(0),%st(1) */ 5227 DIP("fuompp %%st(0),%%st(1)\n"); 5228 /* This forces C1 to zero, which isn't right. */ 5229 put_C3210( 5230 binop( Iop_And32, 5231 binop(Iop_Shl32, 5232 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 5233 mkU8(8)), 5234 mkU32(0x4500) 5235 )); 5236 fp_pop(); 5237 fp_pop(); 5238 break; 5239 5240 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */ 5241 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True ); 5242 break; 5243 5244 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */ 5245 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True ); 5246 break; 5247 5248 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */ 5249 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True ); 5250 break; 5251 5252 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */ 5253 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True ); 5254 break; 5255 5256 default: 5257 goto decode_fail; 5258 } 5259 5260 } 5261 } 5262 5263 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */ 5264 else 5265 if (first_opcode == 0xDF) { 5266 5267 if (modrm < 0xC0) { 5268 5269 /* bits 5,4,3 are an opcode extension, and the modRM also 5270 specifies an address. */ 5271 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5272 delta += len; 5273 5274 switch (gregOfRM(modrm)) { 5275 5276 case 0: /* FILD m16int */ 5277 DIP("fildw %s\n", dis_buf); 5278 fp_push(); 5279 put_ST(0, unop(Iop_I32StoF64, 5280 unop(Iop_16Sto32, 5281 loadLE(Ity_I16, mkexpr(addr))))); 5282 break; 5283 5284 case 1: /* FISTTPS m16 (SSE3) */ 5285 DIP("fisttps %s\n", dis_buf); 5286 storeLE( mkexpr(addr), 5287 binop(Iop_F64toI16S, mkU32(Irrm_ZERO), get_ST(0)) ); 5288 fp_pop(); 5289 break; 5290 5291 case 2: /* FIST m16 */ 5292 DIP("fistp %s\n", dis_buf); 5293 storeLE( mkexpr(addr), 5294 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) ); 5295 break; 5296 5297 case 3: /* FISTP m16 */ 5298 DIP("fistps %s\n", dis_buf); 5299 storeLE( mkexpr(addr), 5300 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) ); 5301 fp_pop(); 5302 break; 5303 5304 case 5: /* FILD m64 */ 5305 DIP("fildll %s\n", dis_buf); 5306 fp_push(); 5307 put_ST(0, binop(Iop_I64StoF64, 5308 get_roundingmode(), 5309 loadLE(Ity_I64, mkexpr(addr)))); 5310 break; 5311 5312 case 7: /* FISTP m64 */ 5313 DIP("fistpll %s\n", dis_buf); 5314 storeLE( mkexpr(addr), 5315 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) ); 5316 fp_pop(); 5317 break; 5318 5319 default: 5320 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 5321 vex_printf("first_opcode == 0xDF\n"); 5322 goto decode_fail; 5323 } 5324 5325 } else { 5326 5327 delta++; 5328 switch (modrm) { 5329 5330 case 0xC0: /* FFREEP %st(0) */ 5331 DIP("ffreep %%st(%d)\n", 0); 5332 put_ST_TAG ( 0, mkU8(0) ); 5333 fp_pop(); 5334 break; 5335 5336 case 0xE0: /* FNSTSW %ax */ 5337 DIP("fnstsw %%ax\n"); 5338 /* Get the FPU status word value and dump it in %AX. */ 5339 if (0) { 5340 /* The obvious thing to do is simply dump the 16-bit 5341 status word value in %AX. However, due to a 5342 limitation in Memcheck's origin tracking 5343 machinery, this causes Memcheck not to track the 5344 origin of any undefinedness into %AH (only into 5345 %AL/%AX/%EAX), which means origins are lost in 5346 the sequence "fnstsw %ax; test $M,%ah; jcond .." */ 5347 putIReg(2, R_EAX, get_FPU_sw()); 5348 } else { 5349 /* So a somewhat lame kludge is to make it very 5350 clear to Memcheck that the value is written to 5351 both %AH and %AL. This generates marginally 5352 worse code, but I don't think it matters much. */ 5353 IRTemp t16 = newTemp(Ity_I16); 5354 assign(t16, get_FPU_sw()); 5355 putIReg( 1, R_AL, unop(Iop_16to8, mkexpr(t16)) ); 5356 putIReg( 1, R_AH, unop(Iop_16HIto8, mkexpr(t16)) ); 5357 } 5358 break; 5359 5360 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */ 5361 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True ); 5362 break; 5363 5364 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */ 5365 /* not really right since COMIP != UCOMIP */ 5366 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True ); 5367 break; 5368 5369 default: 5370 goto decode_fail; 5371 } 5372 } 5373 5374 } 5375 5376 else 5377 vpanic("dis_FPU(x86): invalid primary opcode"); 5378 5379 *decode_ok = True; 5380 return delta; 5381 5382 decode_fail: 5383 *decode_ok = False; 5384 return delta; 5385} 5386 5387 5388/*------------------------------------------------------------*/ 5389/*--- ---*/ 5390/*--- MMX INSTRUCTIONS ---*/ 5391/*--- ---*/ 5392/*------------------------------------------------------------*/ 5393 5394/* Effect of MMX insns on x87 FPU state (table 11-2 of 5395 IA32 arch manual, volume 3): 5396 5397 Read from, or write to MMX register (viz, any insn except EMMS): 5398 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero 5399 * FP stack pointer set to zero 5400 5401 EMMS: 5402 * All tags set to Invalid (empty) -- FPTAGS[i] := zero 5403 * FP stack pointer set to zero 5404*/ 5405 5406static void do_MMX_preamble ( void ) 5407{ 5408 Int i; 5409 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 5410 IRExpr* zero = mkU32(0); 5411 IRExpr* tag1 = mkU8(1); 5412 put_ftop(zero); 5413 for (i = 0; i < 8; i++) 5414 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) ); 5415} 5416 5417static void do_EMMS_preamble ( void ) 5418{ 5419 Int i; 5420 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 5421 IRExpr* zero = mkU32(0); 5422 IRExpr* tag0 = mkU8(0); 5423 put_ftop(zero); 5424 for (i = 0; i < 8; i++) 5425 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) ); 5426} 5427 5428 5429static IRExpr* getMMXReg ( UInt archreg ) 5430{ 5431 vassert(archreg < 8); 5432 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 ); 5433} 5434 5435 5436static void putMMXReg ( UInt archreg, IRExpr* e ) 5437{ 5438 vassert(archreg < 8); 5439 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 5440 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) ); 5441} 5442 5443 5444/* Helper for non-shift MMX insns. Note this is incomplete in the 5445 sense that it does not first call do_MMX_preamble() -- that is the 5446 responsibility of its caller. */ 5447 5448static 5449UInt dis_MMXop_regmem_to_reg ( UChar sorb, 5450 Int delta, 5451 UChar opc, 5452 HChar* name, 5453 Bool show_granularity ) 5454{ 5455 HChar dis_buf[50]; 5456 UChar modrm = getIByte(delta); 5457 Bool isReg = epartIsReg(modrm); 5458 IRExpr* argL = NULL; 5459 IRExpr* argR = NULL; 5460 IRExpr* argG = NULL; 5461 IRExpr* argE = NULL; 5462 IRTemp res = newTemp(Ity_I64); 5463 5464 Bool invG = False; 5465 IROp op = Iop_INVALID; 5466 void* hAddr = NULL; 5467 HChar* hName = NULL; 5468 Bool eLeft = False; 5469 5470# define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0) 5471 5472 switch (opc) { 5473 /* Original MMX ones */ 5474 case 0xFC: op = Iop_Add8x8; break; 5475 case 0xFD: op = Iop_Add16x4; break; 5476 case 0xFE: op = Iop_Add32x2; break; 5477 5478 case 0xEC: op = Iop_QAdd8Sx8; break; 5479 case 0xED: op = Iop_QAdd16Sx4; break; 5480 5481 case 0xDC: op = Iop_QAdd8Ux8; break; 5482 case 0xDD: op = Iop_QAdd16Ux4; break; 5483 5484 case 0xF8: op = Iop_Sub8x8; break; 5485 case 0xF9: op = Iop_Sub16x4; break; 5486 case 0xFA: op = Iop_Sub32x2; break; 5487 5488 case 0xE8: op = Iop_QSub8Sx8; break; 5489 case 0xE9: op = Iop_QSub16Sx4; break; 5490 5491 case 0xD8: op = Iop_QSub8Ux8; break; 5492 case 0xD9: op = Iop_QSub16Ux4; break; 5493 5494 case 0xE5: op = Iop_MulHi16Sx4; break; 5495 case 0xD5: op = Iop_Mul16x4; break; 5496 case 0xF5: XXX(x86g_calculate_mmx_pmaddwd); break; 5497 5498 case 0x74: op = Iop_CmpEQ8x8; break; 5499 case 0x75: op = Iop_CmpEQ16x4; break; 5500 case 0x76: op = Iop_CmpEQ32x2; break; 5501 5502 case 0x64: op = Iop_CmpGT8Sx8; break; 5503 case 0x65: op = Iop_CmpGT16Sx4; break; 5504 case 0x66: op = Iop_CmpGT32Sx2; break; 5505 5506 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break; 5507 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break; 5508 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break; 5509 5510 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break; 5511 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break; 5512 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break; 5513 5514 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break; 5515 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break; 5516 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break; 5517 5518 case 0xDB: op = Iop_And64; break; 5519 case 0xDF: op = Iop_And64; invG = True; break; 5520 case 0xEB: op = Iop_Or64; break; 5521 case 0xEF: /* Possibly do better here if argL and argR are the 5522 same reg */ 5523 op = Iop_Xor64; break; 5524 5525 /* Introduced in SSE1 */ 5526 case 0xE0: op = Iop_Avg8Ux8; break; 5527 case 0xE3: op = Iop_Avg16Ux4; break; 5528 case 0xEE: op = Iop_Max16Sx4; break; 5529 case 0xDE: op = Iop_Max8Ux8; break; 5530 case 0xEA: op = Iop_Min16Sx4; break; 5531 case 0xDA: op = Iop_Min8Ux8; break; 5532 case 0xE4: op = Iop_MulHi16Ux4; break; 5533 case 0xF6: XXX(x86g_calculate_mmx_psadbw); break; 5534 5535 /* Introduced in SSE2 */ 5536 case 0xD4: op = Iop_Add64; break; 5537 case 0xFB: op = Iop_Sub64; break; 5538 5539 default: 5540 vex_printf("\n0x%x\n", (Int)opc); 5541 vpanic("dis_MMXop_regmem_to_reg"); 5542 } 5543 5544# undef XXX 5545 5546 argG = getMMXReg(gregOfRM(modrm)); 5547 if (invG) 5548 argG = unop(Iop_Not64, argG); 5549 5550 if (isReg) { 5551 delta++; 5552 argE = getMMXReg(eregOfRM(modrm)); 5553 } else { 5554 Int len; 5555 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5556 delta += len; 5557 argE = loadLE(Ity_I64, mkexpr(addr)); 5558 } 5559 5560 if (eLeft) { 5561 argL = argE; 5562 argR = argG; 5563 } else { 5564 argL = argG; 5565 argR = argE; 5566 } 5567 5568 if (op != Iop_INVALID) { 5569 vassert(hName == NULL); 5570 vassert(hAddr == NULL); 5571 assign(res, binop(op, argL, argR)); 5572 } else { 5573 vassert(hName != NULL); 5574 vassert(hAddr != NULL); 5575 assign( res, 5576 mkIRExprCCall( 5577 Ity_I64, 5578 0/*regparms*/, hName, hAddr, 5579 mkIRExprVec_2( argL, argR ) 5580 ) 5581 ); 5582 } 5583 5584 putMMXReg( gregOfRM(modrm), mkexpr(res) ); 5585 5586 DIP("%s%s %s, %s\n", 5587 name, show_granularity ? nameMMXGran(opc & 3) : "", 5588 ( isReg ? nameMMXReg(eregOfRM(modrm)) : dis_buf ), 5589 nameMMXReg(gregOfRM(modrm)) ); 5590 5591 return delta; 5592} 5593 5594 5595/* Vector by scalar shift of G by the amount specified at the bottom 5596 of E. This is a straight copy of dis_SSE_shiftG_byE. */ 5597 5598static UInt dis_MMX_shiftG_byE ( UChar sorb, Int delta, 5599 HChar* opname, IROp op ) 5600{ 5601 HChar dis_buf[50]; 5602 Int alen, size; 5603 IRTemp addr; 5604 Bool shl, shr, sar; 5605 UChar rm = getIByte(delta); 5606 IRTemp g0 = newTemp(Ity_I64); 5607 IRTemp g1 = newTemp(Ity_I64); 5608 IRTemp amt = newTemp(Ity_I32); 5609 IRTemp amt8 = newTemp(Ity_I8); 5610 5611 if (epartIsReg(rm)) { 5612 assign( amt, unop(Iop_64to32, getMMXReg(eregOfRM(rm))) ); 5613 DIP("%s %s,%s\n", opname, 5614 nameMMXReg(eregOfRM(rm)), 5615 nameMMXReg(gregOfRM(rm)) ); 5616 delta++; 5617 } else { 5618 addr = disAMode ( &alen, sorb, delta, dis_buf ); 5619 assign( amt, loadLE(Ity_I32, mkexpr(addr)) ); 5620 DIP("%s %s,%s\n", opname, 5621 dis_buf, 5622 nameMMXReg(gregOfRM(rm)) ); 5623 delta += alen; 5624 } 5625 assign( g0, getMMXReg(gregOfRM(rm)) ); 5626 assign( amt8, unop(Iop_32to8, mkexpr(amt)) ); 5627 5628 shl = shr = sar = False; 5629 size = 0; 5630 switch (op) { 5631 case Iop_ShlN16x4: shl = True; size = 32; break; 5632 case Iop_ShlN32x2: shl = True; size = 32; break; 5633 case Iop_Shl64: shl = True; size = 64; break; 5634 case Iop_ShrN16x4: shr = True; size = 16; break; 5635 case Iop_ShrN32x2: shr = True; size = 32; break; 5636 case Iop_Shr64: shr = True; size = 64; break; 5637 case Iop_SarN16x4: sar = True; size = 16; break; 5638 case Iop_SarN32x2: sar = True; size = 32; break; 5639 default: vassert(0); 5640 } 5641 5642 if (shl || shr) { 5643 assign( 5644 g1, 5645 IRExpr_Mux0X( 5646 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))), 5647 mkU64(0), 5648 binop(op, mkexpr(g0), mkexpr(amt8)) 5649 ) 5650 ); 5651 } else 5652 if (sar) { 5653 assign( 5654 g1, 5655 IRExpr_Mux0X( 5656 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))), 5657 binop(op, mkexpr(g0), mkU8(size-1)), 5658 binop(op, mkexpr(g0), mkexpr(amt8)) 5659 ) 5660 ); 5661 } else { 5662 /*NOTREACHED*/ 5663 vassert(0); 5664 } 5665 5666 putMMXReg( gregOfRM(rm), mkexpr(g1) ); 5667 return delta; 5668} 5669 5670 5671/* Vector by scalar shift of E by an immediate byte. This is a 5672 straight copy of dis_SSE_shiftE_imm. */ 5673 5674static 5675UInt dis_MMX_shiftE_imm ( Int delta, HChar* opname, IROp op ) 5676{ 5677 Bool shl, shr, sar; 5678 UChar rm = getIByte(delta); 5679 IRTemp e0 = newTemp(Ity_I64); 5680 IRTemp e1 = newTemp(Ity_I64); 5681 UChar amt, size; 5682 vassert(epartIsReg(rm)); 5683 vassert(gregOfRM(rm) == 2 5684 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6); 5685 amt = getIByte(delta+1); 5686 delta += 2; 5687 DIP("%s $%d,%s\n", opname, 5688 (Int)amt, 5689 nameMMXReg(eregOfRM(rm)) ); 5690 5691 assign( e0, getMMXReg(eregOfRM(rm)) ); 5692 5693 shl = shr = sar = False; 5694 size = 0; 5695 switch (op) { 5696 case Iop_ShlN16x4: shl = True; size = 16; break; 5697 case Iop_ShlN32x2: shl = True; size = 32; break; 5698 case Iop_Shl64: shl = True; size = 64; break; 5699 case Iop_SarN16x4: sar = True; size = 16; break; 5700 case Iop_SarN32x2: sar = True; size = 32; break; 5701 case Iop_ShrN16x4: shr = True; size = 16; break; 5702 case Iop_ShrN32x2: shr = True; size = 32; break; 5703 case Iop_Shr64: shr = True; size = 64; break; 5704 default: vassert(0); 5705 } 5706 5707 if (shl || shr) { 5708 assign( e1, amt >= size 5709 ? mkU64(0) 5710 : binop(op, mkexpr(e0), mkU8(amt)) 5711 ); 5712 } else 5713 if (sar) { 5714 assign( e1, amt >= size 5715 ? binop(op, mkexpr(e0), mkU8(size-1)) 5716 : binop(op, mkexpr(e0), mkU8(amt)) 5717 ); 5718 } else { 5719 /*NOTREACHED*/ 5720 vassert(0); 5721 } 5722 5723 putMMXReg( eregOfRM(rm), mkexpr(e1) ); 5724 return delta; 5725} 5726 5727 5728/* Completely handle all MMX instructions except emms. */ 5729 5730static 5731UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, Int delta ) 5732{ 5733 Int len; 5734 UChar modrm; 5735 HChar dis_buf[50]; 5736 UChar opc = getIByte(delta); 5737 delta++; 5738 5739 /* dis_MMX handles all insns except emms. */ 5740 do_MMX_preamble(); 5741 5742 switch (opc) { 5743 5744 case 0x6E: 5745 /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/ 5746 if (sz != 4) 5747 goto mmx_decode_failure; 5748 modrm = getIByte(delta); 5749 if (epartIsReg(modrm)) { 5750 delta++; 5751 putMMXReg( 5752 gregOfRM(modrm), 5753 binop( Iop_32HLto64, 5754 mkU32(0), 5755 getIReg(4, eregOfRM(modrm)) ) ); 5756 DIP("movd %s, %s\n", 5757 nameIReg(4,eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm))); 5758 } else { 5759 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5760 delta += len; 5761 putMMXReg( 5762 gregOfRM(modrm), 5763 binop( Iop_32HLto64, 5764 mkU32(0), 5765 loadLE(Ity_I32, mkexpr(addr)) ) ); 5766 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregOfRM(modrm))); 5767 } 5768 break; 5769 5770 case 0x7E: /* MOVD (src)mmxreg (G), (dst)ireg-or-mem (E) */ 5771 if (sz != 4) 5772 goto mmx_decode_failure; 5773 modrm = getIByte(delta); 5774 if (epartIsReg(modrm)) { 5775 delta++; 5776 putIReg( 4, eregOfRM(modrm), 5777 unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) ); 5778 DIP("movd %s, %s\n", 5779 nameMMXReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm))); 5780 } else { 5781 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5782 delta += len; 5783 storeLE( mkexpr(addr), 5784 unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) ); 5785 DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm)), dis_buf); 5786 } 5787 break; 5788 5789 case 0x6F: 5790 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 5791 if (sz != 4) 5792 goto mmx_decode_failure; 5793 modrm = getIByte(delta); 5794 if (epartIsReg(modrm)) { 5795 delta++; 5796 putMMXReg( gregOfRM(modrm), getMMXReg(eregOfRM(modrm)) ); 5797 DIP("movq %s, %s\n", 5798 nameMMXReg(eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm))); 5799 } else { 5800 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5801 delta += len; 5802 putMMXReg( gregOfRM(modrm), loadLE(Ity_I64, mkexpr(addr)) ); 5803 DIP("movq %s, %s\n", 5804 dis_buf, nameMMXReg(gregOfRM(modrm))); 5805 } 5806 break; 5807 5808 case 0x7F: 5809 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 5810 if (sz != 4) 5811 goto mmx_decode_failure; 5812 modrm = getIByte(delta); 5813 if (epartIsReg(modrm)) { 5814 delta++; 5815 putMMXReg( eregOfRM(modrm), getMMXReg(gregOfRM(modrm)) ); 5816 DIP("movq %s, %s\n", 5817 nameMMXReg(gregOfRM(modrm)), nameMMXReg(eregOfRM(modrm))); 5818 } else { 5819 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5820 delta += len; 5821 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) ); 5822 DIP("mov(nt)q %s, %s\n", 5823 nameMMXReg(gregOfRM(modrm)), dis_buf); 5824 } 5825 break; 5826 5827 case 0xFC: 5828 case 0xFD: 5829 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 5830 if (sz != 4) 5831 goto mmx_decode_failure; 5832 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padd", True ); 5833 break; 5834 5835 case 0xEC: 5836 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5837 if (sz != 4) 5838 goto mmx_decode_failure; 5839 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padds", True ); 5840 break; 5841 5842 case 0xDC: 5843 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5844 if (sz != 4) 5845 goto mmx_decode_failure; 5846 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "paddus", True ); 5847 break; 5848 5849 case 0xF8: 5850 case 0xF9: 5851 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 5852 if (sz != 4) 5853 goto mmx_decode_failure; 5854 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psub", True ); 5855 break; 5856 5857 case 0xE8: 5858 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5859 if (sz != 4) 5860 goto mmx_decode_failure; 5861 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubs", True ); 5862 break; 5863 5864 case 0xD8: 5865 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5866 if (sz != 4) 5867 goto mmx_decode_failure; 5868 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubus", True ); 5869 break; 5870 5871 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 5872 if (sz != 4) 5873 goto mmx_decode_failure; 5874 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmulhw", False ); 5875 break; 5876 5877 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 5878 if (sz != 4) 5879 goto mmx_decode_failure; 5880 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmullw", False ); 5881 break; 5882 5883 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 5884 vassert(sz == 4); 5885 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmaddwd", False ); 5886 break; 5887 5888 case 0x74: 5889 case 0x75: 5890 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 5891 if (sz != 4) 5892 goto mmx_decode_failure; 5893 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpeq", True ); 5894 break; 5895 5896 case 0x64: 5897 case 0x65: 5898 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 5899 if (sz != 4) 5900 goto mmx_decode_failure; 5901 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpgt", True ); 5902 break; 5903 5904 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 5905 if (sz != 4) 5906 goto mmx_decode_failure; 5907 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packssdw", False ); 5908 break; 5909 5910 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 5911 if (sz != 4) 5912 goto mmx_decode_failure; 5913 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packsswb", False ); 5914 break; 5915 5916 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 5917 if (sz != 4) 5918 goto mmx_decode_failure; 5919 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packuswb", False ); 5920 break; 5921 5922 case 0x68: 5923 case 0x69: 5924 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 5925 if (sz != 4) 5926 goto mmx_decode_failure; 5927 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckh", True ); 5928 break; 5929 5930 case 0x60: 5931 case 0x61: 5932 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 5933 if (sz != 4) 5934 goto mmx_decode_failure; 5935 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckl", True ); 5936 break; 5937 5938 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 5939 if (sz != 4) 5940 goto mmx_decode_failure; 5941 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pand", False ); 5942 break; 5943 5944 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 5945 if (sz != 4) 5946 goto mmx_decode_failure; 5947 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pandn", False ); 5948 break; 5949 5950 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 5951 if (sz != 4) 5952 goto mmx_decode_failure; 5953 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "por", False ); 5954 break; 5955 5956 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 5957 if (sz != 4) 5958 goto mmx_decode_failure; 5959 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pxor", False ); 5960 break; 5961 5962# define SHIFT_BY_REG(_name,_op) \ 5963 delta = dis_MMX_shiftG_byE(sorb, delta, _name, _op); \ 5964 break; 5965 5966 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 5967 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4); 5968 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2); 5969 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64); 5970 5971 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 5972 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4); 5973 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2); 5974 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64); 5975 5976 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 5977 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4); 5978 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2); 5979 5980# undef SHIFT_BY_REG 5981 5982 case 0x71: 5983 case 0x72: 5984 case 0x73: { 5985 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 5986 UChar byte2, subopc; 5987 if (sz != 4) 5988 goto mmx_decode_failure; 5989 byte2 = getIByte(delta); /* amode / sub-opcode */ 5990 subopc = toUChar( (byte2 >> 3) & 7 ); 5991 5992# define SHIFT_BY_IMM(_name,_op) \ 5993 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \ 5994 } while (0) 5995 5996 if (subopc == 2 /*SRL*/ && opc == 0x71) 5997 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4); 5998 else if (subopc == 2 /*SRL*/ && opc == 0x72) 5999 SHIFT_BY_IMM("psrld", Iop_ShrN32x2); 6000 else if (subopc == 2 /*SRL*/ && opc == 0x73) 6001 SHIFT_BY_IMM("psrlq", Iop_Shr64); 6002 6003 else if (subopc == 4 /*SAR*/ && opc == 0x71) 6004 SHIFT_BY_IMM("psraw", Iop_SarN16x4); 6005 else if (subopc == 4 /*SAR*/ && opc == 0x72) 6006 SHIFT_BY_IMM("psrad", Iop_SarN32x2); 6007 6008 else if (subopc == 6 /*SHL*/ && opc == 0x71) 6009 SHIFT_BY_IMM("psllw", Iop_ShlN16x4); 6010 else if (subopc == 6 /*SHL*/ && opc == 0x72) 6011 SHIFT_BY_IMM("pslld", Iop_ShlN32x2); 6012 else if (subopc == 6 /*SHL*/ && opc == 0x73) 6013 SHIFT_BY_IMM("psllq", Iop_Shl64); 6014 6015 else goto mmx_decode_failure; 6016 6017# undef SHIFT_BY_IMM 6018 break; 6019 } 6020 6021 case 0xF7: { 6022 IRTemp addr = newTemp(Ity_I32); 6023 IRTemp regD = newTemp(Ity_I64); 6024 IRTemp regM = newTemp(Ity_I64); 6025 IRTemp mask = newTemp(Ity_I64); 6026 IRTemp olddata = newTemp(Ity_I64); 6027 IRTemp newdata = newTemp(Ity_I64); 6028 6029 modrm = getIByte(delta); 6030 if (sz != 4 || (!epartIsReg(modrm))) 6031 goto mmx_decode_failure; 6032 delta++; 6033 6034 assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) )); 6035 assign( regM, getMMXReg( eregOfRM(modrm) )); 6036 assign( regD, getMMXReg( gregOfRM(modrm) )); 6037 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) ); 6038 assign( olddata, loadLE( Ity_I64, mkexpr(addr) )); 6039 assign( newdata, 6040 binop(Iop_Or64, 6041 binop(Iop_And64, 6042 mkexpr(regD), 6043 mkexpr(mask) ), 6044 binop(Iop_And64, 6045 mkexpr(olddata), 6046 unop(Iop_Not64, mkexpr(mask)))) ); 6047 storeLE( mkexpr(addr), mkexpr(newdata) ); 6048 DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ), 6049 nameMMXReg( gregOfRM(modrm) ) ); 6050 break; 6051 } 6052 6053 /* --- MMX decode failure --- */ 6054 default: 6055 mmx_decode_failure: 6056 *decode_ok = False; 6057 return delta; /* ignored */ 6058 6059 } 6060 6061 *decode_ok = True; 6062 return delta; 6063} 6064 6065 6066/*------------------------------------------------------------*/ 6067/*--- More misc arithmetic and other obscure insns. ---*/ 6068/*------------------------------------------------------------*/ 6069 6070/* Double length left and right shifts. Apparently only required in 6071 v-size (no b- variant). */ 6072static 6073UInt dis_SHLRD_Gv_Ev ( UChar sorb, 6074 Int delta, UChar modrm, 6075 Int sz, 6076 IRExpr* shift_amt, 6077 Bool amt_is_literal, 6078 HChar* shift_amt_txt, 6079 Bool left_shift ) 6080{ 6081 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used 6082 for printing it. And eip on entry points at the modrm byte. */ 6083 Int len; 6084 HChar dis_buf[50]; 6085 6086 IRType ty = szToITy(sz); 6087 IRTemp gsrc = newTemp(ty); 6088 IRTemp esrc = newTemp(ty); 6089 IRTemp addr = IRTemp_INVALID; 6090 IRTemp tmpSH = newTemp(Ity_I8); 6091 IRTemp tmpL = IRTemp_INVALID; 6092 IRTemp tmpRes = IRTemp_INVALID; 6093 IRTemp tmpSubSh = IRTemp_INVALID; 6094 IROp mkpair; 6095 IROp getres; 6096 IROp shift; 6097 IRExpr* mask = NULL; 6098 6099 vassert(sz == 2 || sz == 4); 6100 6101 /* The E-part is the destination; this is shifted. The G-part 6102 supplies bits to be shifted into the E-part, but is not 6103 changed. 6104 6105 If shifting left, form a double-length word with E at the top 6106 and G at the bottom, and shift this left. The result is then in 6107 the high part. 6108 6109 If shifting right, form a double-length word with G at the top 6110 and E at the bottom, and shift this right. The result is then 6111 at the bottom. */ 6112 6113 /* Fetch the operands. */ 6114 6115 assign( gsrc, getIReg(sz, gregOfRM(modrm)) ); 6116 6117 if (epartIsReg(modrm)) { 6118 delta++; 6119 assign( esrc, getIReg(sz, eregOfRM(modrm)) ); 6120 DIP("sh%cd%c %s, %s, %s\n", 6121 ( left_shift ? 'l' : 'r' ), nameISize(sz), 6122 shift_amt_txt, 6123 nameIReg(sz, gregOfRM(modrm)), nameIReg(sz, eregOfRM(modrm))); 6124 } else { 6125 addr = disAMode ( &len, sorb, delta, dis_buf ); 6126 delta += len; 6127 assign( esrc, loadLE(ty, mkexpr(addr)) ); 6128 DIP("sh%cd%c %s, %s, %s\n", 6129 ( left_shift ? 'l' : 'r' ), nameISize(sz), 6130 shift_amt_txt, 6131 nameIReg(sz, gregOfRM(modrm)), dis_buf); 6132 } 6133 6134 /* Round up the relevant primops. */ 6135 6136 if (sz == 4) { 6137 tmpL = newTemp(Ity_I64); 6138 tmpRes = newTemp(Ity_I32); 6139 tmpSubSh = newTemp(Ity_I32); 6140 mkpair = Iop_32HLto64; 6141 getres = left_shift ? Iop_64HIto32 : Iop_64to32; 6142 shift = left_shift ? Iop_Shl64 : Iop_Shr64; 6143 mask = mkU8(31); 6144 } else { 6145 /* sz == 2 */ 6146 tmpL = newTemp(Ity_I32); 6147 tmpRes = newTemp(Ity_I16); 6148 tmpSubSh = newTemp(Ity_I16); 6149 mkpair = Iop_16HLto32; 6150 getres = left_shift ? Iop_32HIto16 : Iop_32to16; 6151 shift = left_shift ? Iop_Shl32 : Iop_Shr32; 6152 mask = mkU8(15); 6153 } 6154 6155 /* Do the shift, calculate the subshift value, and set 6156 the flag thunk. */ 6157 6158 assign( tmpSH, binop(Iop_And8, shift_amt, mask) ); 6159 6160 if (left_shift) 6161 assign( tmpL, binop(mkpair, mkexpr(esrc), mkexpr(gsrc)) ); 6162 else 6163 assign( tmpL, binop(mkpair, mkexpr(gsrc), mkexpr(esrc)) ); 6164 6165 assign( tmpRes, unop(getres, binop(shift, mkexpr(tmpL), mkexpr(tmpSH)) ) ); 6166 assign( tmpSubSh, 6167 unop(getres, 6168 binop(shift, 6169 mkexpr(tmpL), 6170 binop(Iop_And8, 6171 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ), 6172 mask))) ); 6173 6174 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl32 : Iop_Sar32, 6175 tmpRes, tmpSubSh, ty, tmpSH ); 6176 6177 /* Put result back. */ 6178 6179 if (epartIsReg(modrm)) { 6180 putIReg(sz, eregOfRM(modrm), mkexpr(tmpRes)); 6181 } else { 6182 storeLE( mkexpr(addr), mkexpr(tmpRes) ); 6183 } 6184 6185 if (amt_is_literal) delta++; 6186 return delta; 6187} 6188 6189 6190/* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not 6191 required. */ 6192 6193typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp; 6194 6195static HChar* nameBtOp ( BtOp op ) 6196{ 6197 switch (op) { 6198 case BtOpNone: return ""; 6199 case BtOpSet: return "s"; 6200 case BtOpReset: return "r"; 6201 case BtOpComp: return "c"; 6202 default: vpanic("nameBtOp(x86)"); 6203 } 6204} 6205 6206 6207static 6208UInt dis_bt_G_E ( VexAbiInfo* vbi, 6209 UChar sorb, Bool locked, Int sz, Int delta, BtOp op ) 6210{ 6211 HChar dis_buf[50]; 6212 UChar modrm; 6213 Int len; 6214 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0, 6215 t_addr1, t_esp, t_mask, t_new; 6216 6217 vassert(sz == 2 || sz == 4); 6218 6219 t_fetched = t_bitno0 = t_bitno1 = t_bitno2 6220 = t_addr0 = t_addr1 = t_esp 6221 = t_mask = t_new = IRTemp_INVALID; 6222 6223 t_fetched = newTemp(Ity_I8); 6224 t_new = newTemp(Ity_I8); 6225 t_bitno0 = newTemp(Ity_I32); 6226 t_bitno1 = newTemp(Ity_I32); 6227 t_bitno2 = newTemp(Ity_I8); 6228 t_addr1 = newTemp(Ity_I32); 6229 modrm = getIByte(delta); 6230 6231 assign( t_bitno0, widenSto32(getIReg(sz, gregOfRM(modrm))) ); 6232 6233 if (epartIsReg(modrm)) { 6234 delta++; 6235 /* Get it onto the client's stack. */ 6236 t_esp = newTemp(Ity_I32); 6237 t_addr0 = newTemp(Ity_I32); 6238 6239 /* For the choice of the value 128, see comment in dis_bt_G_E in 6240 guest_amd64_toIR.c. We point out here only that 128 is 6241 fast-cased in Memcheck and is > 0, so seems like a good 6242 choice. */ 6243 vassert(vbi->guest_stack_redzone_size == 0); 6244 assign( t_esp, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(128)) ); 6245 putIReg(4, R_ESP, mkexpr(t_esp)); 6246 6247 storeLE( mkexpr(t_esp), getIReg(sz, eregOfRM(modrm)) ); 6248 6249 /* Make t_addr0 point at it. */ 6250 assign( t_addr0, mkexpr(t_esp) ); 6251 6252 /* Mask out upper bits of the shift amount, since we're doing a 6253 reg. */ 6254 assign( t_bitno1, binop(Iop_And32, 6255 mkexpr(t_bitno0), 6256 mkU32(sz == 4 ? 31 : 15)) ); 6257 6258 } else { 6259 t_addr0 = disAMode ( &len, sorb, delta, dis_buf ); 6260 delta += len; 6261 assign( t_bitno1, mkexpr(t_bitno0) ); 6262 } 6263 6264 /* At this point: t_addr0 is the address being operated on. If it 6265 was a reg, we will have pushed it onto the client's stack. 6266 t_bitno1 is the bit number, suitably masked in the case of a 6267 reg. */ 6268 6269 /* Now the main sequence. */ 6270 assign( t_addr1, 6271 binop(Iop_Add32, 6272 mkexpr(t_addr0), 6273 binop(Iop_Sar32, mkexpr(t_bitno1), mkU8(3))) ); 6274 6275 /* t_addr1 now holds effective address */ 6276 6277 assign( t_bitno2, 6278 unop(Iop_32to8, 6279 binop(Iop_And32, mkexpr(t_bitno1), mkU32(7))) ); 6280 6281 /* t_bitno2 contains offset of bit within byte */ 6282 6283 if (op != BtOpNone) { 6284 t_mask = newTemp(Ity_I8); 6285 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) ); 6286 } 6287 6288 /* t_mask is now a suitable byte mask */ 6289 6290 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) ); 6291 6292 if (op != BtOpNone) { 6293 switch (op) { 6294 case BtOpSet: 6295 assign( t_new, 6296 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) ); 6297 break; 6298 case BtOpComp: 6299 assign( t_new, 6300 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) ); 6301 break; 6302 case BtOpReset: 6303 assign( t_new, 6304 binop(Iop_And8, mkexpr(t_fetched), 6305 unop(Iop_Not8, mkexpr(t_mask))) ); 6306 break; 6307 default: 6308 vpanic("dis_bt_G_E(x86)"); 6309 } 6310 if (locked && !epartIsReg(modrm)) { 6311 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/, 6312 mkexpr(t_new)/*new*/, 6313 guest_EIP_curr_instr ); 6314 } else { 6315 storeLE( mkexpr(t_addr1), mkexpr(t_new) ); 6316 } 6317 } 6318 6319 /* Side effect done; now get selected bit into Carry flag */ 6320 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 6321 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 6322 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 6323 stmt( IRStmt_Put( 6324 OFFB_CC_DEP1, 6325 binop(Iop_And32, 6326 binop(Iop_Shr32, 6327 unop(Iop_8Uto32, mkexpr(t_fetched)), 6328 mkexpr(t_bitno2)), 6329 mkU32(1))) 6330 ); 6331 /* Set NDEP even though it isn't used. This makes redundant-PUT 6332 elimination of previous stores to this field work better. */ 6333 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 6334 6335 /* Move reg operand from stack back to reg */ 6336 if (epartIsReg(modrm)) { 6337 /* t_esp still points at it. */ 6338 putIReg(sz, eregOfRM(modrm), loadLE(szToITy(sz), mkexpr(t_esp)) ); 6339 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t_esp), mkU32(128)) ); 6340 } 6341 6342 DIP("bt%s%c %s, %s\n", 6343 nameBtOp(op), nameISize(sz), nameIReg(sz, gregOfRM(modrm)), 6344 ( epartIsReg(modrm) ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ) ); 6345 6346 return delta; 6347} 6348 6349 6350 6351/* Handle BSF/BSR. Only v-size seems necessary. */ 6352static 6353UInt dis_bs_E_G ( UChar sorb, Int sz, Int delta, Bool fwds ) 6354{ 6355 Bool isReg; 6356 UChar modrm; 6357 HChar dis_buf[50]; 6358 6359 IRType ty = szToITy(sz); 6360 IRTemp src = newTemp(ty); 6361 IRTemp dst = newTemp(ty); 6362 6363 IRTemp src32 = newTemp(Ity_I32); 6364 IRTemp dst32 = newTemp(Ity_I32); 6365 IRTemp src8 = newTemp(Ity_I8); 6366 6367 vassert(sz == 4 || sz == 2); 6368 6369 modrm = getIByte(delta); 6370 6371 isReg = epartIsReg(modrm); 6372 if (isReg) { 6373 delta++; 6374 assign( src, getIReg(sz, eregOfRM(modrm)) ); 6375 } else { 6376 Int len; 6377 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 6378 delta += len; 6379 assign( src, loadLE(ty, mkexpr(addr)) ); 6380 } 6381 6382 DIP("bs%c%c %s, %s\n", 6383 fwds ? 'f' : 'r', nameISize(sz), 6384 ( isReg ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ), 6385 nameIReg(sz, gregOfRM(modrm))); 6386 6387 /* Generate an 8-bit expression which is zero iff the 6388 original is zero, and nonzero otherwise */ 6389 assign( src8, 6390 unop(Iop_1Uto8, binop(mkSizedOp(ty,Iop_CmpNE8), 6391 mkexpr(src), mkU(ty,0))) ); 6392 6393 /* Flags: Z is 1 iff source value is zero. All others 6394 are undefined -- we force them to zero. */ 6395 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 6396 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 6397 stmt( IRStmt_Put( 6398 OFFB_CC_DEP1, 6399 IRExpr_Mux0X( mkexpr(src8), 6400 /* src==0 */ 6401 mkU32(X86G_CC_MASK_Z), 6402 /* src!=0 */ 6403 mkU32(0) 6404 ) 6405 )); 6406 /* Set NDEP even though it isn't used. This makes redundant-PUT 6407 elimination of previous stores to this field work better. */ 6408 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 6409 6410 /* Result: iff source value is zero, we can't use 6411 Iop_Clz32/Iop_Ctz32 as they have no defined result in that case. 6412 But anyway, Intel x86 semantics say the result is undefined in 6413 such situations. Hence handle the zero case specially. */ 6414 6415 /* Bleh. What we compute: 6416 6417 bsf32: if src == 0 then 0 else Ctz32(src) 6418 bsr32: if src == 0 then 0 else 31 - Clz32(src) 6419 6420 bsf16: if src == 0 then 0 else Ctz32(16Uto32(src)) 6421 bsr16: if src == 0 then 0 else 31 - Clz32(16Uto32(src)) 6422 6423 First, widen src to 32 bits if it is not already. 6424 6425 Postscript 15 Oct 04: it seems that at least VIA Nehemiah leaves the 6426 dst register unchanged when src == 0. Hence change accordingly. 6427 */ 6428 if (sz == 2) 6429 assign( src32, unop(Iop_16Uto32, mkexpr(src)) ); 6430 else 6431 assign( src32, mkexpr(src) ); 6432 6433 /* The main computation, guarding against zero. */ 6434 assign( dst32, 6435 IRExpr_Mux0X( 6436 mkexpr(src8), 6437 /* src == 0 -- leave dst unchanged */ 6438 widenUto32( getIReg( sz, gregOfRM(modrm) ) ), 6439 /* src != 0 */ 6440 fwds ? unop(Iop_Ctz32, mkexpr(src32)) 6441 : binop(Iop_Sub32, 6442 mkU32(31), 6443 unop(Iop_Clz32, mkexpr(src32))) 6444 ) 6445 ); 6446 6447 if (sz == 2) 6448 assign( dst, unop(Iop_32to16, mkexpr(dst32)) ); 6449 else 6450 assign( dst, mkexpr(dst32) ); 6451 6452 /* dump result back */ 6453 putIReg( sz, gregOfRM(modrm), mkexpr(dst) ); 6454 6455 return delta; 6456} 6457 6458 6459static 6460void codegen_xchg_eAX_Reg ( Int sz, Int reg ) 6461{ 6462 IRType ty = szToITy(sz); 6463 IRTemp t1 = newTemp(ty); 6464 IRTemp t2 = newTemp(ty); 6465 vassert(sz == 2 || sz == 4); 6466 assign( t1, getIReg(sz, R_EAX) ); 6467 assign( t2, getIReg(sz, reg) ); 6468 putIReg( sz, R_EAX, mkexpr(t2) ); 6469 putIReg( sz, reg, mkexpr(t1) ); 6470 DIP("xchg%c %s, %s\n", 6471 nameISize(sz), nameIReg(sz, R_EAX), nameIReg(sz, reg)); 6472} 6473 6474 6475static 6476void codegen_SAHF ( void ) 6477{ 6478 /* Set the flags to: 6479 (x86g_calculate_flags_all() & X86G_CC_MASK_O) -- retain the old O flag 6480 | (%AH & (X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A 6481 |X86G_CC_MASK_P|X86G_CC_MASK_C) 6482 */ 6483 UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A 6484 |X86G_CC_MASK_C|X86G_CC_MASK_P; 6485 IRTemp oldflags = newTemp(Ity_I32); 6486 assign( oldflags, mk_x86g_calculate_eflags_all() ); 6487 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 6488 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 6489 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 6490 stmt( IRStmt_Put( OFFB_CC_DEP1, 6491 binop(Iop_Or32, 6492 binop(Iop_And32, mkexpr(oldflags), mkU32(X86G_CC_MASK_O)), 6493 binop(Iop_And32, 6494 binop(Iop_Shr32, getIReg(4, R_EAX), mkU8(8)), 6495 mkU32(mask_SZACP)) 6496 ) 6497 )); 6498 /* Set NDEP even though it isn't used. This makes redundant-PUT 6499 elimination of previous stores to this field work better. */ 6500 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 6501} 6502 6503 6504static 6505void codegen_LAHF ( void ) 6506{ 6507 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */ 6508 IRExpr* eax_with_hole; 6509 IRExpr* new_byte; 6510 IRExpr* new_eax; 6511 UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A 6512 |X86G_CC_MASK_C|X86G_CC_MASK_P; 6513 6514 IRTemp flags = newTemp(Ity_I32); 6515 assign( flags, mk_x86g_calculate_eflags_all() ); 6516 6517 eax_with_hole 6518 = binop(Iop_And32, getIReg(4, R_EAX), mkU32(0xFFFF00FF)); 6519 new_byte 6520 = binop(Iop_Or32, binop(Iop_And32, mkexpr(flags), mkU32(mask_SZACP)), 6521 mkU32(1<<1)); 6522 new_eax 6523 = binop(Iop_Or32, eax_with_hole, 6524 binop(Iop_Shl32, new_byte, mkU8(8))); 6525 putIReg(4, R_EAX, new_eax); 6526} 6527 6528 6529static 6530UInt dis_cmpxchg_G_E ( UChar sorb, 6531 Bool locked, 6532 Int size, 6533 Int delta0 ) 6534{ 6535 HChar dis_buf[50]; 6536 Int len; 6537 6538 IRType ty = szToITy(size); 6539 IRTemp acc = newTemp(ty); 6540 IRTemp src = newTemp(ty); 6541 IRTemp dest = newTemp(ty); 6542 IRTemp dest2 = newTemp(ty); 6543 IRTemp acc2 = newTemp(ty); 6544 IRTemp cond8 = newTemp(Ity_I8); 6545 IRTemp addr = IRTemp_INVALID; 6546 UChar rm = getUChar(delta0); 6547 6548 /* There are 3 cases to consider: 6549 6550 reg-reg: ignore any lock prefix, generate sequence based 6551 on Mux0X 6552 6553 reg-mem, not locked: ignore any lock prefix, generate sequence 6554 based on Mux0X 6555 6556 reg-mem, locked: use IRCAS 6557 */ 6558 if (epartIsReg(rm)) { 6559 /* case 1 */ 6560 assign( dest, getIReg(size, eregOfRM(rm)) ); 6561 delta0++; 6562 assign( src, getIReg(size, gregOfRM(rm)) ); 6563 assign( acc, getIReg(size, R_EAX) ); 6564 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 6565 assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) ); 6566 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); 6567 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 6568 putIReg(size, R_EAX, mkexpr(acc2)); 6569 putIReg(size, eregOfRM(rm), mkexpr(dest2)); 6570 DIP("cmpxchg%c %s,%s\n", nameISize(size), 6571 nameIReg(size,gregOfRM(rm)), 6572 nameIReg(size,eregOfRM(rm)) ); 6573 } 6574 else if (!epartIsReg(rm) && !locked) { 6575 /* case 2 */ 6576 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6577 assign( dest, loadLE(ty, mkexpr(addr)) ); 6578 delta0 += len; 6579 assign( src, getIReg(size, gregOfRM(rm)) ); 6580 assign( acc, getIReg(size, R_EAX) ); 6581 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 6582 assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) ); 6583 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); 6584 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 6585 putIReg(size, R_EAX, mkexpr(acc2)); 6586 storeLE( mkexpr(addr), mkexpr(dest2) ); 6587 DIP("cmpxchg%c %s,%s\n", nameISize(size), 6588 nameIReg(size,gregOfRM(rm)), dis_buf); 6589 } 6590 else if (!epartIsReg(rm) && locked) { 6591 /* case 3 */ 6592 /* src is new value. acc is expected value. dest is old value. 6593 Compute success from the output of the IRCAS, and steer the 6594 new value for EAX accordingly: in case of success, EAX is 6595 unchanged. */ 6596 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6597 delta0 += len; 6598 assign( src, getIReg(size, gregOfRM(rm)) ); 6599 assign( acc, getIReg(size, R_EAX) ); 6600 stmt( IRStmt_CAS( 6601 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr), 6602 NULL, mkexpr(acc), NULL, mkexpr(src) ) 6603 )); 6604 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 6605 assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) ); 6606 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 6607 putIReg(size, R_EAX, mkexpr(acc2)); 6608 DIP("cmpxchg%c %s,%s\n", nameISize(size), 6609 nameIReg(size,gregOfRM(rm)), dis_buf); 6610 } 6611 else vassert(0); 6612 6613 return delta0; 6614} 6615 6616 6617/* Handle conditional move instructions of the form 6618 cmovcc E(reg-or-mem), G(reg) 6619 6620 E(src) is reg-or-mem 6621 G(dst) is reg. 6622 6623 If E is reg, --> GET %E, tmps 6624 GET %G, tmpd 6625 CMOVcc tmps, tmpd 6626 PUT tmpd, %G 6627 6628 If E is mem --> (getAddr E) -> tmpa 6629 LD (tmpa), tmps 6630 GET %G, tmpd 6631 CMOVcc tmps, tmpd 6632 PUT tmpd, %G 6633*/ 6634static 6635UInt dis_cmov_E_G ( UChar sorb, 6636 Int sz, 6637 X86Condcode cond, 6638 Int delta0 ) 6639{ 6640 UChar rm = getIByte(delta0); 6641 HChar dis_buf[50]; 6642 Int len; 6643 6644 IRType ty = szToITy(sz); 6645 IRTemp tmps = newTemp(ty); 6646 IRTemp tmpd = newTemp(ty); 6647 6648 if (epartIsReg(rm)) { 6649 assign( tmps, getIReg(sz, eregOfRM(rm)) ); 6650 assign( tmpd, getIReg(sz, gregOfRM(rm)) ); 6651 6652 putIReg(sz, gregOfRM(rm), 6653 IRExpr_Mux0X( unop(Iop_1Uto8, 6654 mk_x86g_calculate_condition(cond)), 6655 mkexpr(tmpd), 6656 mkexpr(tmps) ) 6657 ); 6658 DIP("cmov%c%s %s,%s\n", nameISize(sz), 6659 name_X86Condcode(cond), 6660 nameIReg(sz,eregOfRM(rm)), 6661 nameIReg(sz,gregOfRM(rm))); 6662 return 1+delta0; 6663 } 6664 6665 /* E refers to memory */ 6666 { 6667 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); 6668 assign( tmps, loadLE(ty, mkexpr(addr)) ); 6669 assign( tmpd, getIReg(sz, gregOfRM(rm)) ); 6670 6671 putIReg(sz, gregOfRM(rm), 6672 IRExpr_Mux0X( unop(Iop_1Uto8, 6673 mk_x86g_calculate_condition(cond)), 6674 mkexpr(tmpd), 6675 mkexpr(tmps) ) 6676 ); 6677 6678 DIP("cmov%c%s %s,%s\n", nameISize(sz), 6679 name_X86Condcode(cond), 6680 dis_buf, 6681 nameIReg(sz,gregOfRM(rm))); 6682 return len+delta0; 6683 } 6684} 6685 6686 6687static 6688UInt dis_xadd_G_E ( UChar sorb, Bool locked, Int sz, Int delta0, 6689 Bool* decodeOK ) 6690{ 6691 Int len; 6692 UChar rm = getIByte(delta0); 6693 HChar dis_buf[50]; 6694 6695 IRType ty = szToITy(sz); 6696 IRTemp tmpd = newTemp(ty); 6697 IRTemp tmpt0 = newTemp(ty); 6698 IRTemp tmpt1 = newTemp(ty); 6699 6700 /* There are 3 cases to consider: 6701 6702 reg-reg: ignore any lock prefix, 6703 generate 'naive' (non-atomic) sequence 6704 6705 reg-mem, not locked: ignore any lock prefix, generate 'naive' 6706 (non-atomic) sequence 6707 6708 reg-mem, locked: use IRCAS 6709 */ 6710 6711 if (epartIsReg(rm)) { 6712 /* case 1 */ 6713 assign( tmpd, getIReg(sz, eregOfRM(rm))); 6714 assign( tmpt0, getIReg(sz, gregOfRM(rm)) ); 6715 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 6716 mkexpr(tmpd), mkexpr(tmpt0)) ); 6717 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 6718 putIReg(sz, eregOfRM(rm), mkexpr(tmpt1)); 6719 putIReg(sz, gregOfRM(rm), mkexpr(tmpd)); 6720 DIP("xadd%c %s, %s\n", 6721 nameISize(sz), nameIReg(sz,gregOfRM(rm)), 6722 nameIReg(sz,eregOfRM(rm))); 6723 *decodeOK = True; 6724 return 1+delta0; 6725 } 6726 else if (!epartIsReg(rm) && !locked) { 6727 /* case 2 */ 6728 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); 6729 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 6730 assign( tmpt0, getIReg(sz, gregOfRM(rm)) ); 6731 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 6732 mkexpr(tmpd), mkexpr(tmpt0)) ); 6733 storeLE( mkexpr(addr), mkexpr(tmpt1) ); 6734 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 6735 putIReg(sz, gregOfRM(rm), mkexpr(tmpd)); 6736 DIP("xadd%c %s, %s\n", 6737 nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf); 6738 *decodeOK = True; 6739 return len+delta0; 6740 } 6741 else if (!epartIsReg(rm) && locked) { 6742 /* case 3 */ 6743 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); 6744 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 6745 assign( tmpt0, getIReg(sz, gregOfRM(rm)) ); 6746 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 6747 mkexpr(tmpd), mkexpr(tmpt0)) ); 6748 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/, 6749 mkexpr(tmpt1)/*newVal*/, guest_EIP_curr_instr ); 6750 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 6751 putIReg(sz, gregOfRM(rm), mkexpr(tmpd)); 6752 DIP("xadd%c %s, %s\n", 6753 nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf); 6754 *decodeOK = True; 6755 return len+delta0; 6756 } 6757 /*UNREACHED*/ 6758 vassert(0); 6759} 6760 6761/* Move 16 bits from Ew (ireg or mem) to G (a segment register). */ 6762 6763static 6764UInt dis_mov_Ew_Sw ( UChar sorb, Int delta0 ) 6765{ 6766 Int len; 6767 IRTemp addr; 6768 UChar rm = getIByte(delta0); 6769 HChar dis_buf[50]; 6770 6771 if (epartIsReg(rm)) { 6772 putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) ); 6773 DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm))); 6774 return 1+delta0; 6775 } else { 6776 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6777 putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) ); 6778 DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm))); 6779 return len+delta0; 6780 } 6781} 6782 6783/* Move 16 bits from G (a segment register) to Ew (ireg or mem). If 6784 dst is ireg and sz==4, zero out top half of it. */ 6785 6786static 6787UInt dis_mov_Sw_Ew ( UChar sorb, 6788 Int sz, 6789 Int delta0 ) 6790{ 6791 Int len; 6792 IRTemp addr; 6793 UChar rm = getIByte(delta0); 6794 HChar dis_buf[50]; 6795 6796 vassert(sz == 2 || sz == 4); 6797 6798 if (epartIsReg(rm)) { 6799 if (sz == 4) 6800 putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm)))); 6801 else 6802 putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm))); 6803 6804 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm))); 6805 return 1+delta0; 6806 } else { 6807 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6808 storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) ); 6809 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf); 6810 return len+delta0; 6811 } 6812} 6813 6814 6815static 6816void dis_push_segreg ( UInt sreg, Int sz ) 6817{ 6818 IRTemp t1 = newTemp(Ity_I16); 6819 IRTemp ta = newTemp(Ity_I32); 6820 vassert(sz == 2 || sz == 4); 6821 6822 assign( t1, getSReg(sreg) ); 6823 assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) ); 6824 putIReg(4, R_ESP, mkexpr(ta)); 6825 storeLE( mkexpr(ta), mkexpr(t1) ); 6826 6827 DIP("push%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg)); 6828} 6829 6830static 6831void dis_pop_segreg ( UInt sreg, Int sz ) 6832{ 6833 IRTemp t1 = newTemp(Ity_I16); 6834 IRTemp ta = newTemp(Ity_I32); 6835 vassert(sz == 2 || sz == 4); 6836 6837 assign( ta, getIReg(4, R_ESP) ); 6838 assign( t1, loadLE(Ity_I16, mkexpr(ta)) ); 6839 6840 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) ); 6841 putSReg( sreg, mkexpr(t1) ); 6842 DIP("pop%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg)); 6843} 6844 6845static 6846void dis_ret ( /*MOD*/DisResult* dres, UInt d32 ) 6847{ 6848 IRTemp t1 = newTemp(Ity_I32); 6849 IRTemp t2 = newTemp(Ity_I32); 6850 assign(t1, getIReg(4,R_ESP)); 6851 assign(t2, loadLE(Ity_I32,mkexpr(t1))); 6852 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(4+d32))); 6853 jmp_treg(dres, Ijk_Ret, t2); 6854 vassert(dres->whatNext == Dis_StopHere); 6855} 6856 6857/*------------------------------------------------------------*/ 6858/*--- SSE/SSE2/SSE3 helpers ---*/ 6859/*------------------------------------------------------------*/ 6860 6861/* Worker function; do not call directly. 6862 Handles full width G = G `op` E and G = (not G) `op` E. 6863*/ 6864 6865static UInt dis_SSE_E_to_G_all_wrk ( 6866 UChar sorb, Int delta, 6867 HChar* opname, IROp op, 6868 Bool invertG 6869 ) 6870{ 6871 HChar dis_buf[50]; 6872 Int alen; 6873 IRTemp addr; 6874 UChar rm = getIByte(delta); 6875 IRExpr* gpart 6876 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRM(rm))) 6877 : getXMMReg(gregOfRM(rm)); 6878 if (epartIsReg(rm)) { 6879 putXMMReg( gregOfRM(rm), 6880 binop(op, gpart, 6881 getXMMReg(eregOfRM(rm))) ); 6882 DIP("%s %s,%s\n", opname, 6883 nameXMMReg(eregOfRM(rm)), 6884 nameXMMReg(gregOfRM(rm)) ); 6885 return delta+1; 6886 } else { 6887 addr = disAMode ( &alen, sorb, delta, dis_buf ); 6888 putXMMReg( gregOfRM(rm), 6889 binop(op, gpart, 6890 loadLE(Ity_V128, mkexpr(addr))) ); 6891 DIP("%s %s,%s\n", opname, 6892 dis_buf, 6893 nameXMMReg(gregOfRM(rm)) ); 6894 return delta+alen; 6895 } 6896} 6897 6898 6899/* All lanes SSE binary operation, G = G `op` E. */ 6900 6901static 6902UInt dis_SSE_E_to_G_all ( UChar sorb, Int delta, HChar* opname, IROp op ) 6903{ 6904 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, False ); 6905} 6906 6907/* All lanes SSE binary operation, G = (not G) `op` E. */ 6908 6909static 6910UInt dis_SSE_E_to_G_all_invG ( UChar sorb, Int delta, 6911 HChar* opname, IROp op ) 6912{ 6913 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, True ); 6914} 6915 6916 6917/* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */ 6918 6919static UInt dis_SSE_E_to_G_lo32 ( UChar sorb, Int delta, 6920 HChar* opname, IROp op ) 6921{ 6922 HChar dis_buf[50]; 6923 Int alen; 6924 IRTemp addr; 6925 UChar rm = getIByte(delta); 6926 IRExpr* gpart = getXMMReg(gregOfRM(rm)); 6927 if (epartIsReg(rm)) { 6928 putXMMReg( gregOfRM(rm), 6929 binop(op, gpart, 6930 getXMMReg(eregOfRM(rm))) ); 6931 DIP("%s %s,%s\n", opname, 6932 nameXMMReg(eregOfRM(rm)), 6933 nameXMMReg(gregOfRM(rm)) ); 6934 return delta+1; 6935 } else { 6936 /* We can only do a 32-bit memory read, so the upper 3/4 of the 6937 E operand needs to be made simply of zeroes. */ 6938 IRTemp epart = newTemp(Ity_V128); 6939 addr = disAMode ( &alen, sorb, delta, dis_buf ); 6940 assign( epart, unop( Iop_32UtoV128, 6941 loadLE(Ity_I32, mkexpr(addr))) ); 6942 putXMMReg( gregOfRM(rm), 6943 binop(op, gpart, mkexpr(epart)) ); 6944 DIP("%s %s,%s\n", opname, 6945 dis_buf, 6946 nameXMMReg(gregOfRM(rm)) ); 6947 return delta+alen; 6948 } 6949} 6950 6951 6952/* Lower 64-bit lane only SSE binary operation, G = G `op` E. */ 6953 6954static UInt dis_SSE_E_to_G_lo64 ( UChar sorb, Int delta, 6955 HChar* opname, IROp op ) 6956{ 6957 HChar dis_buf[50]; 6958 Int alen; 6959 IRTemp addr; 6960 UChar rm = getIByte(delta); 6961 IRExpr* gpart = getXMMReg(gregOfRM(rm)); 6962 if (epartIsReg(rm)) { 6963 putXMMReg( gregOfRM(rm), 6964 binop(op, gpart, 6965 getXMMReg(eregOfRM(rm))) ); 6966 DIP("%s %s,%s\n", opname, 6967 nameXMMReg(eregOfRM(rm)), 6968 nameXMMReg(gregOfRM(rm)) ); 6969 return delta+1; 6970 } else { 6971 /* We can only do a 64-bit memory read, so the upper half of the 6972 E operand needs to be made simply of zeroes. */ 6973 IRTemp epart = newTemp(Ity_V128); 6974 addr = disAMode ( &alen, sorb, delta, dis_buf ); 6975 assign( epart, unop( Iop_64UtoV128, 6976 loadLE(Ity_I64, mkexpr(addr))) ); 6977 putXMMReg( gregOfRM(rm), 6978 binop(op, gpart, mkexpr(epart)) ); 6979 DIP("%s %s,%s\n", opname, 6980 dis_buf, 6981 nameXMMReg(gregOfRM(rm)) ); 6982 return delta+alen; 6983 } 6984} 6985 6986 6987/* All lanes unary SSE operation, G = op(E). */ 6988 6989static UInt dis_SSE_E_to_G_unary_all ( 6990 UChar sorb, Int delta, 6991 HChar* opname, IROp op 6992 ) 6993{ 6994 HChar dis_buf[50]; 6995 Int alen; 6996 IRTemp addr; 6997 UChar rm = getIByte(delta); 6998 if (epartIsReg(rm)) { 6999 putXMMReg( gregOfRM(rm), 7000 unop(op, getXMMReg(eregOfRM(rm))) ); 7001 DIP("%s %s,%s\n", opname, 7002 nameXMMReg(eregOfRM(rm)), 7003 nameXMMReg(gregOfRM(rm)) ); 7004 return delta+1; 7005 } else { 7006 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7007 putXMMReg( gregOfRM(rm), 7008 unop(op, loadLE(Ity_V128, mkexpr(addr))) ); 7009 DIP("%s %s,%s\n", opname, 7010 dis_buf, 7011 nameXMMReg(gregOfRM(rm)) ); 7012 return delta+alen; 7013 } 7014} 7015 7016 7017/* Lowest 32-bit lane only unary SSE operation, G = op(E). */ 7018 7019static UInt dis_SSE_E_to_G_unary_lo32 ( 7020 UChar sorb, Int delta, 7021 HChar* opname, IROp op 7022 ) 7023{ 7024 /* First we need to get the old G value and patch the low 32 bits 7025 of the E operand into it. Then apply op and write back to G. */ 7026 HChar dis_buf[50]; 7027 Int alen; 7028 IRTemp addr; 7029 UChar rm = getIByte(delta); 7030 IRTemp oldG0 = newTemp(Ity_V128); 7031 IRTemp oldG1 = newTemp(Ity_V128); 7032 7033 assign( oldG0, getXMMReg(gregOfRM(rm)) ); 7034 7035 if (epartIsReg(rm)) { 7036 assign( oldG1, 7037 binop( Iop_SetV128lo32, 7038 mkexpr(oldG0), 7039 getXMMRegLane32(eregOfRM(rm), 0)) ); 7040 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7041 DIP("%s %s,%s\n", opname, 7042 nameXMMReg(eregOfRM(rm)), 7043 nameXMMReg(gregOfRM(rm)) ); 7044 return delta+1; 7045 } else { 7046 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7047 assign( oldG1, 7048 binop( Iop_SetV128lo32, 7049 mkexpr(oldG0), 7050 loadLE(Ity_I32, mkexpr(addr)) )); 7051 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7052 DIP("%s %s,%s\n", opname, 7053 dis_buf, 7054 nameXMMReg(gregOfRM(rm)) ); 7055 return delta+alen; 7056 } 7057} 7058 7059 7060/* Lowest 64-bit lane only unary SSE operation, G = op(E). */ 7061 7062static UInt dis_SSE_E_to_G_unary_lo64 ( 7063 UChar sorb, Int delta, 7064 HChar* opname, IROp op 7065 ) 7066{ 7067 /* First we need to get the old G value and patch the low 64 bits 7068 of the E operand into it. Then apply op and write back to G. */ 7069 HChar dis_buf[50]; 7070 Int alen; 7071 IRTemp addr; 7072 UChar rm = getIByte(delta); 7073 IRTemp oldG0 = newTemp(Ity_V128); 7074 IRTemp oldG1 = newTemp(Ity_V128); 7075 7076 assign( oldG0, getXMMReg(gregOfRM(rm)) ); 7077 7078 if (epartIsReg(rm)) { 7079 assign( oldG1, 7080 binop( Iop_SetV128lo64, 7081 mkexpr(oldG0), 7082 getXMMRegLane64(eregOfRM(rm), 0)) ); 7083 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7084 DIP("%s %s,%s\n", opname, 7085 nameXMMReg(eregOfRM(rm)), 7086 nameXMMReg(gregOfRM(rm)) ); 7087 return delta+1; 7088 } else { 7089 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7090 assign( oldG1, 7091 binop( Iop_SetV128lo64, 7092 mkexpr(oldG0), 7093 loadLE(Ity_I64, mkexpr(addr)) )); 7094 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7095 DIP("%s %s,%s\n", opname, 7096 dis_buf, 7097 nameXMMReg(gregOfRM(rm)) ); 7098 return delta+alen; 7099 } 7100} 7101 7102 7103/* SSE integer binary operation: 7104 G = G `op` E (eLeft == False) 7105 G = E `op` G (eLeft == True) 7106*/ 7107static UInt dis_SSEint_E_to_G( 7108 UChar sorb, Int delta, 7109 HChar* opname, IROp op, 7110 Bool eLeft 7111 ) 7112{ 7113 HChar dis_buf[50]; 7114 Int alen; 7115 IRTemp addr; 7116 UChar rm = getIByte(delta); 7117 IRExpr* gpart = getXMMReg(gregOfRM(rm)); 7118 IRExpr* epart = NULL; 7119 if (epartIsReg(rm)) { 7120 epart = getXMMReg(eregOfRM(rm)); 7121 DIP("%s %s,%s\n", opname, 7122 nameXMMReg(eregOfRM(rm)), 7123 nameXMMReg(gregOfRM(rm)) ); 7124 delta += 1; 7125 } else { 7126 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7127 epart = loadLE(Ity_V128, mkexpr(addr)); 7128 DIP("%s %s,%s\n", opname, 7129 dis_buf, 7130 nameXMMReg(gregOfRM(rm)) ); 7131 delta += alen; 7132 } 7133 putXMMReg( gregOfRM(rm), 7134 eLeft ? binop(op, epart, gpart) 7135 : binop(op, gpart, epart) ); 7136 return delta; 7137} 7138 7139 7140/* Helper for doing SSE FP comparisons. */ 7141 7142static void findSSECmpOp ( Bool* needNot, IROp* op, 7143 Int imm8, Bool all_lanes, Int sz ) 7144{ 7145 imm8 &= 7; 7146 *needNot = False; 7147 *op = Iop_INVALID; 7148 if (imm8 >= 4) { 7149 *needNot = True; 7150 imm8 -= 4; 7151 } 7152 7153 if (sz == 4 && all_lanes) { 7154 switch (imm8) { 7155 case 0: *op = Iop_CmpEQ32Fx4; return; 7156 case 1: *op = Iop_CmpLT32Fx4; return; 7157 case 2: *op = Iop_CmpLE32Fx4; return; 7158 case 3: *op = Iop_CmpUN32Fx4; return; 7159 default: break; 7160 } 7161 } 7162 if (sz == 4 && !all_lanes) { 7163 switch (imm8) { 7164 case 0: *op = Iop_CmpEQ32F0x4; return; 7165 case 1: *op = Iop_CmpLT32F0x4; return; 7166 case 2: *op = Iop_CmpLE32F0x4; return; 7167 case 3: *op = Iop_CmpUN32F0x4; return; 7168 default: break; 7169 } 7170 } 7171 if (sz == 8 && all_lanes) { 7172 switch (imm8) { 7173 case 0: *op = Iop_CmpEQ64Fx2; return; 7174 case 1: *op = Iop_CmpLT64Fx2; return; 7175 case 2: *op = Iop_CmpLE64Fx2; return; 7176 case 3: *op = Iop_CmpUN64Fx2; return; 7177 default: break; 7178 } 7179 } 7180 if (sz == 8 && !all_lanes) { 7181 switch (imm8) { 7182 case 0: *op = Iop_CmpEQ64F0x2; return; 7183 case 1: *op = Iop_CmpLT64F0x2; return; 7184 case 2: *op = Iop_CmpLE64F0x2; return; 7185 case 3: *op = Iop_CmpUN64F0x2; return; 7186 default: break; 7187 } 7188 } 7189 vpanic("findSSECmpOp(x86,guest)"); 7190} 7191 7192/* Handles SSE 32F/64F comparisons. */ 7193 7194static UInt dis_SSEcmp_E_to_G ( UChar sorb, Int delta, 7195 HChar* opname, Bool all_lanes, Int sz ) 7196{ 7197 HChar dis_buf[50]; 7198 Int alen, imm8; 7199 IRTemp addr; 7200 Bool needNot = False; 7201 IROp op = Iop_INVALID; 7202 IRTemp plain = newTemp(Ity_V128); 7203 UChar rm = getIByte(delta); 7204 UShort mask = 0; 7205 vassert(sz == 4 || sz == 8); 7206 if (epartIsReg(rm)) { 7207 imm8 = getIByte(delta+1); 7208 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz); 7209 assign( plain, binop(op, getXMMReg(gregOfRM(rm)), 7210 getXMMReg(eregOfRM(rm))) ); 7211 delta += 2; 7212 DIP("%s $%d,%s,%s\n", opname, 7213 (Int)imm8, 7214 nameXMMReg(eregOfRM(rm)), 7215 nameXMMReg(gregOfRM(rm)) ); 7216 } else { 7217 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7218 imm8 = getIByte(delta+alen); 7219 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz); 7220 assign( plain, 7221 binop( 7222 op, 7223 getXMMReg(gregOfRM(rm)), 7224 all_lanes ? loadLE(Ity_V128, mkexpr(addr)) 7225 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr))) 7226 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))) 7227 ) 7228 ); 7229 delta += alen+1; 7230 DIP("%s $%d,%s,%s\n", opname, 7231 (Int)imm8, 7232 dis_buf, 7233 nameXMMReg(gregOfRM(rm)) ); 7234 } 7235 7236 if (needNot && all_lanes) { 7237 putXMMReg( gregOfRM(rm), 7238 unop(Iop_NotV128, mkexpr(plain)) ); 7239 } 7240 else 7241 if (needNot && !all_lanes) { 7242 mask = toUShort( sz==4 ? 0x000F : 0x00FF ); 7243 putXMMReg( gregOfRM(rm), 7244 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) ); 7245 } 7246 else { 7247 putXMMReg( gregOfRM(rm), mkexpr(plain) ); 7248 } 7249 7250 return delta; 7251} 7252 7253 7254/* Vector by scalar shift of G by the amount specified at the bottom 7255 of E. */ 7256 7257static UInt dis_SSE_shiftG_byE ( UChar sorb, Int delta, 7258 HChar* opname, IROp op ) 7259{ 7260 HChar dis_buf[50]; 7261 Int alen, size; 7262 IRTemp addr; 7263 Bool shl, shr, sar; 7264 UChar rm = getIByte(delta); 7265 IRTemp g0 = newTemp(Ity_V128); 7266 IRTemp g1 = newTemp(Ity_V128); 7267 IRTemp amt = newTemp(Ity_I32); 7268 IRTemp amt8 = newTemp(Ity_I8); 7269 if (epartIsReg(rm)) { 7270 assign( amt, getXMMRegLane32(eregOfRM(rm), 0) ); 7271 DIP("%s %s,%s\n", opname, 7272 nameXMMReg(eregOfRM(rm)), 7273 nameXMMReg(gregOfRM(rm)) ); 7274 delta++; 7275 } else { 7276 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7277 assign( amt, loadLE(Ity_I32, mkexpr(addr)) ); 7278 DIP("%s %s,%s\n", opname, 7279 dis_buf, 7280 nameXMMReg(gregOfRM(rm)) ); 7281 delta += alen; 7282 } 7283 assign( g0, getXMMReg(gregOfRM(rm)) ); 7284 assign( amt8, unop(Iop_32to8, mkexpr(amt)) ); 7285 7286 shl = shr = sar = False; 7287 size = 0; 7288 switch (op) { 7289 case Iop_ShlN16x8: shl = True; size = 32; break; 7290 case Iop_ShlN32x4: shl = True; size = 32; break; 7291 case Iop_ShlN64x2: shl = True; size = 64; break; 7292 case Iop_SarN16x8: sar = True; size = 16; break; 7293 case Iop_SarN32x4: sar = True; size = 32; break; 7294 case Iop_ShrN16x8: shr = True; size = 16; break; 7295 case Iop_ShrN32x4: shr = True; size = 32; break; 7296 case Iop_ShrN64x2: shr = True; size = 64; break; 7297 default: vassert(0); 7298 } 7299 7300 if (shl || shr) { 7301 assign( 7302 g1, 7303 IRExpr_Mux0X( 7304 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))), 7305 mkV128(0x0000), 7306 binop(op, mkexpr(g0), mkexpr(amt8)) 7307 ) 7308 ); 7309 } else 7310 if (sar) { 7311 assign( 7312 g1, 7313 IRExpr_Mux0X( 7314 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))), 7315 binop(op, mkexpr(g0), mkU8(size-1)), 7316 binop(op, mkexpr(g0), mkexpr(amt8)) 7317 ) 7318 ); 7319 } else { 7320 /*NOTREACHED*/ 7321 vassert(0); 7322 } 7323 7324 putXMMReg( gregOfRM(rm), mkexpr(g1) ); 7325 return delta; 7326} 7327 7328 7329/* Vector by scalar shift of E by an immediate byte. */ 7330 7331static 7332UInt dis_SSE_shiftE_imm ( Int delta, HChar* opname, IROp op ) 7333{ 7334 Bool shl, shr, sar; 7335 UChar rm = getIByte(delta); 7336 IRTemp e0 = newTemp(Ity_V128); 7337 IRTemp e1 = newTemp(Ity_V128); 7338 UChar amt, size; 7339 vassert(epartIsReg(rm)); 7340 vassert(gregOfRM(rm) == 2 7341 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6); 7342 amt = getIByte(delta+1); 7343 delta += 2; 7344 DIP("%s $%d,%s\n", opname, 7345 (Int)amt, 7346 nameXMMReg(eregOfRM(rm)) ); 7347 assign( e0, getXMMReg(eregOfRM(rm)) ); 7348 7349 shl = shr = sar = False; 7350 size = 0; 7351 switch (op) { 7352 case Iop_ShlN16x8: shl = True; size = 16; break; 7353 case Iop_ShlN32x4: shl = True; size = 32; break; 7354 case Iop_ShlN64x2: shl = True; size = 64; break; 7355 case Iop_SarN16x8: sar = True; size = 16; break; 7356 case Iop_SarN32x4: sar = True; size = 32; break; 7357 case Iop_ShrN16x8: shr = True; size = 16; break; 7358 case Iop_ShrN32x4: shr = True; size = 32; break; 7359 case Iop_ShrN64x2: shr = True; size = 64; break; 7360 default: vassert(0); 7361 } 7362 7363 if (shl || shr) { 7364 assign( e1, amt >= size 7365 ? mkV128(0x0000) 7366 : binop(op, mkexpr(e0), mkU8(amt)) 7367 ); 7368 } else 7369 if (sar) { 7370 assign( e1, amt >= size 7371 ? binop(op, mkexpr(e0), mkU8(size-1)) 7372 : binop(op, mkexpr(e0), mkU8(amt)) 7373 ); 7374 } else { 7375 /*NOTREACHED*/ 7376 vassert(0); 7377 } 7378 7379 putXMMReg( eregOfRM(rm), mkexpr(e1) ); 7380 return delta; 7381} 7382 7383 7384/* Get the current SSE rounding mode. */ 7385 7386static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void ) 7387{ 7388 return binop( Iop_And32, 7389 IRExpr_Get( OFFB_SSEROUND, Ity_I32 ), 7390 mkU32(3) ); 7391} 7392 7393static void put_sse_roundingmode ( IRExpr* sseround ) 7394{ 7395 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32); 7396 stmt( IRStmt_Put( OFFB_SSEROUND, sseround ) ); 7397} 7398 7399/* Break a 128-bit value up into four 32-bit ints. */ 7400 7401static void breakup128to32s ( IRTemp t128, 7402 /*OUTs*/ 7403 IRTemp* t3, IRTemp* t2, 7404 IRTemp* t1, IRTemp* t0 ) 7405{ 7406 IRTemp hi64 = newTemp(Ity_I64); 7407 IRTemp lo64 = newTemp(Ity_I64); 7408 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) ); 7409 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) ); 7410 7411 vassert(t0 && *t0 == IRTemp_INVALID); 7412 vassert(t1 && *t1 == IRTemp_INVALID); 7413 vassert(t2 && *t2 == IRTemp_INVALID); 7414 vassert(t3 && *t3 == IRTemp_INVALID); 7415 7416 *t0 = newTemp(Ity_I32); 7417 *t1 = newTemp(Ity_I32); 7418 *t2 = newTemp(Ity_I32); 7419 *t3 = newTemp(Ity_I32); 7420 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) ); 7421 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) ); 7422 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) ); 7423 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) ); 7424} 7425 7426/* Construct a 128-bit value from four 32-bit ints. */ 7427 7428static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2, 7429 IRTemp t1, IRTemp t0 ) 7430{ 7431 return 7432 binop( Iop_64HLtoV128, 7433 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), 7434 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) 7435 ); 7436} 7437 7438/* Break a 64-bit value up into four 16-bit ints. */ 7439 7440static void breakup64to16s ( IRTemp t64, 7441 /*OUTs*/ 7442 IRTemp* t3, IRTemp* t2, 7443 IRTemp* t1, IRTemp* t0 ) 7444{ 7445 IRTemp hi32 = newTemp(Ity_I32); 7446 IRTemp lo32 = newTemp(Ity_I32); 7447 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) ); 7448 assign( lo32, unop(Iop_64to32, mkexpr(t64)) ); 7449 7450 vassert(t0 && *t0 == IRTemp_INVALID); 7451 vassert(t1 && *t1 == IRTemp_INVALID); 7452 vassert(t2 && *t2 == IRTemp_INVALID); 7453 vassert(t3 && *t3 == IRTemp_INVALID); 7454 7455 *t0 = newTemp(Ity_I16); 7456 *t1 = newTemp(Ity_I16); 7457 *t2 = newTemp(Ity_I16); 7458 *t3 = newTemp(Ity_I16); 7459 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) ); 7460 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) ); 7461 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) ); 7462 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) ); 7463} 7464 7465/* Construct a 64-bit value from four 16-bit ints. */ 7466 7467static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2, 7468 IRTemp t1, IRTemp t0 ) 7469{ 7470 return 7471 binop( Iop_32HLto64, 7472 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)), 7473 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0)) 7474 ); 7475} 7476 7477/* Generate IR to set the guest %EFLAGS from the pushfl-format image 7478 in the given 32-bit temporary. The flags that are set are: O S Z A 7479 C P D ID AC. 7480 7481 In all cases, code to set AC is generated. However, VEX actually 7482 ignores the AC value and so can optionally emit an emulation 7483 warning when it is enabled. In this routine, an emulation warning 7484 is only emitted if emit_AC_emwarn is True, in which case 7485 next_insn_EIP must be correct (this allows for correct code 7486 generation for popfl/popfw). If emit_AC_emwarn is False, 7487 next_insn_EIP is unimportant (this allows for easy if kludgey code 7488 generation for IRET.) */ 7489 7490static 7491void set_EFLAGS_from_value ( IRTemp t1, 7492 Bool emit_AC_emwarn, 7493 Addr32 next_insn_EIP ) 7494{ 7495 vassert(typeOfIRTemp(irsb->tyenv,t1) == Ity_I32); 7496 7497 /* t1 is the flag word. Mask out everything except OSZACP and set 7498 the flags thunk to X86G_CC_OP_COPY. */ 7499 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 7500 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 7501 stmt( IRStmt_Put( OFFB_CC_DEP1, 7502 binop(Iop_And32, 7503 mkexpr(t1), 7504 mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P 7505 | X86G_CC_MASK_A | X86G_CC_MASK_Z 7506 | X86G_CC_MASK_S| X86G_CC_MASK_O ) 7507 ) 7508 ) 7509 ); 7510 /* Set NDEP even though it isn't used. This makes redundant-PUT 7511 elimination of previous stores to this field work better. */ 7512 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 7513 7514 /* Also need to set the D flag, which is held in bit 10 of t1. 7515 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */ 7516 stmt( IRStmt_Put( 7517 OFFB_DFLAG, 7518 IRExpr_Mux0X( 7519 unop(Iop_32to8, 7520 binop(Iop_And32, 7521 binop(Iop_Shr32, mkexpr(t1), mkU8(10)), 7522 mkU32(1))), 7523 mkU32(1), 7524 mkU32(0xFFFFFFFF))) 7525 ); 7526 7527 /* Set the ID flag */ 7528 stmt( IRStmt_Put( 7529 OFFB_IDFLAG, 7530 IRExpr_Mux0X( 7531 unop(Iop_32to8, 7532 binop(Iop_And32, 7533 binop(Iop_Shr32, mkexpr(t1), mkU8(21)), 7534 mkU32(1))), 7535 mkU32(0), 7536 mkU32(1))) 7537 ); 7538 7539 /* And set the AC flag. If setting it 1 to, possibly emit an 7540 emulation warning. */ 7541 stmt( IRStmt_Put( 7542 OFFB_ACFLAG, 7543 IRExpr_Mux0X( 7544 unop(Iop_32to8, 7545 binop(Iop_And32, 7546 binop(Iop_Shr32, mkexpr(t1), mkU8(18)), 7547 mkU32(1))), 7548 mkU32(0), 7549 mkU32(1))) 7550 ); 7551 7552 if (emit_AC_emwarn) { 7553 put_emwarn( mkU32(EmWarn_X86_acFlag) ); 7554 stmt( 7555 IRStmt_Exit( 7556 binop( Iop_CmpNE32, 7557 binop(Iop_And32, mkexpr(t1), mkU32(1<<18)), 7558 mkU32(0) ), 7559 Ijk_EmWarn, 7560 IRConst_U32( next_insn_EIP ), 7561 OFFB_EIP 7562 ) 7563 ); 7564 } 7565} 7566 7567 7568/* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit 7569 values (aa,bb), computes, for each of the 4 16-bit lanes: 7570 7571 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1 7572*/ 7573static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx ) 7574{ 7575 IRTemp aa = newTemp(Ity_I64); 7576 IRTemp bb = newTemp(Ity_I64); 7577 IRTemp aahi32s = newTemp(Ity_I64); 7578 IRTemp aalo32s = newTemp(Ity_I64); 7579 IRTemp bbhi32s = newTemp(Ity_I64); 7580 IRTemp bblo32s = newTemp(Ity_I64); 7581 IRTemp rHi = newTemp(Ity_I64); 7582 IRTemp rLo = newTemp(Ity_I64); 7583 IRTemp one32x2 = newTemp(Ity_I64); 7584 assign(aa, aax); 7585 assign(bb, bbx); 7586 assign( aahi32s, 7587 binop(Iop_SarN32x2, 7588 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)), 7589 mkU8(16) )); 7590 assign( aalo32s, 7591 binop(Iop_SarN32x2, 7592 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)), 7593 mkU8(16) )); 7594 assign( bbhi32s, 7595 binop(Iop_SarN32x2, 7596 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)), 7597 mkU8(16) )); 7598 assign( bblo32s, 7599 binop(Iop_SarN32x2, 7600 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)), 7601 mkU8(16) )); 7602 assign(one32x2, mkU64( (1ULL << 32) + 1 )); 7603 assign( 7604 rHi, 7605 binop( 7606 Iop_ShrN32x2, 7607 binop( 7608 Iop_Add32x2, 7609 binop( 7610 Iop_ShrN32x2, 7611 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)), 7612 mkU8(14) 7613 ), 7614 mkexpr(one32x2) 7615 ), 7616 mkU8(1) 7617 ) 7618 ); 7619 assign( 7620 rLo, 7621 binop( 7622 Iop_ShrN32x2, 7623 binop( 7624 Iop_Add32x2, 7625 binop( 7626 Iop_ShrN32x2, 7627 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)), 7628 mkU8(14) 7629 ), 7630 mkexpr(one32x2) 7631 ), 7632 mkU8(1) 7633 ) 7634 ); 7635 return 7636 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo)); 7637} 7638 7639/* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit 7640 values (aa,bb), computes, for each lane: 7641 7642 if aa_lane < 0 then - bb_lane 7643 else if aa_lane > 0 then bb_lane 7644 else 0 7645*/ 7646static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB ) 7647{ 7648 IRTemp aa = newTemp(Ity_I64); 7649 IRTemp bb = newTemp(Ity_I64); 7650 IRTemp zero = newTemp(Ity_I64); 7651 IRTemp bbNeg = newTemp(Ity_I64); 7652 IRTemp negMask = newTemp(Ity_I64); 7653 IRTemp posMask = newTemp(Ity_I64); 7654 IROp opSub = Iop_INVALID; 7655 IROp opCmpGTS = Iop_INVALID; 7656 7657 switch (laneszB) { 7658 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break; 7659 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break; 7660 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break; 7661 default: vassert(0); 7662 } 7663 7664 assign( aa, aax ); 7665 assign( bb, bbx ); 7666 assign( zero, mkU64(0) ); 7667 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) ); 7668 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) ); 7669 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) ); 7670 7671 return 7672 binop(Iop_Or64, 7673 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)), 7674 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) ); 7675 7676} 7677 7678/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit 7679 value aa, computes, for each lane 7680 7681 if aa < 0 then -aa else aa 7682 7683 Note that the result is interpreted as unsigned, so that the 7684 absolute value of the most negative signed input can be 7685 represented. 7686*/ 7687static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB ) 7688{ 7689 IRTemp aa = newTemp(Ity_I64); 7690 IRTemp zero = newTemp(Ity_I64); 7691 IRTemp aaNeg = newTemp(Ity_I64); 7692 IRTemp negMask = newTemp(Ity_I64); 7693 IRTemp posMask = newTemp(Ity_I64); 7694 IROp opSub = Iop_INVALID; 7695 IROp opSarN = Iop_INVALID; 7696 7697 switch (laneszB) { 7698 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break; 7699 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break; 7700 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break; 7701 default: vassert(0); 7702 } 7703 7704 assign( aa, aax ); 7705 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) ); 7706 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) ); 7707 assign( zero, mkU64(0) ); 7708 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) ); 7709 return 7710 binop(Iop_Or64, 7711 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)), 7712 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ); 7713} 7714 7715static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64, 7716 IRTemp lo64, Int byteShift ) 7717{ 7718 vassert(byteShift >= 1 && byteShift <= 7); 7719 return 7720 binop(Iop_Or64, 7721 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))), 7722 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift)) 7723 ); 7724} 7725 7726/* Generate a SIGSEGV followed by a restart of the current instruction 7727 if effective_addr is not 16-aligned. This is required behaviour 7728 for some SSE3 instructions and all 128-bit SSSE3 instructions. 7729 This assumes that guest_RIP_curr_instr is set correctly! */ 7730static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) 7731{ 7732 stmt( 7733 IRStmt_Exit( 7734 binop(Iop_CmpNE32, 7735 binop(Iop_And32,mkexpr(effective_addr),mkU32(0xF)), 7736 mkU32(0)), 7737 Ijk_SigSEGV, 7738 IRConst_U32(guest_EIP_curr_instr), 7739 OFFB_EIP 7740 ) 7741 ); 7742} 7743 7744 7745/* Helper for deciding whether a given insn (starting at the opcode 7746 byte) may validly be used with a LOCK prefix. The following insns 7747 may be used with LOCK when their destination operand is in memory. 7748 AFAICS this is exactly the same for both 32-bit and 64-bit mode. 7749 7750 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01 7751 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09 7752 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11 7753 SBB 81 /3, 81 /3, 82 /x, 83 /3, 18, 19 7754 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21 7755 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29 7756 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31 7757 7758 DEC FE /1, FF /1 7759 INC FE /0, FF /0 7760 7761 NEG F6 /3, F7 /3 7762 NOT F6 /2, F7 /2 7763 7764 XCHG 86, 87 7765 7766 BTC 0F BB, 0F BA /7 7767 BTR 0F B3, 0F BA /6 7768 BTS 0F AB, 0F BA /5 7769 7770 CMPXCHG 0F B0, 0F B1 7771 CMPXCHG8B 0F C7 /1 7772 7773 XADD 0F C0, 0F C1 7774 7775 ------------------------------ 7776 7777 80 /0 = addb $imm8, rm8 7778 81 /0 = addl $imm32, rm32 and addw $imm16, rm16 7779 82 /0 = addb $imm8, rm8 7780 83 /0 = addl $simm8, rm32 and addw $simm8, rm16 7781 7782 00 = addb r8, rm8 7783 01 = addl r32, rm32 and addw r16, rm16 7784 7785 Same for ADD OR ADC SBB AND SUB XOR 7786 7787 FE /1 = dec rm8 7788 FF /1 = dec rm32 and dec rm16 7789 7790 FE /0 = inc rm8 7791 FF /0 = inc rm32 and inc rm16 7792 7793 F6 /3 = neg rm8 7794 F7 /3 = neg rm32 and neg rm16 7795 7796 F6 /2 = not rm8 7797 F7 /2 = not rm32 and not rm16 7798 7799 0F BB = btcw r16, rm16 and btcl r32, rm32 7800 OF BA /7 = btcw $imm8, rm16 and btcw $imm8, rm32 7801 7802 Same for BTS, BTR 7803*/ 7804static Bool can_be_used_with_LOCK_prefix ( UChar* opc ) 7805{ 7806 switch (opc[0]) { 7807 case 0x00: case 0x01: case 0x08: case 0x09: 7808 case 0x10: case 0x11: case 0x18: case 0x19: 7809 case 0x20: case 0x21: case 0x28: case 0x29: 7810 case 0x30: case 0x31: 7811 if (!epartIsReg(opc[1])) 7812 return True; 7813 break; 7814 7815 case 0x80: case 0x81: case 0x82: case 0x83: 7816 if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6 7817 && !epartIsReg(opc[1])) 7818 return True; 7819 break; 7820 7821 case 0xFE: case 0xFF: 7822 if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1 7823 && !epartIsReg(opc[1])) 7824 return True; 7825 break; 7826 7827 case 0xF6: case 0xF7: 7828 if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3 7829 && !epartIsReg(opc[1])) 7830 return True; 7831 break; 7832 7833 case 0x86: case 0x87: 7834 if (!epartIsReg(opc[1])) 7835 return True; 7836 break; 7837 7838 case 0x0F: { 7839 switch (opc[1]) { 7840 case 0xBB: case 0xB3: case 0xAB: 7841 if (!epartIsReg(opc[2])) 7842 return True; 7843 break; 7844 case 0xBA: 7845 if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7 7846 && !epartIsReg(opc[2])) 7847 return True; 7848 break; 7849 case 0xB0: case 0xB1: 7850 if (!epartIsReg(opc[2])) 7851 return True; 7852 break; 7853 case 0xC7: 7854 if (gregOfRM(opc[2]) == 1 && !epartIsReg(opc[2]) ) 7855 return True; 7856 break; 7857 case 0xC0: case 0xC1: 7858 if (!epartIsReg(opc[2])) 7859 return True; 7860 break; 7861 default: 7862 break; 7863 } /* switch (opc[1]) */ 7864 break; 7865 } 7866 7867 default: 7868 break; 7869 } /* switch (opc[0]) */ 7870 7871 return False; 7872} 7873 7874static IRTemp math_BSWAP ( IRTemp t1, IRType ty ) 7875{ 7876 IRTemp t2 = newTemp(ty); 7877 if (ty == Ity_I32) { 7878 assign( t2, 7879 binop( 7880 Iop_Or32, 7881 binop(Iop_Shl32, mkexpr(t1), mkU8(24)), 7882 binop( 7883 Iop_Or32, 7884 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)), 7885 mkU32(0x00FF0000)), 7886 binop(Iop_Or32, 7887 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)), 7888 mkU32(0x0000FF00)), 7889 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)), 7890 mkU32(0x000000FF) ) 7891 ))) 7892 ); 7893 return t2; 7894 } 7895 if (ty == Ity_I16) { 7896 assign(t2, 7897 binop(Iop_Or16, 7898 binop(Iop_Shl16, mkexpr(t1), mkU8(8)), 7899 binop(Iop_Shr16, mkexpr(t1), mkU8(8)) )); 7900 return t2; 7901 } 7902 vassert(0); 7903 /*NOTREACHED*/ 7904 return IRTemp_INVALID; 7905} 7906 7907/*------------------------------------------------------------*/ 7908/*--- Disassemble a single instruction ---*/ 7909/*------------------------------------------------------------*/ 7910 7911/* Disassemble a single instruction into IR. The instruction is 7912 located in host memory at &guest_code[delta]. *expect_CAS is set 7913 to True if the resulting IR is expected to contain an IRCAS 7914 statement, and False if it's not expected to. This makes it 7915 possible for the caller of disInstr_X86_WRK to check that 7916 LOCK-prefixed instructions are at least plausibly translated, in 7917 that it becomes possible to check that a (validly) LOCK-prefixed 7918 instruction generates a translation containing an IRCAS, and 7919 instructions without LOCK prefixes don't generate translations 7920 containing an IRCAS. 7921*/ 7922static 7923DisResult disInstr_X86_WRK ( 7924 /*OUT*/Bool* expect_CAS, 7925 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 7926 Bool resteerCisOk, 7927 void* callback_opaque, 7928 Long delta64, 7929 VexArchInfo* archinfo, 7930 VexAbiInfo* vbi 7931 ) 7932{ 7933 IRType ty; 7934 IRTemp addr, t0, t1, t2, t3, t4, t5, t6; 7935 Int alen; 7936 UChar opc, modrm, abyte, pre; 7937 UInt d32; 7938 HChar dis_buf[50]; 7939 Int am_sz, d_sz, n_prefixes; 7940 DisResult dres; 7941 UChar* insn; /* used in SSE decoders */ 7942 7943 /* The running delta */ 7944 Int delta = (Int)delta64; 7945 7946 /* Holds eip at the start of the insn, so that we can print 7947 consistent error messages for unimplemented insns. */ 7948 Int delta_start = delta; 7949 7950 /* sz denotes the nominal data-op size of the insn; we change it to 7951 2 if an 0x66 prefix is seen */ 7952 Int sz = 4; 7953 7954 /* sorb holds the segment-override-prefix byte, if any. Zero if no 7955 prefix has been seen, else one of {0x26, 0x3E, 0x64, 0x65} 7956 indicating the prefix. */ 7957 UChar sorb = 0; 7958 7959 /* Gets set to True if a LOCK prefix is seen. */ 7960 Bool pfx_lock = False; 7961 7962 /* Set result defaults. */ 7963 dres.whatNext = Dis_Continue; 7964 dres.len = 0; 7965 dres.continueAt = 0; 7966 dres.jk_StopHere = Ijk_INVALID; 7967 7968 *expect_CAS = False; 7969 7970 addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID; 7971 7972 vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr); 7973 DIP("\t0x%x: ", guest_EIP_bbstart+delta); 7974 7975 /* Spot "Special" instructions (see comment at top of file). */ 7976 { 7977 UChar* code = (UChar*)(guest_code + delta); 7978 /* Spot the 12-byte preamble: 7979 C1C703 roll $3, %edi 7980 C1C70D roll $13, %edi 7981 C1C71D roll $29, %edi 7982 C1C713 roll $19, %edi 7983 */ 7984 if (code[ 0] == 0xC1 && code[ 1] == 0xC7 && code[ 2] == 0x03 && 7985 code[ 3] == 0xC1 && code[ 4] == 0xC7 && code[ 5] == 0x0D && 7986 code[ 6] == 0xC1 && code[ 7] == 0xC7 && code[ 8] == 0x1D && 7987 code[ 9] == 0xC1 && code[10] == 0xC7 && code[11] == 0x13) { 7988 /* Got a "Special" instruction preamble. Which one is it? */ 7989 if (code[12] == 0x87 && code[13] == 0xDB /* xchgl %ebx,%ebx */) { 7990 /* %EDX = client_request ( %EAX ) */ 7991 DIP("%%edx = client_request ( %%eax )\n"); 7992 delta += 14; 7993 jmp_lit(&dres, Ijk_ClientReq, guest_EIP_bbstart+delta); 7994 vassert(dres.whatNext == Dis_StopHere); 7995 goto decode_success; 7996 } 7997 else 7998 if (code[12] == 0x87 && code[13] == 0xC9 /* xchgl %ecx,%ecx */) { 7999 /* %EAX = guest_NRADDR */ 8000 DIP("%%eax = guest_NRADDR\n"); 8001 delta += 14; 8002 putIReg(4, R_EAX, IRExpr_Get( OFFB_NRADDR, Ity_I32 )); 8003 goto decode_success; 8004 } 8005 else 8006 if (code[12] == 0x87 && code[13] == 0xD2 /* xchgl %edx,%edx */) { 8007 /* call-noredir *%EAX */ 8008 DIP("call-noredir *%%eax\n"); 8009 delta += 14; 8010 t1 = newTemp(Ity_I32); 8011 assign(t1, getIReg(4,R_EAX)); 8012 t2 = newTemp(Ity_I32); 8013 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); 8014 putIReg(4, R_ESP, mkexpr(t2)); 8015 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta)); 8016 jmp_treg(&dres, Ijk_NoRedir, t1); 8017 vassert(dres.whatNext == Dis_StopHere); 8018 goto decode_success; 8019 } 8020 /* We don't know what it is. */ 8021 goto decode_failure; 8022 /*NOTREACHED*/ 8023 } 8024 } 8025 8026 /* Handle a couple of weird-ass NOPs that have been observed in the 8027 wild. */ 8028 { 8029 UChar* code = (UChar*)(guest_code + delta); 8030 /* Sun's JVM 1.5.0 uses the following as a NOP: 8031 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */ 8032 if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64 8033 && code[3] == 0x65 && code[4] == 0x90) { 8034 DIP("%%es:%%cs:%%fs:%%gs:nop\n"); 8035 delta += 5; 8036 goto decode_success; 8037 } 8038 /* Don't barf on recent binutils padding, 8039 all variants of which are: nopw %cs:0x0(%eax,%eax,1) 8040 66 2e 0f 1f 84 00 00 00 00 00 8041 66 66 2e 0f 1f 84 00 00 00 00 00 8042 66 66 66 2e 0f 1f 84 00 00 00 00 00 8043 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 8044 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 8045 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 8046 */ 8047 if (code[0] == 0x66) { 8048 Int data16_cnt; 8049 for (data16_cnt = 1; data16_cnt < 6; data16_cnt++) 8050 if (code[data16_cnt] != 0x66) 8051 break; 8052 if (code[data16_cnt] == 0x2E && code[data16_cnt + 1] == 0x0F 8053 && code[data16_cnt + 2] == 0x1F && code[data16_cnt + 3] == 0x84 8054 && code[data16_cnt + 4] == 0x00 && code[data16_cnt + 5] == 0x00 8055 && code[data16_cnt + 6] == 0x00 && code[data16_cnt + 7] == 0x00 8056 && code[data16_cnt + 8] == 0x00 ) { 8057 DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n"); 8058 delta += 9 + data16_cnt; 8059 goto decode_success; 8060 } 8061 } 8062 } 8063 8064 /* Normal instruction handling starts here. */ 8065 8066 /* Deal with some but not all prefixes: 8067 66(oso) 8068 F0(lock) 8069 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:) 8070 Not dealt with (left in place): 8071 F2 F3 8072 */ 8073 n_prefixes = 0; 8074 while (True) { 8075 if (n_prefixes > 7) goto decode_failure; 8076 pre = getUChar(delta); 8077 switch (pre) { 8078 case 0x66: 8079 sz = 2; 8080 break; 8081 case 0xF0: 8082 pfx_lock = True; 8083 *expect_CAS = True; 8084 break; 8085 case 0x3E: /* %DS: */ 8086 case 0x26: /* %ES: */ 8087 case 0x64: /* %FS: */ 8088 case 0x65: /* %GS: */ 8089 if (sorb != 0) 8090 goto decode_failure; /* only one seg override allowed */ 8091 sorb = pre; 8092 break; 8093 case 0x2E: { /* %CS: */ 8094 /* 2E prefix on a conditional branch instruction is a 8095 branch-prediction hint, which can safely be ignored. */ 8096 UChar op1 = getIByte(delta+1); 8097 UChar op2 = getIByte(delta+2); 8098 if ((op1 >= 0x70 && op1 <= 0x7F) 8099 || (op1 == 0xE3) 8100 || (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) { 8101 if (0) vex_printf("vex x86->IR: ignoring branch hint\n"); 8102 } else { 8103 /* All other CS override cases are not handled */ 8104 goto decode_failure; 8105 } 8106 break; 8107 } 8108 case 0x36: /* %SS: */ 8109 /* SS override cases are not handled */ 8110 goto decode_failure; 8111 default: 8112 goto not_a_prefix; 8113 } 8114 n_prefixes++; 8115 delta++; 8116 } 8117 8118 not_a_prefix: 8119 8120 /* Now we should be looking at the primary opcode byte or the 8121 leading F2 or F3. Check that any LOCK prefix is actually 8122 allowed. */ 8123 8124 if (pfx_lock) { 8125 if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) { 8126 DIP("lock "); 8127 } else { 8128 *expect_CAS = False; 8129 goto decode_failure; 8130 } 8131 } 8132 8133 8134 /* ---------------------------------------------------- */ 8135 /* --- The SSE decoder. --- */ 8136 /* ---------------------------------------------------- */ 8137 8138 /* What did I do to deserve SSE ? Perhaps I was really bad in a 8139 previous life? */ 8140 8141 /* Note, this doesn't handle SSE2 or SSE3. That is handled in a 8142 later section, further on. */ 8143 8144 insn = (UChar*)&guest_code[delta]; 8145 8146 /* Treat fxsave specially. It should be doable even on an SSE0 8147 (Pentium-II class) CPU. Hence be prepared to handle it on 8148 any subarchitecture variant. 8149 */ 8150 8151 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */ 8152 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE 8153 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 0) { 8154 IRDirty* d; 8155 modrm = getIByte(delta+2); 8156 vassert(sz == 4); 8157 vassert(!epartIsReg(modrm)); 8158 8159 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8160 delta += 2+alen; 8161 gen_SEGV_if_not_16_aligned(addr); 8162 8163 DIP("fxsave %s\n", dis_buf); 8164 8165 /* Uses dirty helper: 8166 void x86g_do_FXSAVE ( VexGuestX86State*, UInt ) */ 8167 d = unsafeIRDirty_0_N ( 8168 0/*regparms*/, 8169 "x86g_dirtyhelper_FXSAVE", 8170 &x86g_dirtyhelper_FXSAVE, 8171 mkIRExprVec_1( mkexpr(addr) ) 8172 ); 8173 d->needsBBP = True; 8174 8175 /* declare we're writing memory */ 8176 d->mFx = Ifx_Write; 8177 d->mAddr = mkexpr(addr); 8178 d->mSize = 464; /* according to recent Intel docs */ 8179 8180 /* declare we're reading guest state */ 8181 d->nFxState = 7; 8182 vex_bzero(&d->fxState, sizeof(d->fxState)); 8183 8184 d->fxState[0].fx = Ifx_Read; 8185 d->fxState[0].offset = OFFB_FTOP; 8186 d->fxState[0].size = sizeof(UInt); 8187 8188 d->fxState[1].fx = Ifx_Read; 8189 d->fxState[1].offset = OFFB_FPREGS; 8190 d->fxState[1].size = 8 * sizeof(ULong); 8191 8192 d->fxState[2].fx = Ifx_Read; 8193 d->fxState[2].offset = OFFB_FPTAGS; 8194 d->fxState[2].size = 8 * sizeof(UChar); 8195 8196 d->fxState[3].fx = Ifx_Read; 8197 d->fxState[3].offset = OFFB_FPROUND; 8198 d->fxState[3].size = sizeof(UInt); 8199 8200 d->fxState[4].fx = Ifx_Read; 8201 d->fxState[4].offset = OFFB_FC3210; 8202 d->fxState[4].size = sizeof(UInt); 8203 8204 d->fxState[5].fx = Ifx_Read; 8205 d->fxState[5].offset = OFFB_XMM0; 8206 d->fxState[5].size = 8 * sizeof(U128); 8207 8208 d->fxState[6].fx = Ifx_Read; 8209 d->fxState[6].offset = OFFB_SSEROUND; 8210 d->fxState[6].size = sizeof(UInt); 8211 8212 /* Be paranoid ... this assertion tries to ensure the 8 %xmm 8213 images are packed back-to-back. If not, the value of 8214 d->fxState[5].size is wrong. */ 8215 vassert(16 == sizeof(U128)); 8216 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16)); 8217 8218 stmt( IRStmt_Dirty(d) ); 8219 8220 goto decode_success; 8221 } 8222 8223 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */ 8224 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE 8225 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 1) { 8226 IRDirty* d; 8227 modrm = getIByte(delta+2); 8228 vassert(sz == 4); 8229 vassert(!epartIsReg(modrm)); 8230 8231 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8232 delta += 2+alen; 8233 gen_SEGV_if_not_16_aligned(addr); 8234 8235 DIP("fxrstor %s\n", dis_buf); 8236 8237 /* Uses dirty helper: 8238 VexEmWarn x86g_do_FXRSTOR ( VexGuestX86State*, UInt ) 8239 NOTE: 8240 the VexEmWarn value is simply ignored (unlike for FRSTOR) 8241 */ 8242 d = unsafeIRDirty_0_N ( 8243 0/*regparms*/, 8244 "x86g_dirtyhelper_FXRSTOR", 8245 &x86g_dirtyhelper_FXRSTOR, 8246 mkIRExprVec_1( mkexpr(addr) ) 8247 ); 8248 d->needsBBP = True; 8249 8250 /* declare we're reading memory */ 8251 d->mFx = Ifx_Read; 8252 d->mAddr = mkexpr(addr); 8253 d->mSize = 464; /* according to recent Intel docs */ 8254 8255 /* declare we're writing guest state */ 8256 d->nFxState = 7; 8257 vex_bzero(&d->fxState, sizeof(d->fxState)); 8258 8259 d->fxState[0].fx = Ifx_Write; 8260 d->fxState[0].offset = OFFB_FTOP; 8261 d->fxState[0].size = sizeof(UInt); 8262 8263 d->fxState[1].fx = Ifx_Write; 8264 d->fxState[1].offset = OFFB_FPREGS; 8265 d->fxState[1].size = 8 * sizeof(ULong); 8266 8267 d->fxState[2].fx = Ifx_Write; 8268 d->fxState[2].offset = OFFB_FPTAGS; 8269 d->fxState[2].size = 8 * sizeof(UChar); 8270 8271 d->fxState[3].fx = Ifx_Write; 8272 d->fxState[3].offset = OFFB_FPROUND; 8273 d->fxState[3].size = sizeof(UInt); 8274 8275 d->fxState[4].fx = Ifx_Write; 8276 d->fxState[4].offset = OFFB_FC3210; 8277 d->fxState[4].size = sizeof(UInt); 8278 8279 d->fxState[5].fx = Ifx_Write; 8280 d->fxState[5].offset = OFFB_XMM0; 8281 d->fxState[5].size = 8 * sizeof(U128); 8282 8283 d->fxState[6].fx = Ifx_Write; 8284 d->fxState[6].offset = OFFB_SSEROUND; 8285 d->fxState[6].size = sizeof(UInt); 8286 8287 /* Be paranoid ... this assertion tries to ensure the 8 %xmm 8288 images are packed back-to-back. If not, the value of 8289 d->fxState[5].size is wrong. */ 8290 vassert(16 == sizeof(U128)); 8291 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16)); 8292 8293 stmt( IRStmt_Dirty(d) ); 8294 8295 goto decode_success; 8296 } 8297 8298 /* ------ SSE decoder main ------ */ 8299 8300 /* Skip parts of the decoder which don't apply given the stated 8301 guest subarchitecture. */ 8302 if (archinfo->hwcaps == 0/*baseline, no sse at all*/) 8303 goto after_sse_decoders; 8304 8305 /* Otherwise we must be doing sse1 or sse2, so we can at least try 8306 for SSE1 here. */ 8307 8308 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */ 8309 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x58) { 8310 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addps", Iop_Add32Fx4 ); 8311 goto decode_success; 8312 } 8313 8314 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */ 8315 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x58) { 8316 vassert(sz == 4); 8317 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "addss", Iop_Add32F0x4 ); 8318 goto decode_success; 8319 } 8320 8321 /* 0F 55 = ANDNPS -- G = (not G) and E */ 8322 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x55) { 8323 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnps", Iop_AndV128 ); 8324 goto decode_success; 8325 } 8326 8327 /* 0F 54 = ANDPS -- G = G and E */ 8328 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x54) { 8329 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andps", Iop_AndV128 ); 8330 goto decode_success; 8331 } 8332 8333 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */ 8334 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC2) { 8335 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmpps", True, 4 ); 8336 goto decode_success; 8337 } 8338 8339 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */ 8340 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xC2) { 8341 vassert(sz == 4); 8342 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpss", False, 4 ); 8343 goto decode_success; 8344 } 8345 8346 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */ 8347 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */ 8348 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) { 8349 IRTemp argL = newTemp(Ity_F32); 8350 IRTemp argR = newTemp(Ity_F32); 8351 modrm = getIByte(delta+2); 8352 if (epartIsReg(modrm)) { 8353 assign( argR, getXMMRegLane32F( eregOfRM(modrm), 0/*lowest lane*/ ) ); 8354 delta += 2+1; 8355 DIP("[u]comiss %s,%s\n", nameXMMReg(eregOfRM(modrm)), 8356 nameXMMReg(gregOfRM(modrm)) ); 8357 } else { 8358 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8359 assign( argR, loadLE(Ity_F32, mkexpr(addr)) ); 8360 delta += 2+alen; 8361 DIP("[u]comiss %s,%s\n", dis_buf, 8362 nameXMMReg(gregOfRM(modrm)) ); 8363 } 8364 assign( argL, getXMMRegLane32F( gregOfRM(modrm), 0/*lowest lane*/ ) ); 8365 8366 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 8367 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 8368 stmt( IRStmt_Put( 8369 OFFB_CC_DEP1, 8370 binop( Iop_And32, 8371 binop(Iop_CmpF64, 8372 unop(Iop_F32toF64,mkexpr(argL)), 8373 unop(Iop_F32toF64,mkexpr(argR))), 8374 mkU32(0x45) 8375 ))); 8376 /* Set NDEP even though it isn't used. This makes redundant-PUT 8377 elimination of previous stores to this field work better. */ 8378 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 8379 goto decode_success; 8380 } 8381 8382 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low 8383 half xmm */ 8384 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x2A) { 8385 IRTemp arg64 = newTemp(Ity_I64); 8386 IRTemp rmode = newTemp(Ity_I32); 8387 vassert(sz == 4); 8388 8389 modrm = getIByte(delta+2); 8390 do_MMX_preamble(); 8391 if (epartIsReg(modrm)) { 8392 assign( arg64, getMMXReg(eregOfRM(modrm)) ); 8393 delta += 2+1; 8394 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregOfRM(modrm)), 8395 nameXMMReg(gregOfRM(modrm))); 8396 } else { 8397 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8398 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 8399 delta += 2+alen; 8400 DIP("cvtpi2ps %s,%s\n", dis_buf, 8401 nameXMMReg(gregOfRM(modrm)) ); 8402 } 8403 8404 assign( rmode, get_sse_roundingmode() ); 8405 8406 putXMMRegLane32F( 8407 gregOfRM(modrm), 0, 8408 binop(Iop_F64toF32, 8409 mkexpr(rmode), 8410 unop(Iop_I32StoF64, 8411 unop(Iop_64to32, mkexpr(arg64)) )) ); 8412 8413 putXMMRegLane32F( 8414 gregOfRM(modrm), 1, 8415 binop(Iop_F64toF32, 8416 mkexpr(rmode), 8417 unop(Iop_I32StoF64, 8418 unop(Iop_64HIto32, mkexpr(arg64)) )) ); 8419 8420 goto decode_success; 8421 } 8422 8423 /* F3 0F 2A = CVTSI2SS -- convert I32 in mem/ireg to F32 in low 8424 quarter xmm */ 8425 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x2A) { 8426 IRTemp arg32 = newTemp(Ity_I32); 8427 IRTemp rmode = newTemp(Ity_I32); 8428 vassert(sz == 4); 8429 8430 modrm = getIByte(delta+3); 8431 if (epartIsReg(modrm)) { 8432 assign( arg32, getIReg(4, eregOfRM(modrm)) ); 8433 delta += 3+1; 8434 DIP("cvtsi2ss %s,%s\n", nameIReg(4, eregOfRM(modrm)), 8435 nameXMMReg(gregOfRM(modrm))); 8436 } else { 8437 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 8438 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 8439 delta += 3+alen; 8440 DIP("cvtsi2ss %s,%s\n", dis_buf, 8441 nameXMMReg(gregOfRM(modrm)) ); 8442 } 8443 8444 assign( rmode, get_sse_roundingmode() ); 8445 8446 putXMMRegLane32F( 8447 gregOfRM(modrm), 0, 8448 binop(Iop_F64toF32, 8449 mkexpr(rmode), 8450 unop(Iop_I32StoF64, mkexpr(arg32)) ) ); 8451 8452 goto decode_success; 8453 } 8454 8455 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 8456 I32 in mmx, according to prevailing SSE rounding mode */ 8457 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 8458 I32 in mmx, rounding towards zero */ 8459 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) { 8460 IRTemp dst64 = newTemp(Ity_I64); 8461 IRTemp rmode = newTemp(Ity_I32); 8462 IRTemp f32lo = newTemp(Ity_F32); 8463 IRTemp f32hi = newTemp(Ity_F32); 8464 Bool r2zero = toBool(insn[1] == 0x2C); 8465 8466 do_MMX_preamble(); 8467 modrm = getIByte(delta+2); 8468 8469 if (epartIsReg(modrm)) { 8470 delta += 2+1; 8471 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0)); 8472 assign(f32hi, getXMMRegLane32F(eregOfRM(modrm), 1)); 8473 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", 8474 nameXMMReg(eregOfRM(modrm)), 8475 nameMMXReg(gregOfRM(modrm))); 8476 } else { 8477 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8478 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 8479 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add32, 8480 mkexpr(addr), 8481 mkU32(4) ))); 8482 delta += 2+alen; 8483 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", 8484 dis_buf, 8485 nameMMXReg(gregOfRM(modrm))); 8486 } 8487 8488 if (r2zero) { 8489 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 8490 } else { 8491 assign( rmode, get_sse_roundingmode() ); 8492 } 8493 8494 assign( 8495 dst64, 8496 binop( Iop_32HLto64, 8497 binop( Iop_F64toI32S, 8498 mkexpr(rmode), 8499 unop( Iop_F32toF64, mkexpr(f32hi) ) ), 8500 binop( Iop_F64toI32S, 8501 mkexpr(rmode), 8502 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 8503 ) 8504 ); 8505 8506 putMMXReg(gregOfRM(modrm), mkexpr(dst64)); 8507 goto decode_success; 8508 } 8509 8510 /* F3 0F 2D = CVTSS2SI -- convert F32 in mem/low quarter xmm to 8511 I32 in ireg, according to prevailing SSE rounding mode */ 8512 /* F3 0F 2C = CVTTSS2SI -- convert F32 in mem/low quarter xmm to 8513 I32 in ireg, rounding towards zero */ 8514 if (insn[0] == 0xF3 && insn[1] == 0x0F 8515 && (insn[2] == 0x2D || insn[2] == 0x2C)) { 8516 IRTemp rmode = newTemp(Ity_I32); 8517 IRTemp f32lo = newTemp(Ity_F32); 8518 Bool r2zero = toBool(insn[2] == 0x2C); 8519 vassert(sz == 4); 8520 8521 modrm = getIByte(delta+3); 8522 if (epartIsReg(modrm)) { 8523 delta += 3+1; 8524 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0)); 8525 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "", 8526 nameXMMReg(eregOfRM(modrm)), 8527 nameIReg(4, gregOfRM(modrm))); 8528 } else { 8529 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 8530 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 8531 delta += 3+alen; 8532 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "", 8533 dis_buf, 8534 nameIReg(4, gregOfRM(modrm))); 8535 } 8536 8537 if (r2zero) { 8538 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 8539 } else { 8540 assign( rmode, get_sse_roundingmode() ); 8541 } 8542 8543 putIReg(4, gregOfRM(modrm), 8544 binop( Iop_F64toI32S, 8545 mkexpr(rmode), 8546 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 8547 ); 8548 8549 goto decode_success; 8550 } 8551 8552 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */ 8553 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5E) { 8554 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divps", Iop_Div32Fx4 ); 8555 goto decode_success; 8556 } 8557 8558 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */ 8559 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5E) { 8560 vassert(sz == 4); 8561 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "divss", Iop_Div32F0x4 ); 8562 goto decode_success; 8563 } 8564 8565 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */ 8566 if (insn[0] == 0x0F && insn[1] == 0xAE 8567 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 2) { 8568 8569 IRTemp t64 = newTemp(Ity_I64); 8570 IRTemp ew = newTemp(Ity_I32); 8571 8572 modrm = getIByte(delta+2); 8573 vassert(!epartIsReg(modrm)); 8574 vassert(sz == 4); 8575 8576 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8577 delta += 2+alen; 8578 DIP("ldmxcsr %s\n", dis_buf); 8579 8580 /* The only thing we observe in %mxcsr is the rounding mode. 8581 Therefore, pass the 32-bit value (SSE native-format control 8582 word) to a clean helper, getting back a 64-bit value, the 8583 lower half of which is the SSEROUND value to store, and the 8584 upper half of which is the emulation-warning token which may 8585 be generated. 8586 */ 8587 /* ULong x86h_check_ldmxcsr ( UInt ); */ 8588 assign( t64, mkIRExprCCall( 8589 Ity_I64, 0/*regparms*/, 8590 "x86g_check_ldmxcsr", 8591 &x86g_check_ldmxcsr, 8592 mkIRExprVec_1( loadLE(Ity_I32, mkexpr(addr)) ) 8593 ) 8594 ); 8595 8596 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) ); 8597 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 8598 put_emwarn( mkexpr(ew) ); 8599 /* Finally, if an emulation warning was reported, side-exit to 8600 the next insn, reporting the warning, so that Valgrind's 8601 dispatcher sees the warning. */ 8602 stmt( 8603 IRStmt_Exit( 8604 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 8605 Ijk_EmWarn, 8606 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta), 8607 OFFB_EIP 8608 ) 8609 ); 8610 goto decode_success; 8611 } 8612 8613 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8614 /* 0F F7 = MASKMOVQ -- 8x8 masked store */ 8615 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) { 8616 Bool ok = False; 8617 delta = dis_MMX( &ok, sorb, sz, delta+1 ); 8618 if (!ok) 8619 goto decode_failure; 8620 goto decode_success; 8621 } 8622 8623 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ 8624 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) { 8625 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 ); 8626 goto decode_success; 8627 } 8628 8629 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */ 8630 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) { 8631 vassert(sz == 4); 8632 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 ); 8633 goto decode_success; 8634 } 8635 8636 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */ 8637 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) { 8638 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 ); 8639 goto decode_success; 8640 } 8641 8642 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */ 8643 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) { 8644 vassert(sz == 4); 8645 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 ); 8646 goto decode_success; 8647 } 8648 8649 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */ 8650 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */ 8651 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) { 8652 modrm = getIByte(delta+2); 8653 if (epartIsReg(modrm)) { 8654 putXMMReg( gregOfRM(modrm), 8655 getXMMReg( eregOfRM(modrm) )); 8656 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)), 8657 nameXMMReg(gregOfRM(modrm))); 8658 delta += 2+1; 8659 } else { 8660 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8661 if (insn[1] == 0x28/*movaps*/) 8662 gen_SEGV_if_not_16_aligned( addr ); 8663 putXMMReg( gregOfRM(modrm), 8664 loadLE(Ity_V128, mkexpr(addr)) ); 8665 DIP("mov[ua]ps %s,%s\n", dis_buf, 8666 nameXMMReg(gregOfRM(modrm))); 8667 delta += 2+alen; 8668 } 8669 goto decode_success; 8670 } 8671 8672 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */ 8673 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */ 8674 if (sz == 4 && insn[0] == 0x0F 8675 && (insn[1] == 0x29 || insn[1] == 0x11)) { 8676 modrm = getIByte(delta+2); 8677 if (epartIsReg(modrm)) { 8678 /* fall through; awaiting test case */ 8679 } else { 8680 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8681 if (insn[1] == 0x29/*movaps*/) 8682 gen_SEGV_if_not_16_aligned( addr ); 8683 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 8684 DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)), 8685 dis_buf ); 8686 delta += 2+alen; 8687 goto decode_success; 8688 } 8689 } 8690 8691 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */ 8692 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */ 8693 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) { 8694 modrm = getIByte(delta+2); 8695 if (epartIsReg(modrm)) { 8696 delta += 2+1; 8697 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/, 8698 getXMMRegLane64( eregOfRM(modrm), 0 ) ); 8699 DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)), 8700 nameXMMReg(gregOfRM(modrm))); 8701 } else { 8702 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8703 delta += 2+alen; 8704 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/, 8705 loadLE(Ity_I64, mkexpr(addr)) ); 8706 DIP("movhps %s,%s\n", dis_buf, 8707 nameXMMReg( gregOfRM(modrm) )); 8708 } 8709 goto decode_success; 8710 } 8711 8712 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */ 8713 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) { 8714 if (!epartIsReg(insn[2])) { 8715 delta += 2; 8716 addr = disAMode ( &alen, sorb, delta, dis_buf ); 8717 delta += alen; 8718 storeLE( mkexpr(addr), 8719 getXMMRegLane64( gregOfRM(insn[2]), 8720 1/*upper lane*/ ) ); 8721 DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ), 8722 dis_buf); 8723 goto decode_success; 8724 } 8725 /* else fall through */ 8726 } 8727 8728 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */ 8729 /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. */ 8730 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) { 8731 modrm = getIByte(delta+2); 8732 if (epartIsReg(modrm)) { 8733 delta += 2+1; 8734 putXMMRegLane64( gregOfRM(modrm), 8735 0/*lower lane*/, 8736 getXMMRegLane64( eregOfRM(modrm), 1 )); 8737 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)), 8738 nameXMMReg(gregOfRM(modrm))); 8739 } else { 8740 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8741 delta += 2+alen; 8742 putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/, 8743 loadLE(Ity_I64, mkexpr(addr)) ); 8744 DIP("movlps %s, %s\n", 8745 dis_buf, nameXMMReg( gregOfRM(modrm) )); 8746 } 8747 goto decode_success; 8748 } 8749 8750 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */ 8751 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) { 8752 if (!epartIsReg(insn[2])) { 8753 delta += 2; 8754 addr = disAMode ( &alen, sorb, delta, dis_buf ); 8755 delta += alen; 8756 storeLE( mkexpr(addr), 8757 getXMMRegLane64( gregOfRM(insn[2]), 8758 0/*lower lane*/ ) ); 8759 DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ), 8760 dis_buf); 8761 goto decode_success; 8762 } 8763 /* else fall through */ 8764 } 8765 8766 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E) 8767 to 4 lowest bits of ireg(G) */ 8768 if (insn[0] == 0x0F && insn[1] == 0x50) { 8769 modrm = getIByte(delta+2); 8770 if (sz == 4 && epartIsReg(modrm)) { 8771 Int src; 8772 t0 = newTemp(Ity_I32); 8773 t1 = newTemp(Ity_I32); 8774 t2 = newTemp(Ity_I32); 8775 t3 = newTemp(Ity_I32); 8776 delta += 2+1; 8777 src = eregOfRM(modrm); 8778 assign( t0, binop( Iop_And32, 8779 binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)), 8780 mkU32(1) )); 8781 assign( t1, binop( Iop_And32, 8782 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)), 8783 mkU32(2) )); 8784 assign( t2, binop( Iop_And32, 8785 binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)), 8786 mkU32(4) )); 8787 assign( t3, binop( Iop_And32, 8788 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)), 8789 mkU32(8) )); 8790 putIReg(4, gregOfRM(modrm), 8791 binop(Iop_Or32, 8792 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 8793 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) 8794 ) 8795 ); 8796 DIP("movmskps %s,%s\n", nameXMMReg(src), 8797 nameIReg(4, gregOfRM(modrm))); 8798 goto decode_success; 8799 } 8800 /* else fall through */ 8801 } 8802 8803 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */ 8804 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */ 8805 if (insn[0] == 0x0F && insn[1] == 0x2B) { 8806 modrm = getIByte(delta+2); 8807 if (!epartIsReg(modrm)) { 8808 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8809 gen_SEGV_if_not_16_aligned( addr ); 8810 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 8811 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s", 8812 dis_buf, 8813 nameXMMReg(gregOfRM(modrm))); 8814 delta += 2+alen; 8815 goto decode_success; 8816 } 8817 /* else fall through */ 8818 } 8819 8820 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8821 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the 8822 Intel manual does not say anything about the usual business of 8823 the FP reg tags getting trashed whenever an MMX insn happens. 8824 So we just leave them alone. 8825 */ 8826 if (insn[0] == 0x0F && insn[1] == 0xE7) { 8827 modrm = getIByte(delta+2); 8828 if (sz == 4 && !epartIsReg(modrm)) { 8829 /* do_MMX_preamble(); Intel docs don't specify this */ 8830 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8831 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) ); 8832 DIP("movntq %s,%s\n", dis_buf, 8833 nameMMXReg(gregOfRM(modrm))); 8834 delta += 2+alen; 8835 goto decode_success; 8836 } 8837 /* else fall through */ 8838 } 8839 8840 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G 8841 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */ 8842 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) { 8843 vassert(sz == 4); 8844 modrm = getIByte(delta+3); 8845 if (epartIsReg(modrm)) { 8846 putXMMRegLane32( gregOfRM(modrm), 0, 8847 getXMMRegLane32( eregOfRM(modrm), 0 )); 8848 DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)), 8849 nameXMMReg(gregOfRM(modrm))); 8850 delta += 3+1; 8851 } else { 8852 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 8853 /* zero bits 127:64 */ 8854 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) ); 8855 /* zero bits 63:32 */ 8856 putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) ); 8857 /* write bits 31:0 */ 8858 putXMMRegLane32( gregOfRM(modrm), 0, 8859 loadLE(Ity_I32, mkexpr(addr)) ); 8860 DIP("movss %s,%s\n", dis_buf, 8861 nameXMMReg(gregOfRM(modrm))); 8862 delta += 3+alen; 8863 } 8864 goto decode_success; 8865 } 8866 8867 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem 8868 or lo 1/4 xmm). */ 8869 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) { 8870 vassert(sz == 4); 8871 modrm = getIByte(delta+3); 8872 if (epartIsReg(modrm)) { 8873 /* fall through, we don't yet have a test case */ 8874 } else { 8875 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 8876 storeLE( mkexpr(addr), 8877 getXMMRegLane32(gregOfRM(modrm), 0) ); 8878 DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)), 8879 dis_buf); 8880 delta += 3+alen; 8881 goto decode_success; 8882 } 8883 } 8884 8885 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */ 8886 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) { 8887 delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 ); 8888 goto decode_success; 8889 } 8890 8891 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */ 8892 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) { 8893 vassert(sz == 4); 8894 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 ); 8895 goto decode_success; 8896 } 8897 8898 /* 0F 56 = ORPS -- G = G and E */ 8899 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) { 8900 delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 ); 8901 goto decode_success; 8902 } 8903 8904 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8905 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */ 8906 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) { 8907 do_MMX_preamble(); 8908 delta = dis_MMXop_regmem_to_reg ( 8909 sorb, delta+2, insn[1], "pavgb", False ); 8910 goto decode_success; 8911 } 8912 8913 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8914 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */ 8915 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE3) { 8916 do_MMX_preamble(); 8917 delta = dis_MMXop_regmem_to_reg ( 8918 sorb, delta+2, insn[1], "pavgw", False ); 8919 goto decode_success; 8920 } 8921 8922 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8923 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put 8924 zero-extend of it in ireg(G). */ 8925 if (insn[0] == 0x0F && insn[1] == 0xC5) { 8926 modrm = insn[2]; 8927 if (sz == 4 && epartIsReg(modrm)) { 8928 IRTemp sV = newTemp(Ity_I64); 8929 t5 = newTemp(Ity_I16); 8930 do_MMX_preamble(); 8931 assign(sV, getMMXReg(eregOfRM(modrm))); 8932 breakup64to16s( sV, &t3, &t2, &t1, &t0 ); 8933 switch (insn[3] & 3) { 8934 case 0: assign(t5, mkexpr(t0)); break; 8935 case 1: assign(t5, mkexpr(t1)); break; 8936 case 2: assign(t5, mkexpr(t2)); break; 8937 case 3: assign(t5, mkexpr(t3)); break; 8938 default: vassert(0); /*NOTREACHED*/ 8939 } 8940 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5))); 8941 DIP("pextrw $%d,%s,%s\n", 8942 (Int)insn[3], nameMMXReg(eregOfRM(modrm)), 8943 nameIReg(4,gregOfRM(modrm))); 8944 delta += 4; 8945 goto decode_success; 8946 } 8947 /* else fall through */ 8948 } 8949 8950 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8951 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 8952 put it into the specified lane of mmx(G). */ 8953 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC4) { 8954 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the 8955 mmx reg. t4 is the new lane value. t5 is the original 8956 mmx value. t6 is the new mmx value. */ 8957 Int lane; 8958 t4 = newTemp(Ity_I16); 8959 t5 = newTemp(Ity_I64); 8960 t6 = newTemp(Ity_I64); 8961 modrm = insn[2]; 8962 do_MMX_preamble(); 8963 8964 assign(t5, getMMXReg(gregOfRM(modrm))); 8965 breakup64to16s( t5, &t3, &t2, &t1, &t0 ); 8966 8967 if (epartIsReg(modrm)) { 8968 assign(t4, getIReg(2, eregOfRM(modrm))); 8969 delta += 3+1; 8970 lane = insn[3+1-1]; 8971 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 8972 nameIReg(2,eregOfRM(modrm)), 8973 nameMMXReg(gregOfRM(modrm))); 8974 } else { 8975 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8976 delta += 3+alen; 8977 lane = insn[3+alen-1]; 8978 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 8979 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 8980 dis_buf, 8981 nameMMXReg(gregOfRM(modrm))); 8982 } 8983 8984 switch (lane & 3) { 8985 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break; 8986 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break; 8987 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break; 8988 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break; 8989 default: vassert(0); /*NOTREACHED*/ 8990 } 8991 putMMXReg(gregOfRM(modrm), mkexpr(t6)); 8992 goto decode_success; 8993 } 8994 8995 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8996 /* 0F EE = PMAXSW -- 16x4 signed max */ 8997 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEE) { 8998 do_MMX_preamble(); 8999 delta = dis_MMXop_regmem_to_reg ( 9000 sorb, delta+2, insn[1], "pmaxsw", False ); 9001 goto decode_success; 9002 } 9003 9004 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 9005 /* 0F DE = PMAXUB -- 8x8 unsigned max */ 9006 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDE) { 9007 do_MMX_preamble(); 9008 delta = dis_MMXop_regmem_to_reg ( 9009 sorb, delta+2, insn[1], "pmaxub", False ); 9010 goto decode_success; 9011 } 9012 9013 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 9014 /* 0F EA = PMINSW -- 16x4 signed min */ 9015 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEA) { 9016 do_MMX_preamble(); 9017 delta = dis_MMXop_regmem_to_reg ( 9018 sorb, delta+2, insn[1], "pminsw", False ); 9019 goto decode_success; 9020 } 9021 9022 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 9023 /* 0F DA = PMINUB -- 8x8 unsigned min */ 9024 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDA) { 9025 do_MMX_preamble(); 9026 delta = dis_MMXop_regmem_to_reg ( 9027 sorb, delta+2, insn[1], "pminub", False ); 9028 goto decode_success; 9029 } 9030 9031 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 9032 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in 9033 mmx(G), turn them into a byte, and put zero-extend of it in 9034 ireg(G). */ 9035 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) { 9036 modrm = insn[2]; 9037 if (epartIsReg(modrm)) { 9038 do_MMX_preamble(); 9039 t0 = newTemp(Ity_I64); 9040 t1 = newTemp(Ity_I32); 9041 assign(t0, getMMXReg(eregOfRM(modrm))); 9042 assign(t1, mkIRExprCCall( 9043 Ity_I32, 0/*regparms*/, 9044 "x86g_calculate_mmx_pmovmskb", 9045 &x86g_calculate_mmx_pmovmskb, 9046 mkIRExprVec_1(mkexpr(t0)))); 9047 putIReg(4, gregOfRM(modrm), mkexpr(t1)); 9048 DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)), 9049 nameIReg(4,gregOfRM(modrm))); 9050 delta += 3; 9051 goto decode_success; 9052 } 9053 /* else fall through */ 9054 } 9055 9056 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 9057 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */ 9058 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE4) { 9059 do_MMX_preamble(); 9060 delta = dis_MMXop_regmem_to_reg ( 9061 sorb, delta+2, insn[1], "pmuluh", False ); 9062 goto decode_success; 9063 } 9064 9065 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */ 9066 /* 0F 18 /1 = PREFETCH0 -- with various different hints */ 9067 /* 0F 18 /2 = PREFETCH1 */ 9068 /* 0F 18 /3 = PREFETCH2 */ 9069 if (insn[0] == 0x0F && insn[1] == 0x18 9070 && !epartIsReg(insn[2]) 9071 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 3) { 9072 HChar* hintstr = "??"; 9073 9074 modrm = getIByte(delta+2); 9075 vassert(!epartIsReg(modrm)); 9076 9077 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9078 delta += 2+alen; 9079 9080 switch (gregOfRM(modrm)) { 9081 case 0: hintstr = "nta"; break; 9082 case 1: hintstr = "t0"; break; 9083 case 2: hintstr = "t1"; break; 9084 case 3: hintstr = "t2"; break; 9085 default: vassert(0); /*NOTREACHED*/ 9086 } 9087 9088 DIP("prefetch%s %s\n", hintstr, dis_buf); 9089 goto decode_success; 9090 } 9091 9092 /* 0F 0D /0 = PREFETCH m8 -- 3DNow! prefetch */ 9093 /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */ 9094 if (insn[0] == 0x0F && insn[1] == 0x0D 9095 && !epartIsReg(insn[2]) 9096 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 1) { 9097 HChar* hintstr = "??"; 9098 9099 modrm = getIByte(delta+2); 9100 vassert(!epartIsReg(modrm)); 9101 9102 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9103 delta += 2+alen; 9104 9105 switch (gregOfRM(modrm)) { 9106 case 0: hintstr = ""; break; 9107 case 1: hintstr = "w"; break; 9108 default: vassert(0); /*NOTREACHED*/ 9109 } 9110 9111 DIP("prefetch%s %s\n", hintstr, dis_buf); 9112 goto decode_success; 9113 } 9114 9115 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 9116 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */ 9117 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF6) { 9118 do_MMX_preamble(); 9119 delta = dis_MMXop_regmem_to_reg ( 9120 sorb, delta+2, insn[1], "psadbw", False ); 9121 goto decode_success; 9122 } 9123 9124 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 9125 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */ 9126 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x70) { 9127 Int order; 9128 IRTemp sV, dV, s3, s2, s1, s0; 9129 s3 = s2 = s1 = s0 = IRTemp_INVALID; 9130 sV = newTemp(Ity_I64); 9131 dV = newTemp(Ity_I64); 9132 do_MMX_preamble(); 9133 modrm = insn[2]; 9134 if (epartIsReg(modrm)) { 9135 assign( sV, getMMXReg(eregOfRM(modrm)) ); 9136 order = (Int)insn[3]; 9137 delta += 2+2; 9138 DIP("pshufw $%d,%s,%s\n", order, 9139 nameMMXReg(eregOfRM(modrm)), 9140 nameMMXReg(gregOfRM(modrm))); 9141 } else { 9142 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9143 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 9144 order = (Int)insn[2+alen]; 9145 delta += 3+alen; 9146 DIP("pshufw $%d,%s,%s\n", order, 9147 dis_buf, 9148 nameMMXReg(gregOfRM(modrm))); 9149 } 9150 breakup64to16s( sV, &s3, &s2, &s1, &s0 ); 9151 9152# define SEL(n) \ 9153 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 9154 assign(dV, 9155 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 9156 SEL((order>>2)&3), SEL((order>>0)&3) ) 9157 ); 9158 putMMXReg(gregOfRM(modrm), mkexpr(dV)); 9159# undef SEL 9160 goto decode_success; 9161 } 9162 9163 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */ 9164 if (insn[0] == 0x0F && insn[1] == 0x53) { 9165 vassert(sz == 4); 9166 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 9167 "rcpps", Iop_Recip32Fx4 ); 9168 goto decode_success; 9169 } 9170 9171 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */ 9172 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x53) { 9173 vassert(sz == 4); 9174 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, 9175 "rcpss", Iop_Recip32F0x4 ); 9176 goto decode_success; 9177 } 9178 9179 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */ 9180 if (insn[0] == 0x0F && insn[1] == 0x52) { 9181 vassert(sz == 4); 9182 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 9183 "rsqrtps", Iop_RSqrt32Fx4 ); 9184 goto decode_success; 9185 } 9186 9187 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */ 9188 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x52) { 9189 vassert(sz == 4); 9190 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, 9191 "rsqrtss", Iop_RSqrt32F0x4 ); 9192 goto decode_success; 9193 } 9194 9195 /* 0F AE /7 = SFENCE -- flush pending operations to memory */ 9196 if (insn[0] == 0x0F && insn[1] == 0xAE 9197 && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) { 9198 vassert(sz == 4); 9199 delta += 3; 9200 /* Insert a memory fence. It's sometimes important that these 9201 are carried through to the generated code. */ 9202 stmt( IRStmt_MBE(Imbe_Fence) ); 9203 DIP("sfence\n"); 9204 goto decode_success; 9205 } 9206 9207 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */ 9208 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) { 9209 Int select; 9210 IRTemp sV, dV; 9211 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 9212 sV = newTemp(Ity_V128); 9213 dV = newTemp(Ity_V128); 9214 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 9215 modrm = insn[2]; 9216 assign( dV, getXMMReg(gregOfRM(modrm)) ); 9217 9218 if (epartIsReg(modrm)) { 9219 assign( sV, getXMMReg(eregOfRM(modrm)) ); 9220 select = (Int)insn[3]; 9221 delta += 2+2; 9222 DIP("shufps $%d,%s,%s\n", select, 9223 nameXMMReg(eregOfRM(modrm)), 9224 nameXMMReg(gregOfRM(modrm))); 9225 } else { 9226 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9227 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 9228 select = (Int)insn[2+alen]; 9229 delta += 3+alen; 9230 DIP("shufps $%d,%s,%s\n", select, 9231 dis_buf, 9232 nameXMMReg(gregOfRM(modrm))); 9233 } 9234 9235 breakup128to32s( dV, &d3, &d2, &d1, &d0 ); 9236 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 9237 9238# define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3))) 9239# define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 9240 9241 putXMMReg( 9242 gregOfRM(modrm), 9243 mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3), 9244 SELD((select>>2)&3), SELD((select>>0)&3) ) 9245 ); 9246 9247# undef SELD 9248# undef SELS 9249 9250 goto decode_success; 9251 } 9252 9253 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */ 9254 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x51) { 9255 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 9256 "sqrtps", Iop_Sqrt32Fx4 ); 9257 goto decode_success; 9258 } 9259 9260 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */ 9261 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x51) { 9262 vassert(sz == 4); 9263 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, 9264 "sqrtss", Iop_Sqrt32F0x4 ); 9265 goto decode_success; 9266 } 9267 9268 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */ 9269 if (insn[0] == 0x0F && insn[1] == 0xAE 9270 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 3) { 9271 modrm = getIByte(delta+2); 9272 vassert(sz == 4); 9273 vassert(!epartIsReg(modrm)); 9274 9275 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9276 delta += 2+alen; 9277 9278 /* Fake up a native SSE mxcsr word. The only thing it depends 9279 on is SSEROUND[1:0], so call a clean helper to cook it up. 9280 */ 9281 /* UInt x86h_create_mxcsr ( UInt sseround ) */ 9282 DIP("stmxcsr %s\n", dis_buf); 9283 storeLE( mkexpr(addr), 9284 mkIRExprCCall( 9285 Ity_I32, 0/*regp*/, 9286 "x86g_create_mxcsr", &x86g_create_mxcsr, 9287 mkIRExprVec_1( get_sse_roundingmode() ) 9288 ) 9289 ); 9290 goto decode_success; 9291 } 9292 9293 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */ 9294 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5C) { 9295 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subps", Iop_Sub32Fx4 ); 9296 goto decode_success; 9297 } 9298 9299 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */ 9300 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5C) { 9301 vassert(sz == 4); 9302 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "subss", Iop_Sub32F0x4 ); 9303 goto decode_success; 9304 } 9305 9306 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */ 9307 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */ 9308 /* These just appear to be special cases of SHUFPS */ 9309 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) { 9310 IRTemp sV, dV; 9311 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 9312 Bool hi = toBool(insn[1] == 0x15); 9313 sV = newTemp(Ity_V128); 9314 dV = newTemp(Ity_V128); 9315 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 9316 modrm = insn[2]; 9317 assign( dV, getXMMReg(gregOfRM(modrm)) ); 9318 9319 if (epartIsReg(modrm)) { 9320 assign( sV, getXMMReg(eregOfRM(modrm)) ); 9321 delta += 2+1; 9322 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 9323 nameXMMReg(eregOfRM(modrm)), 9324 nameXMMReg(gregOfRM(modrm))); 9325 } else { 9326 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9327 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 9328 delta += 2+alen; 9329 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 9330 dis_buf, 9331 nameXMMReg(gregOfRM(modrm))); 9332 } 9333 9334 breakup128to32s( dV, &d3, &d2, &d1, &d0 ); 9335 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 9336 9337 if (hi) { 9338 putXMMReg( gregOfRM(modrm), mk128from32s( s3, d3, s2, d2 ) ); 9339 } else { 9340 putXMMReg( gregOfRM(modrm), mk128from32s( s1, d1, s0, d0 ) ); 9341 } 9342 9343 goto decode_success; 9344 } 9345 9346 /* 0F 57 = XORPS -- G = G and E */ 9347 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x57) { 9348 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorps", Iop_XorV128 ); 9349 goto decode_success; 9350 } 9351 9352 /* ---------------------------------------------------- */ 9353 /* --- end of the SSE decoder. --- */ 9354 /* ---------------------------------------------------- */ 9355 9356 /* ---------------------------------------------------- */ 9357 /* --- start of the SSE2 decoder. --- */ 9358 /* ---------------------------------------------------- */ 9359 9360 /* Skip parts of the decoder which don't apply given the stated 9361 guest subarchitecture. */ 9362 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2)) 9363 goto after_sse_decoders; /* no SSE2 capabilities */ 9364 9365 insn = (UChar*)&guest_code[delta]; 9366 9367 /* 66 0F 58 = ADDPD -- add 32Fx4 from R/M to R */ 9368 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x58) { 9369 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addpd", Iop_Add64Fx2 ); 9370 goto decode_success; 9371 } 9372 9373 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */ 9374 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x58) { 9375 vassert(sz == 4); 9376 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "addsd", Iop_Add64F0x2 ); 9377 goto decode_success; 9378 } 9379 9380 /* 66 0F 55 = ANDNPD -- G = (not G) and E */ 9381 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x55) { 9382 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnpd", Iop_AndV128 ); 9383 goto decode_success; 9384 } 9385 9386 /* 66 0F 54 = ANDPD -- G = G and E */ 9387 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x54) { 9388 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andpd", Iop_AndV128 ); 9389 goto decode_success; 9390 } 9391 9392 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */ 9393 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC2) { 9394 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmppd", True, 8 ); 9395 goto decode_success; 9396 } 9397 9398 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */ 9399 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xC2) { 9400 vassert(sz == 4); 9401 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpsd", False, 8 ); 9402 goto decode_success; 9403 } 9404 9405 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */ 9406 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */ 9407 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) { 9408 IRTemp argL = newTemp(Ity_F64); 9409 IRTemp argR = newTemp(Ity_F64); 9410 modrm = getIByte(delta+2); 9411 if (epartIsReg(modrm)) { 9412 assign( argR, getXMMRegLane64F( eregOfRM(modrm), 0/*lowest lane*/ ) ); 9413 delta += 2+1; 9414 DIP("[u]comisd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9415 nameXMMReg(gregOfRM(modrm)) ); 9416 } else { 9417 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9418 assign( argR, loadLE(Ity_F64, mkexpr(addr)) ); 9419 delta += 2+alen; 9420 DIP("[u]comisd %s,%s\n", dis_buf, 9421 nameXMMReg(gregOfRM(modrm)) ); 9422 } 9423 assign( argL, getXMMRegLane64F( gregOfRM(modrm), 0/*lowest lane*/ ) ); 9424 9425 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 9426 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 9427 stmt( IRStmt_Put( 9428 OFFB_CC_DEP1, 9429 binop( Iop_And32, 9430 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)), 9431 mkU32(0x45) 9432 ))); 9433 /* Set NDEP even though it isn't used. This makes redundant-PUT 9434 elimination of previous stores to this field work better. */ 9435 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 9436 goto decode_success; 9437 } 9438 9439 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x 9440 F64 in xmm(G) */ 9441 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xE6) { 9442 IRTemp arg64 = newTemp(Ity_I64); 9443 vassert(sz == 4); 9444 9445 modrm = getIByte(delta+3); 9446 if (epartIsReg(modrm)) { 9447 assign( arg64, getXMMRegLane64(eregOfRM(modrm), 0) ); 9448 delta += 3+1; 9449 DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9450 nameXMMReg(gregOfRM(modrm))); 9451 } else { 9452 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9453 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 9454 delta += 3+alen; 9455 DIP("cvtdq2pd %s,%s\n", dis_buf, 9456 nameXMMReg(gregOfRM(modrm)) ); 9457 } 9458 9459 putXMMRegLane64F( 9460 gregOfRM(modrm), 0, 9461 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64))) 9462 ); 9463 9464 putXMMRegLane64F( 9465 gregOfRM(modrm), 1, 9466 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64))) 9467 ); 9468 9469 goto decode_success; 9470 } 9471 9472 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in 9473 xmm(G) */ 9474 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5B) { 9475 IRTemp argV = newTemp(Ity_V128); 9476 IRTemp rmode = newTemp(Ity_I32); 9477 9478 modrm = getIByte(delta+2); 9479 if (epartIsReg(modrm)) { 9480 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9481 delta += 2+1; 9482 DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9483 nameXMMReg(gregOfRM(modrm))); 9484 } else { 9485 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9486 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9487 delta += 2+alen; 9488 DIP("cvtdq2ps %s,%s\n", dis_buf, 9489 nameXMMReg(gregOfRM(modrm)) ); 9490 } 9491 9492 assign( rmode, get_sse_roundingmode() ); 9493 breakup128to32s( argV, &t3, &t2, &t1, &t0 ); 9494 9495# define CVT(_t) binop( Iop_F64toF32, \ 9496 mkexpr(rmode), \ 9497 unop(Iop_I32StoF64,mkexpr(_t))) 9498 9499 putXMMRegLane32F( gregOfRM(modrm), 3, CVT(t3) ); 9500 putXMMRegLane32F( gregOfRM(modrm), 2, CVT(t2) ); 9501 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) ); 9502 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) ); 9503 9504# undef CVT 9505 9506 goto decode_success; 9507 } 9508 9509 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 9510 lo half xmm(G), and zero upper half */ 9511 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xE6) { 9512 IRTemp argV = newTemp(Ity_V128); 9513 IRTemp rmode = newTemp(Ity_I32); 9514 vassert(sz == 4); 9515 9516 modrm = getIByte(delta+3); 9517 if (epartIsReg(modrm)) { 9518 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9519 delta += 3+1; 9520 DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9521 nameXMMReg(gregOfRM(modrm))); 9522 } else { 9523 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9524 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9525 delta += 3+alen; 9526 DIP("cvtpd2dq %s,%s\n", dis_buf, 9527 nameXMMReg(gregOfRM(modrm)) ); 9528 } 9529 9530 assign( rmode, get_sse_roundingmode() ); 9531 t0 = newTemp(Ity_F64); 9532 t1 = newTemp(Ity_F64); 9533 assign( t0, unop(Iop_ReinterpI64asF64, 9534 unop(Iop_V128to64, mkexpr(argV))) ); 9535 assign( t1, unop(Iop_ReinterpI64asF64, 9536 unop(Iop_V128HIto64, mkexpr(argV))) ); 9537 9538# define CVT(_t) binop( Iop_F64toI32S, \ 9539 mkexpr(rmode), \ 9540 mkexpr(_t) ) 9541 9542 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) ); 9543 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) ); 9544 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) ); 9545 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) ); 9546 9547# undef CVT 9548 9549 goto decode_success; 9550 } 9551 9552 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 9553 I32 in mmx, according to prevailing SSE rounding mode */ 9554 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 9555 I32 in mmx, rounding towards zero */ 9556 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) { 9557 IRTemp dst64 = newTemp(Ity_I64); 9558 IRTemp rmode = newTemp(Ity_I32); 9559 IRTemp f64lo = newTemp(Ity_F64); 9560 IRTemp f64hi = newTemp(Ity_F64); 9561 Bool r2zero = toBool(insn[1] == 0x2C); 9562 9563 do_MMX_preamble(); 9564 modrm = getIByte(delta+2); 9565 9566 if (epartIsReg(modrm)) { 9567 delta += 2+1; 9568 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0)); 9569 assign(f64hi, getXMMRegLane64F(eregOfRM(modrm), 1)); 9570 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "", 9571 nameXMMReg(eregOfRM(modrm)), 9572 nameMMXReg(gregOfRM(modrm))); 9573 } else { 9574 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9575 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 9576 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add32, 9577 mkexpr(addr), 9578 mkU32(8) ))); 9579 delta += 2+alen; 9580 DIP("cvt%spf2pi %s,%s\n", r2zero ? "t" : "", 9581 dis_buf, 9582 nameMMXReg(gregOfRM(modrm))); 9583 } 9584 9585 if (r2zero) { 9586 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 9587 } else { 9588 assign( rmode, get_sse_roundingmode() ); 9589 } 9590 9591 assign( 9592 dst64, 9593 binop( Iop_32HLto64, 9594 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ), 9595 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) ) 9596 ) 9597 ); 9598 9599 putMMXReg(gregOfRM(modrm), mkexpr(dst64)); 9600 goto decode_success; 9601 } 9602 9603 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in 9604 lo half xmm(G), and zero upper half */ 9605 /* Note, this is practically identical to CVTPD2DQ. It would have 9606 been nicer to merge them together, but the insn[] offsets differ 9607 by one. */ 9608 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5A) { 9609 IRTemp argV = newTemp(Ity_V128); 9610 IRTemp rmode = newTemp(Ity_I32); 9611 9612 modrm = getIByte(delta+2); 9613 if (epartIsReg(modrm)) { 9614 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9615 delta += 2+1; 9616 DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9617 nameXMMReg(gregOfRM(modrm))); 9618 } else { 9619 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9620 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9621 delta += 2+alen; 9622 DIP("cvtpd2ps %s,%s\n", dis_buf, 9623 nameXMMReg(gregOfRM(modrm)) ); 9624 } 9625 9626 assign( rmode, get_sse_roundingmode() ); 9627 t0 = newTemp(Ity_F64); 9628 t1 = newTemp(Ity_F64); 9629 assign( t0, unop(Iop_ReinterpI64asF64, 9630 unop(Iop_V128to64, mkexpr(argV))) ); 9631 assign( t1, unop(Iop_ReinterpI64asF64, 9632 unop(Iop_V128HIto64, mkexpr(argV))) ); 9633 9634# define CVT(_t) binop( Iop_F64toF32, \ 9635 mkexpr(rmode), \ 9636 mkexpr(_t) ) 9637 9638 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) ); 9639 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) ); 9640 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) ); 9641 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) ); 9642 9643# undef CVT 9644 9645 goto decode_success; 9646 } 9647 9648 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in 9649 xmm(G) */ 9650 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x2A) { 9651 IRTemp arg64 = newTemp(Ity_I64); 9652 9653 modrm = getIByte(delta+2); 9654 if (epartIsReg(modrm)) { 9655 /* Only switch to MMX mode if the source is a MMX register. 9656 This is inconsistent with all other instructions which 9657 convert between XMM and (M64 or MMX), which always switch 9658 to MMX mode even if 64-bit operand is M64 and not MMX. At 9659 least, that's what the Intel docs seem to me to say. 9660 Fixes #210264. */ 9661 do_MMX_preamble(); 9662 assign( arg64, getMMXReg(eregOfRM(modrm)) ); 9663 delta += 2+1; 9664 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm)), 9665 nameXMMReg(gregOfRM(modrm))); 9666 } else { 9667 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9668 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 9669 delta += 2+alen; 9670 DIP("cvtpi2pd %s,%s\n", dis_buf, 9671 nameXMMReg(gregOfRM(modrm)) ); 9672 } 9673 9674 putXMMRegLane64F( 9675 gregOfRM(modrm), 0, 9676 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) ) 9677 ); 9678 9679 putXMMRegLane64F( 9680 gregOfRM(modrm), 1, 9681 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) ) 9682 ); 9683 9684 goto decode_success; 9685 } 9686 9687 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 9688 xmm(G) */ 9689 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5B) { 9690 IRTemp argV = newTemp(Ity_V128); 9691 IRTemp rmode = newTemp(Ity_I32); 9692 9693 modrm = getIByte(delta+2); 9694 if (epartIsReg(modrm)) { 9695 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9696 delta += 2+1; 9697 DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9698 nameXMMReg(gregOfRM(modrm))); 9699 } else { 9700 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9701 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9702 delta += 2+alen; 9703 DIP("cvtps2dq %s,%s\n", dis_buf, 9704 nameXMMReg(gregOfRM(modrm)) ); 9705 } 9706 9707 assign( rmode, get_sse_roundingmode() ); 9708 breakup128to32s( argV, &t3, &t2, &t1, &t0 ); 9709 9710 /* This is less than ideal. If it turns out to be a performance 9711 bottleneck it can be improved. */ 9712# define CVT(_t) \ 9713 binop( Iop_F64toI32S, \ 9714 mkexpr(rmode), \ 9715 unop( Iop_F32toF64, \ 9716 unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) 9717 9718 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) ); 9719 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) ); 9720 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) ); 9721 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) ); 9722 9723# undef CVT 9724 9725 goto decode_success; 9726 } 9727 9728 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x 9729 F64 in xmm(G). */ 9730 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5A) { 9731 IRTemp f32lo = newTemp(Ity_F32); 9732 IRTemp f32hi = newTemp(Ity_F32); 9733 9734 modrm = getIByte(delta+2); 9735 if (epartIsReg(modrm)) { 9736 assign( f32lo, getXMMRegLane32F(eregOfRM(modrm), 0) ); 9737 assign( f32hi, getXMMRegLane32F(eregOfRM(modrm), 1) ); 9738 delta += 2+1; 9739 DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9740 nameXMMReg(gregOfRM(modrm))); 9741 } else { 9742 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9743 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) ); 9744 assign( f32hi, loadLE(Ity_F32, 9745 binop(Iop_Add32,mkexpr(addr),mkU32(4))) ); 9746 delta += 2+alen; 9747 DIP("cvtps2pd %s,%s\n", dis_buf, 9748 nameXMMReg(gregOfRM(modrm)) ); 9749 } 9750 9751 putXMMRegLane64F( gregOfRM(modrm), 1, 9752 unop(Iop_F32toF64, mkexpr(f32hi)) ); 9753 putXMMRegLane64F( gregOfRM(modrm), 0, 9754 unop(Iop_F32toF64, mkexpr(f32lo)) ); 9755 9756 goto decode_success; 9757 } 9758 9759 /* F2 0F 2D = CVTSD2SI -- convert F64 in mem/low half xmm to 9760 I32 in ireg, according to prevailing SSE rounding mode */ 9761 /* F2 0F 2C = CVTTSD2SI -- convert F64 in mem/low half xmm to 9762 I32 in ireg, rounding towards zero */ 9763 if (insn[0] == 0xF2 && insn[1] == 0x0F 9764 && (insn[2] == 0x2D || insn[2] == 0x2C)) { 9765 IRTemp rmode = newTemp(Ity_I32); 9766 IRTemp f64lo = newTemp(Ity_F64); 9767 Bool r2zero = toBool(insn[2] == 0x2C); 9768 vassert(sz == 4); 9769 9770 modrm = getIByte(delta+3); 9771 if (epartIsReg(modrm)) { 9772 delta += 3+1; 9773 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0)); 9774 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "", 9775 nameXMMReg(eregOfRM(modrm)), 9776 nameIReg(4, gregOfRM(modrm))); 9777 } else { 9778 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9779 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 9780 delta += 3+alen; 9781 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "", 9782 dis_buf, 9783 nameIReg(4, gregOfRM(modrm))); 9784 } 9785 9786 if (r2zero) { 9787 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 9788 } else { 9789 assign( rmode, get_sse_roundingmode() ); 9790 } 9791 9792 putIReg(4, gregOfRM(modrm), 9793 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) ); 9794 9795 goto decode_success; 9796 } 9797 9798 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in 9799 low 1/4 xmm(G), according to prevailing SSE rounding mode */ 9800 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5A) { 9801 IRTemp rmode = newTemp(Ity_I32); 9802 IRTemp f64lo = newTemp(Ity_F64); 9803 vassert(sz == 4); 9804 9805 modrm = getIByte(delta+3); 9806 if (epartIsReg(modrm)) { 9807 delta += 3+1; 9808 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0)); 9809 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9810 nameXMMReg(gregOfRM(modrm))); 9811 } else { 9812 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9813 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 9814 delta += 3+alen; 9815 DIP("cvtsd2ss %s,%s\n", dis_buf, 9816 nameXMMReg(gregOfRM(modrm))); 9817 } 9818 9819 assign( rmode, get_sse_roundingmode() ); 9820 putXMMRegLane32F( 9821 gregOfRM(modrm), 0, 9822 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) ) 9823 ); 9824 9825 goto decode_success; 9826 } 9827 9828 /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low 9829 half xmm */ 9830 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x2A) { 9831 IRTemp arg32 = newTemp(Ity_I32); 9832 vassert(sz == 4); 9833 9834 modrm = getIByte(delta+3); 9835 if (epartIsReg(modrm)) { 9836 assign( arg32, getIReg(4, eregOfRM(modrm)) ); 9837 delta += 3+1; 9838 DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm)), 9839 nameXMMReg(gregOfRM(modrm))); 9840 } else { 9841 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9842 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 9843 delta += 3+alen; 9844 DIP("cvtsi2sd %s,%s\n", dis_buf, 9845 nameXMMReg(gregOfRM(modrm)) ); 9846 } 9847 9848 putXMMRegLane64F( 9849 gregOfRM(modrm), 0, 9850 unop(Iop_I32StoF64, mkexpr(arg32)) ); 9851 9852 goto decode_success; 9853 } 9854 9855 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in 9856 low half xmm(G) */ 9857 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5A) { 9858 IRTemp f32lo = newTemp(Ity_F32); 9859 vassert(sz == 4); 9860 9861 modrm = getIByte(delta+3); 9862 if (epartIsReg(modrm)) { 9863 delta += 3+1; 9864 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0)); 9865 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9866 nameXMMReg(gregOfRM(modrm))); 9867 } else { 9868 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9869 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 9870 delta += 3+alen; 9871 DIP("cvtss2sd %s,%s\n", dis_buf, 9872 nameXMMReg(gregOfRM(modrm))); 9873 } 9874 9875 putXMMRegLane64F( gregOfRM(modrm), 0, 9876 unop( Iop_F32toF64, mkexpr(f32lo) ) ); 9877 9878 goto decode_success; 9879 } 9880 9881 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 9882 lo half xmm(G), and zero upper half, rounding towards zero */ 9883 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE6) { 9884 IRTemp argV = newTemp(Ity_V128); 9885 IRTemp rmode = newTemp(Ity_I32); 9886 9887 modrm = getIByte(delta+2); 9888 if (epartIsReg(modrm)) { 9889 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9890 delta += 2+1; 9891 DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9892 nameXMMReg(gregOfRM(modrm))); 9893 } else { 9894 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9895 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9896 delta += 2+alen; 9897 DIP("cvttpd2dq %s,%s\n", dis_buf, 9898 nameXMMReg(gregOfRM(modrm)) ); 9899 } 9900 9901 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 9902 9903 t0 = newTemp(Ity_F64); 9904 t1 = newTemp(Ity_F64); 9905 assign( t0, unop(Iop_ReinterpI64asF64, 9906 unop(Iop_V128to64, mkexpr(argV))) ); 9907 assign( t1, unop(Iop_ReinterpI64asF64, 9908 unop(Iop_V128HIto64, mkexpr(argV))) ); 9909 9910# define CVT(_t) binop( Iop_F64toI32S, \ 9911 mkexpr(rmode), \ 9912 mkexpr(_t) ) 9913 9914 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) ); 9915 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) ); 9916 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) ); 9917 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) ); 9918 9919# undef CVT 9920 9921 goto decode_success; 9922 } 9923 9924 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 9925 xmm(G), rounding towards zero */ 9926 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5B) { 9927 IRTemp argV = newTemp(Ity_V128); 9928 IRTemp rmode = newTemp(Ity_I32); 9929 vassert(sz == 4); 9930 9931 modrm = getIByte(delta+3); 9932 if (epartIsReg(modrm)) { 9933 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9934 delta += 3+1; 9935 DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9936 nameXMMReg(gregOfRM(modrm))); 9937 } else { 9938 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9939 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9940 delta += 3+alen; 9941 DIP("cvttps2dq %s,%s\n", dis_buf, 9942 nameXMMReg(gregOfRM(modrm)) ); 9943 } 9944 9945 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 9946 breakup128to32s( argV, &t3, &t2, &t1, &t0 ); 9947 9948 /* This is less than ideal. If it turns out to be a performance 9949 bottleneck it can be improved. */ 9950# define CVT(_t) \ 9951 binop( Iop_F64toI32S, \ 9952 mkexpr(rmode), \ 9953 unop( Iop_F32toF64, \ 9954 unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) 9955 9956 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) ); 9957 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) ); 9958 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) ); 9959 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) ); 9960 9961# undef CVT 9962 9963 goto decode_success; 9964 } 9965 9966 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */ 9967 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5E) { 9968 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divpd", Iop_Div64Fx2 ); 9969 goto decode_success; 9970 } 9971 9972 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */ 9973 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5E) { 9974 vassert(sz == 4); 9975 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "divsd", Iop_Div64F0x2 ); 9976 goto decode_success; 9977 } 9978 9979 /* 0F AE /5 = LFENCE -- flush pending operations to memory */ 9980 /* 0F AE /6 = MFENCE -- flush pending operations to memory */ 9981 if (insn[0] == 0x0F && insn[1] == 0xAE 9982 && epartIsReg(insn[2]) 9983 && (gregOfRM(insn[2]) == 5 || gregOfRM(insn[2]) == 6)) { 9984 vassert(sz == 4); 9985 delta += 3; 9986 /* Insert a memory fence. It's sometimes important that these 9987 are carried through to the generated code. */ 9988 stmt( IRStmt_MBE(Imbe_Fence) ); 9989 DIP("%sfence\n", gregOfRM(insn[2])==5 ? "l" : "m"); 9990 goto decode_success; 9991 } 9992 9993 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */ 9994 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5F) { 9995 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxpd", Iop_Max64Fx2 ); 9996 goto decode_success; 9997 } 9998 9999 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */ 10000 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5F) { 10001 vassert(sz == 4); 10002 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "maxsd", Iop_Max64F0x2 ); 10003 goto decode_success; 10004 } 10005 10006 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */ 10007 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5D) { 10008 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minpd", Iop_Min64Fx2 ); 10009 goto decode_success; 10010 } 10011 10012 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */ 10013 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5D) { 10014 vassert(sz == 4); 10015 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "minsd", Iop_Min64F0x2 ); 10016 goto decode_success; 10017 } 10018 10019 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */ 10020 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */ 10021 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */ 10022 if (sz == 2 && insn[0] == 0x0F 10023 && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) { 10024 HChar* wot = insn[1]==0x28 ? "apd" : 10025 insn[1]==0x10 ? "upd" : "dqa"; 10026 modrm = getIByte(delta+2); 10027 if (epartIsReg(modrm)) { 10028 putXMMReg( gregOfRM(modrm), 10029 getXMMReg( eregOfRM(modrm) )); 10030 DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRM(modrm)), 10031 nameXMMReg(gregOfRM(modrm))); 10032 delta += 2+1; 10033 } else { 10034 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10035 if (insn[1] == 0x28/*movapd*/ || insn[1] == 0x6F/*movdqa*/) 10036 gen_SEGV_if_not_16_aligned( addr ); 10037 putXMMReg( gregOfRM(modrm), 10038 loadLE(Ity_V128, mkexpr(addr)) ); 10039 DIP("mov%s %s,%s\n", wot, dis_buf, 10040 nameXMMReg(gregOfRM(modrm))); 10041 delta += 2+alen; 10042 } 10043 goto decode_success; 10044 } 10045 10046 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */ 10047 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */ 10048 if (sz == 2 && insn[0] == 0x0F 10049 && (insn[1] == 0x29 || insn[1] == 0x11)) { 10050 HChar* wot = insn[1]==0x29 ? "apd" : "upd"; 10051 modrm = getIByte(delta+2); 10052 if (epartIsReg(modrm)) { 10053 /* fall through; awaiting test case */ 10054 } else { 10055 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10056 if (insn[1] == 0x29/*movapd*/) 10057 gen_SEGV_if_not_16_aligned( addr ); 10058 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 10059 DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRM(modrm)), 10060 dis_buf ); 10061 delta += 2+alen; 10062 goto decode_success; 10063 } 10064 } 10065 10066 /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */ 10067 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6E) { 10068 modrm = getIByte(delta+2); 10069 if (epartIsReg(modrm)) { 10070 delta += 2+1; 10071 putXMMReg( 10072 gregOfRM(modrm), 10073 unop( Iop_32UtoV128, getIReg(4, eregOfRM(modrm)) ) 10074 ); 10075 DIP("movd %s, %s\n", 10076 nameIReg(4,eregOfRM(modrm)), nameXMMReg(gregOfRM(modrm))); 10077 } else { 10078 addr = disAMode( &alen, sorb, delta+2, dis_buf ); 10079 delta += 2+alen; 10080 putXMMReg( 10081 gregOfRM(modrm), 10082 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) ) 10083 ); 10084 DIP("movd %s, %s\n", dis_buf, nameXMMReg(gregOfRM(modrm))); 10085 } 10086 goto decode_success; 10087 } 10088 10089 /* 66 0F 7E = MOVD from xmm low 1/4 to r/m32. */ 10090 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7E) { 10091 modrm = getIByte(delta+2); 10092 if (epartIsReg(modrm)) { 10093 delta += 2+1; 10094 putIReg( 4, eregOfRM(modrm), 10095 getXMMRegLane32(gregOfRM(modrm), 0) ); 10096 DIP("movd %s, %s\n", 10097 nameXMMReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm))); 10098 } else { 10099 addr = disAMode( &alen, sorb, delta+2, dis_buf ); 10100 delta += 2+alen; 10101 storeLE( mkexpr(addr), 10102 getXMMRegLane32(gregOfRM(modrm), 0) ); 10103 DIP("movd %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf); 10104 } 10105 goto decode_success; 10106 } 10107 10108 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */ 10109 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7F) { 10110 modrm = getIByte(delta+2); 10111 if (epartIsReg(modrm)) { 10112 delta += 2+1; 10113 putXMMReg( eregOfRM(modrm), 10114 getXMMReg(gregOfRM(modrm)) ); 10115 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), 10116 nameXMMReg(eregOfRM(modrm))); 10117 } else { 10118 addr = disAMode( &alen, sorb, delta+2, dis_buf ); 10119 delta += 2+alen; 10120 gen_SEGV_if_not_16_aligned( addr ); 10121 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 10122 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf); 10123 } 10124 goto decode_success; 10125 } 10126 10127 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */ 10128 /* Unfortunately can't simply use the MOVDQA case since the 10129 prefix lengths are different (66 vs F3) */ 10130 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x6F) { 10131 vassert(sz == 4); 10132 modrm = getIByte(delta+3); 10133 if (epartIsReg(modrm)) { 10134 putXMMReg( gregOfRM(modrm), 10135 getXMMReg( eregOfRM(modrm) )); 10136 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10137 nameXMMReg(gregOfRM(modrm))); 10138 delta += 3+1; 10139 } else { 10140 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 10141 putXMMReg( gregOfRM(modrm), 10142 loadLE(Ity_V128, mkexpr(addr)) ); 10143 DIP("movdqu %s,%s\n", dis_buf, 10144 nameXMMReg(gregOfRM(modrm))); 10145 delta += 3+alen; 10146 } 10147 goto decode_success; 10148 } 10149 10150 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */ 10151 /* Unfortunately can't simply use the MOVDQA case since the 10152 prefix lengths are different (66 vs F3) */ 10153 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7F) { 10154 vassert(sz == 4); 10155 modrm = getIByte(delta+3); 10156 if (epartIsReg(modrm)) { 10157 delta += 3+1; 10158 putXMMReg( eregOfRM(modrm), 10159 getXMMReg(gregOfRM(modrm)) ); 10160 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), 10161 nameXMMReg(eregOfRM(modrm))); 10162 } else { 10163 addr = disAMode( &alen, sorb, delta+3, dis_buf ); 10164 delta += 3+alen; 10165 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 10166 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf); 10167 } 10168 goto decode_success; 10169 } 10170 10171 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */ 10172 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD6) { 10173 vassert(sz == 4); 10174 modrm = getIByte(delta+3); 10175 if (epartIsReg(modrm)) { 10176 do_MMX_preamble(); 10177 putMMXReg( gregOfRM(modrm), 10178 getXMMRegLane64( eregOfRM(modrm), 0 )); 10179 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10180 nameMMXReg(gregOfRM(modrm))); 10181 delta += 3+1; 10182 goto decode_success; 10183 } else { 10184 /* fall through, apparently no mem case for this insn */ 10185 } 10186 } 10187 10188 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */ 10189 /* These seems identical to MOVHPS. This instruction encoding is 10190 completely crazy. */ 10191 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x16) { 10192 modrm = getIByte(delta+2); 10193 if (epartIsReg(modrm)) { 10194 /* fall through; apparently reg-reg is not possible */ 10195 } else { 10196 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10197 delta += 2+alen; 10198 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/, 10199 loadLE(Ity_I64, mkexpr(addr)) ); 10200 DIP("movhpd %s,%s\n", dis_buf, 10201 nameXMMReg( gregOfRM(modrm) )); 10202 goto decode_success; 10203 } 10204 } 10205 10206 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */ 10207 /* Again, this seems identical to MOVHPS. */ 10208 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x17) { 10209 if (!epartIsReg(insn[2])) { 10210 delta += 2; 10211 addr = disAMode ( &alen, sorb, delta, dis_buf ); 10212 delta += alen; 10213 storeLE( mkexpr(addr), 10214 getXMMRegLane64( gregOfRM(insn[2]), 10215 1/*upper lane*/ ) ); 10216 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ), 10217 dis_buf); 10218 goto decode_success; 10219 } 10220 /* else fall through */ 10221 } 10222 10223 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */ 10224 /* Identical to MOVLPS ? */ 10225 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x12) { 10226 modrm = getIByte(delta+2); 10227 if (epartIsReg(modrm)) { 10228 /* fall through; apparently reg-reg is not possible */ 10229 } else { 10230 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10231 delta += 2+alen; 10232 putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/, 10233 loadLE(Ity_I64, mkexpr(addr)) ); 10234 DIP("movlpd %s, %s\n", 10235 dis_buf, nameXMMReg( gregOfRM(modrm) )); 10236 goto decode_success; 10237 } 10238 } 10239 10240 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */ 10241 /* Identical to MOVLPS ? */ 10242 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x13) { 10243 if (!epartIsReg(insn[2])) { 10244 delta += 2; 10245 addr = disAMode ( &alen, sorb, delta, dis_buf ); 10246 delta += alen; 10247 storeLE( mkexpr(addr), 10248 getXMMRegLane64( gregOfRM(insn[2]), 10249 0/*lower lane*/ ) ); 10250 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ), 10251 dis_buf); 10252 goto decode_success; 10253 } 10254 /* else fall through */ 10255 } 10256 10257 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to 10258 2 lowest bits of ireg(G) */ 10259 if (insn[0] == 0x0F && insn[1] == 0x50) { 10260 modrm = getIByte(delta+2); 10261 if (sz == 2 && epartIsReg(modrm)) { 10262 Int src; 10263 t0 = newTemp(Ity_I32); 10264 t1 = newTemp(Ity_I32); 10265 delta += 2+1; 10266 src = eregOfRM(modrm); 10267 assign( t0, binop( Iop_And32, 10268 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)), 10269 mkU32(1) )); 10270 assign( t1, binop( Iop_And32, 10271 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)), 10272 mkU32(2) )); 10273 putIReg(4, gregOfRM(modrm), 10274 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)) 10275 ); 10276 DIP("movmskpd %s,%s\n", nameXMMReg(src), 10277 nameIReg(4, gregOfRM(modrm))); 10278 goto decode_success; 10279 } 10280 /* else fall through */ 10281 } 10282 10283 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */ 10284 if (insn[0] == 0x0F && insn[1] == 0xF7) { 10285 modrm = getIByte(delta+2); 10286 if (sz == 2 && epartIsReg(modrm)) { 10287 IRTemp regD = newTemp(Ity_V128); 10288 IRTemp mask = newTemp(Ity_V128); 10289 IRTemp olddata = newTemp(Ity_V128); 10290 IRTemp newdata = newTemp(Ity_V128); 10291 addr = newTemp(Ity_I32); 10292 10293 assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) )); 10294 assign( regD, getXMMReg( gregOfRM(modrm) )); 10295 10296 /* Unfortunately can't do the obvious thing with SarN8x16 10297 here since that can't be re-emitted as SSE2 code - no such 10298 insn. */ 10299 assign( 10300 mask, 10301 binop(Iop_64HLtoV128, 10302 binop(Iop_SarN8x8, 10303 getXMMRegLane64( eregOfRM(modrm), 1 ), 10304 mkU8(7) ), 10305 binop(Iop_SarN8x8, 10306 getXMMRegLane64( eregOfRM(modrm), 0 ), 10307 mkU8(7) ) )); 10308 assign( olddata, loadLE( Ity_V128, mkexpr(addr) )); 10309 assign( newdata, 10310 binop(Iop_OrV128, 10311 binop(Iop_AndV128, 10312 mkexpr(regD), 10313 mkexpr(mask) ), 10314 binop(Iop_AndV128, 10315 mkexpr(olddata), 10316 unop(Iop_NotV128, mkexpr(mask)))) ); 10317 storeLE( mkexpr(addr), mkexpr(newdata) ); 10318 10319 delta += 2+1; 10320 DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ), 10321 nameXMMReg( gregOfRM(modrm) ) ); 10322 goto decode_success; 10323 } 10324 /* else fall through */ 10325 } 10326 10327 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */ 10328 if (insn[0] == 0x0F && insn[1] == 0xE7) { 10329 modrm = getIByte(delta+2); 10330 if (sz == 2 && !epartIsReg(modrm)) { 10331 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10332 gen_SEGV_if_not_16_aligned( addr ); 10333 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 10334 DIP("movntdq %s,%s\n", dis_buf, 10335 nameXMMReg(gregOfRM(modrm))); 10336 delta += 2+alen; 10337 goto decode_success; 10338 } 10339 /* else fall through */ 10340 } 10341 10342 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */ 10343 if (insn[0] == 0x0F && insn[1] == 0xC3) { 10344 vassert(sz == 4); 10345 modrm = getIByte(delta+2); 10346 if (!epartIsReg(modrm)) { 10347 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10348 storeLE( mkexpr(addr), getIReg(4, gregOfRM(modrm)) ); 10349 DIP("movnti %s,%s\n", dis_buf, 10350 nameIReg(4, gregOfRM(modrm))); 10351 delta += 2+alen; 10352 goto decode_success; 10353 } 10354 /* else fall through */ 10355 } 10356 10357 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem 10358 or lo half xmm). */ 10359 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD6) { 10360 modrm = getIByte(delta+2); 10361 if (epartIsReg(modrm)) { 10362 /* fall through, awaiting test case */ 10363 /* dst: lo half copied, hi half zeroed */ 10364 } else { 10365 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10366 storeLE( mkexpr(addr), 10367 getXMMRegLane64( gregOfRM(modrm), 0 )); 10368 DIP("movq %s,%s\n", nameXMMReg(gregOfRM(modrm)), dis_buf ); 10369 delta += 2+alen; 10370 goto decode_success; 10371 } 10372 } 10373 10374 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero 10375 hi half). */ 10376 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xD6) { 10377 vassert(sz == 4); 10378 modrm = getIByte(delta+3); 10379 if (epartIsReg(modrm)) { 10380 do_MMX_preamble(); 10381 putXMMReg( gregOfRM(modrm), 10382 unop(Iop_64UtoV128, getMMXReg( eregOfRM(modrm) )) ); 10383 DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm)), 10384 nameXMMReg(gregOfRM(modrm))); 10385 delta += 3+1; 10386 goto decode_success; 10387 } else { 10388 /* fall through, apparently no mem case for this insn */ 10389 } 10390 } 10391 10392 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to 10393 G (lo half xmm). Upper half of G is zeroed out. */ 10394 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to 10395 G (lo half xmm). If E is mem, upper half of G is zeroed out. 10396 If E is reg, upper half of G is unchanged. */ 10397 if ((insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x10) 10398 || (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7E)) { 10399 vassert(sz == 4); 10400 modrm = getIByte(delta+3); 10401 if (epartIsReg(modrm)) { 10402 putXMMRegLane64( gregOfRM(modrm), 0, 10403 getXMMRegLane64( eregOfRM(modrm), 0 )); 10404 if (insn[0] == 0xF3/*MOVQ*/) { 10405 /* zero bits 127:64 */ 10406 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) ); 10407 } 10408 DIP("movsd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10409 nameXMMReg(gregOfRM(modrm))); 10410 delta += 3+1; 10411 } else { 10412 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 10413 /* zero bits 127:64 */ 10414 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) ); 10415 /* write bits 63:0 */ 10416 putXMMRegLane64( gregOfRM(modrm), 0, 10417 loadLE(Ity_I64, mkexpr(addr)) ); 10418 DIP("movsd %s,%s\n", dis_buf, 10419 nameXMMReg(gregOfRM(modrm))); 10420 delta += 3+alen; 10421 } 10422 goto decode_success; 10423 } 10424 10425 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem 10426 or lo half xmm). */ 10427 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x11) { 10428 vassert(sz == 4); 10429 modrm = getIByte(delta+3); 10430 if (epartIsReg(modrm)) { 10431 putXMMRegLane64( eregOfRM(modrm), 0, 10432 getXMMRegLane64( gregOfRM(modrm), 0 )); 10433 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)), 10434 nameXMMReg(eregOfRM(modrm))); 10435 delta += 3+1; 10436 } else { 10437 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 10438 storeLE( mkexpr(addr), 10439 getXMMRegLane64(gregOfRM(modrm), 0) ); 10440 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)), 10441 dis_buf); 10442 delta += 3+alen; 10443 } 10444 goto decode_success; 10445 } 10446 10447 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */ 10448 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x59) { 10449 delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulpd", Iop_Mul64Fx2 ); 10450 goto decode_success; 10451 } 10452 10453 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */ 10454 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x59) { 10455 vassert(sz == 4); 10456 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "mulsd", Iop_Mul64F0x2 ); 10457 goto decode_success; 10458 } 10459 10460 /* 66 0F 56 = ORPD -- G = G and E */ 10461 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x56) { 10462 delta = dis_SSE_E_to_G_all( sorb, delta+2, "orpd", Iop_OrV128 ); 10463 goto decode_success; 10464 } 10465 10466 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */ 10467 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC6) { 10468 Int select; 10469 IRTemp sV = newTemp(Ity_V128); 10470 IRTemp dV = newTemp(Ity_V128); 10471 IRTemp s1 = newTemp(Ity_I64); 10472 IRTemp s0 = newTemp(Ity_I64); 10473 IRTemp d1 = newTemp(Ity_I64); 10474 IRTemp d0 = newTemp(Ity_I64); 10475 10476 modrm = insn[2]; 10477 assign( dV, getXMMReg(gregOfRM(modrm)) ); 10478 10479 if (epartIsReg(modrm)) { 10480 assign( sV, getXMMReg(eregOfRM(modrm)) ); 10481 select = (Int)insn[3]; 10482 delta += 2+2; 10483 DIP("shufpd $%d,%s,%s\n", select, 10484 nameXMMReg(eregOfRM(modrm)), 10485 nameXMMReg(gregOfRM(modrm))); 10486 } else { 10487 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10488 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 10489 select = (Int)insn[2+alen]; 10490 delta += 3+alen; 10491 DIP("shufpd $%d,%s,%s\n", select, 10492 dis_buf, 10493 nameXMMReg(gregOfRM(modrm))); 10494 } 10495 10496 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) ); 10497 assign( d0, unop(Iop_V128to64, mkexpr(dV)) ); 10498 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) ); 10499 assign( s0, unop(Iop_V128to64, mkexpr(sV)) ); 10500 10501# define SELD(n) mkexpr((n)==0 ? d0 : d1) 10502# define SELS(n) mkexpr((n)==0 ? s0 : s1) 10503 10504 putXMMReg( 10505 gregOfRM(modrm), 10506 binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) ) 10507 ); 10508 10509# undef SELD 10510# undef SELS 10511 10512 goto decode_success; 10513 } 10514 10515 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */ 10516 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x51) { 10517 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 10518 "sqrtpd", Iop_Sqrt64Fx2 ); 10519 goto decode_success; 10520 } 10521 10522 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */ 10523 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x51) { 10524 vassert(sz == 4); 10525 delta = dis_SSE_E_to_G_unary_lo64( sorb, delta+3, 10526 "sqrtsd", Iop_Sqrt64F0x2 ); 10527 goto decode_success; 10528 } 10529 10530 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */ 10531 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5C) { 10532 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subpd", Iop_Sub64Fx2 ); 10533 goto decode_success; 10534 } 10535 10536 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */ 10537 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5C) { 10538 vassert(sz == 4); 10539 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "subsd", Iop_Sub64F0x2 ); 10540 goto decode_success; 10541 } 10542 10543 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */ 10544 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */ 10545 /* These just appear to be special cases of SHUFPS */ 10546 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) { 10547 IRTemp s1 = newTemp(Ity_I64); 10548 IRTemp s0 = newTemp(Ity_I64); 10549 IRTemp d1 = newTemp(Ity_I64); 10550 IRTemp d0 = newTemp(Ity_I64); 10551 IRTemp sV = newTemp(Ity_V128); 10552 IRTemp dV = newTemp(Ity_V128); 10553 Bool hi = toBool(insn[1] == 0x15); 10554 10555 modrm = insn[2]; 10556 assign( dV, getXMMReg(gregOfRM(modrm)) ); 10557 10558 if (epartIsReg(modrm)) { 10559 assign( sV, getXMMReg(eregOfRM(modrm)) ); 10560 delta += 2+1; 10561 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 10562 nameXMMReg(eregOfRM(modrm)), 10563 nameXMMReg(gregOfRM(modrm))); 10564 } else { 10565 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10566 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 10567 delta += 2+alen; 10568 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 10569 dis_buf, 10570 nameXMMReg(gregOfRM(modrm))); 10571 } 10572 10573 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) ); 10574 assign( d0, unop(Iop_V128to64, mkexpr(dV)) ); 10575 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) ); 10576 assign( s0, unop(Iop_V128to64, mkexpr(sV)) ); 10577 10578 if (hi) { 10579 putXMMReg( gregOfRM(modrm), 10580 binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) ); 10581 } else { 10582 putXMMReg( gregOfRM(modrm), 10583 binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) ); 10584 } 10585 10586 goto decode_success; 10587 } 10588 10589 /* 66 0F 57 = XORPD -- G = G and E */ 10590 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x57) { 10591 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorpd", Iop_XorV128 ); 10592 goto decode_success; 10593 } 10594 10595 /* 66 0F 6B = PACKSSDW */ 10596 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) { 10597 delta = dis_SSEint_E_to_G( sorb, delta+2, 10598 "packssdw", 10599 Iop_QNarrowBin32Sto16Sx8, True ); 10600 goto decode_success; 10601 } 10602 10603 /* 66 0F 63 = PACKSSWB */ 10604 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) { 10605 delta = dis_SSEint_E_to_G( sorb, delta+2, 10606 "packsswb", 10607 Iop_QNarrowBin16Sto8Sx16, True ); 10608 goto decode_success; 10609 } 10610 10611 /* 66 0F 67 = PACKUSWB */ 10612 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) { 10613 delta = dis_SSEint_E_to_G( sorb, delta+2, 10614 "packuswb", 10615 Iop_QNarrowBin16Sto8Ux16, True ); 10616 goto decode_success; 10617 } 10618 10619 /* 66 0F FC = PADDB */ 10620 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFC) { 10621 delta = dis_SSEint_E_to_G( sorb, delta+2, 10622 "paddb", Iop_Add8x16, False ); 10623 goto decode_success; 10624 } 10625 10626 /* 66 0F FE = PADDD */ 10627 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFE) { 10628 delta = dis_SSEint_E_to_G( sorb, delta+2, 10629 "paddd", Iop_Add32x4, False ); 10630 goto decode_success; 10631 } 10632 10633 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 10634 /* 0F D4 = PADDQ -- add 64x1 */ 10635 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD4) { 10636 do_MMX_preamble(); 10637 delta = dis_MMXop_regmem_to_reg ( 10638 sorb, delta+2, insn[1], "paddq", False ); 10639 goto decode_success; 10640 } 10641 10642 /* 66 0F D4 = PADDQ */ 10643 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD4) { 10644 delta = dis_SSEint_E_to_G( sorb, delta+2, 10645 "paddq", Iop_Add64x2, False ); 10646 goto decode_success; 10647 } 10648 10649 /* 66 0F FD = PADDW */ 10650 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFD) { 10651 delta = dis_SSEint_E_to_G( sorb, delta+2, 10652 "paddw", Iop_Add16x8, False ); 10653 goto decode_success; 10654 } 10655 10656 /* 66 0F EC = PADDSB */ 10657 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEC) { 10658 delta = dis_SSEint_E_to_G( sorb, delta+2, 10659 "paddsb", Iop_QAdd8Sx16, False ); 10660 goto decode_success; 10661 } 10662 10663 /* 66 0F ED = PADDSW */ 10664 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xED) { 10665 delta = dis_SSEint_E_to_G( sorb, delta+2, 10666 "paddsw", Iop_QAdd16Sx8, False ); 10667 goto decode_success; 10668 } 10669 10670 /* 66 0F DC = PADDUSB */ 10671 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDC) { 10672 delta = dis_SSEint_E_to_G( sorb, delta+2, 10673 "paddusb", Iop_QAdd8Ux16, False ); 10674 goto decode_success; 10675 } 10676 10677 /* 66 0F DD = PADDUSW */ 10678 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDD) { 10679 delta = dis_SSEint_E_to_G( sorb, delta+2, 10680 "paddusw", Iop_QAdd16Ux8, False ); 10681 goto decode_success; 10682 } 10683 10684 /* 66 0F DB = PAND */ 10685 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDB) { 10686 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pand", Iop_AndV128 ); 10687 goto decode_success; 10688 } 10689 10690 /* 66 0F DF = PANDN */ 10691 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDF) { 10692 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "pandn", Iop_AndV128 ); 10693 goto decode_success; 10694 } 10695 10696 /* 66 0F E0 = PAVGB */ 10697 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE0) { 10698 delta = dis_SSEint_E_to_G( sorb, delta+2, 10699 "pavgb", Iop_Avg8Ux16, False ); 10700 goto decode_success; 10701 } 10702 10703 /* 66 0F E3 = PAVGW */ 10704 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE3) { 10705 delta = dis_SSEint_E_to_G( sorb, delta+2, 10706 "pavgw", Iop_Avg16Ux8, False ); 10707 goto decode_success; 10708 } 10709 10710 /* 66 0F 74 = PCMPEQB */ 10711 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x74) { 10712 delta = dis_SSEint_E_to_G( sorb, delta+2, 10713 "pcmpeqb", Iop_CmpEQ8x16, False ); 10714 goto decode_success; 10715 } 10716 10717 /* 66 0F 76 = PCMPEQD */ 10718 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x76) { 10719 delta = dis_SSEint_E_to_G( sorb, delta+2, 10720 "pcmpeqd", Iop_CmpEQ32x4, False ); 10721 goto decode_success; 10722 } 10723 10724 /* 66 0F 75 = PCMPEQW */ 10725 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x75) { 10726 delta = dis_SSEint_E_to_G( sorb, delta+2, 10727 "pcmpeqw", Iop_CmpEQ16x8, False ); 10728 goto decode_success; 10729 } 10730 10731 /* 66 0F 64 = PCMPGTB */ 10732 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x64) { 10733 delta = dis_SSEint_E_to_G( sorb, delta+2, 10734 "pcmpgtb", Iop_CmpGT8Sx16, False ); 10735 goto decode_success; 10736 } 10737 10738 /* 66 0F 66 = PCMPGTD */ 10739 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x66) { 10740 delta = dis_SSEint_E_to_G( sorb, delta+2, 10741 "pcmpgtd", Iop_CmpGT32Sx4, False ); 10742 goto decode_success; 10743 } 10744 10745 /* 66 0F 65 = PCMPGTW */ 10746 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x65) { 10747 delta = dis_SSEint_E_to_G( sorb, delta+2, 10748 "pcmpgtw", Iop_CmpGT16Sx8, False ); 10749 goto decode_success; 10750 } 10751 10752 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put 10753 zero-extend of it in ireg(G). */ 10754 if (insn[0] == 0x0F && insn[1] == 0xC5) { 10755 modrm = insn[2]; 10756 if (sz == 2 && epartIsReg(modrm)) { 10757 t5 = newTemp(Ity_V128); 10758 t4 = newTemp(Ity_I16); 10759 assign(t5, getXMMReg(eregOfRM(modrm))); 10760 breakup128to32s( t5, &t3, &t2, &t1, &t0 ); 10761 switch (insn[3] & 7) { 10762 case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break; 10763 case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break; 10764 case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break; 10765 case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break; 10766 case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break; 10767 case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break; 10768 case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break; 10769 case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break; 10770 default: vassert(0); /*NOTREACHED*/ 10771 } 10772 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t4))); 10773 DIP("pextrw $%d,%s,%s\n", 10774 (Int)insn[3], nameXMMReg(eregOfRM(modrm)), 10775 nameIReg(4,gregOfRM(modrm))); 10776 delta += 4; 10777 goto decode_success; 10778 } 10779 /* else fall through */ 10780 } 10781 10782 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 10783 put it into the specified lane of xmm(G). */ 10784 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC4) { 10785 Int lane; 10786 t4 = newTemp(Ity_I16); 10787 modrm = insn[2]; 10788 10789 if (epartIsReg(modrm)) { 10790 assign(t4, getIReg(2, eregOfRM(modrm))); 10791 delta += 3+1; 10792 lane = insn[3+1-1]; 10793 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 10794 nameIReg(2,eregOfRM(modrm)), 10795 nameXMMReg(gregOfRM(modrm))); 10796 } else { 10797 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10798 delta += 3+alen; 10799 lane = insn[3+alen-1]; 10800 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 10801 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 10802 dis_buf, 10803 nameXMMReg(gregOfRM(modrm))); 10804 } 10805 10806 putXMMRegLane16( gregOfRM(modrm), lane & 7, mkexpr(t4) ); 10807 goto decode_success; 10808 } 10809 10810 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from 10811 E(xmm or mem) to G(xmm) */ 10812 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF5) { 10813 IRTemp s1V = newTemp(Ity_V128); 10814 IRTemp s2V = newTemp(Ity_V128); 10815 IRTemp dV = newTemp(Ity_V128); 10816 IRTemp s1Hi = newTemp(Ity_I64); 10817 IRTemp s1Lo = newTemp(Ity_I64); 10818 IRTemp s2Hi = newTemp(Ity_I64); 10819 IRTemp s2Lo = newTemp(Ity_I64); 10820 IRTemp dHi = newTemp(Ity_I64); 10821 IRTemp dLo = newTemp(Ity_I64); 10822 modrm = insn[2]; 10823 if (epartIsReg(modrm)) { 10824 assign( s1V, getXMMReg(eregOfRM(modrm)) ); 10825 delta += 2+1; 10826 DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10827 nameXMMReg(gregOfRM(modrm))); 10828 } else { 10829 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10830 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) ); 10831 delta += 2+alen; 10832 DIP("pmaddwd %s,%s\n", dis_buf, 10833 nameXMMReg(gregOfRM(modrm))); 10834 } 10835 assign( s2V, getXMMReg(gregOfRM(modrm)) ); 10836 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) ); 10837 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) ); 10838 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) ); 10839 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) ); 10840 assign( dHi, mkIRExprCCall( 10841 Ity_I64, 0/*regparms*/, 10842 "x86g_calculate_mmx_pmaddwd", 10843 &x86g_calculate_mmx_pmaddwd, 10844 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi)) 10845 )); 10846 assign( dLo, mkIRExprCCall( 10847 Ity_I64, 0/*regparms*/, 10848 "x86g_calculate_mmx_pmaddwd", 10849 &x86g_calculate_mmx_pmaddwd, 10850 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo)) 10851 )); 10852 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ; 10853 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 10854 goto decode_success; 10855 } 10856 10857 /* 66 0F EE = PMAXSW -- 16x8 signed max */ 10858 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEE) { 10859 delta = dis_SSEint_E_to_G( sorb, delta+2, 10860 "pmaxsw", Iop_Max16Sx8, False ); 10861 goto decode_success; 10862 } 10863 10864 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */ 10865 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDE) { 10866 delta = dis_SSEint_E_to_G( sorb, delta+2, 10867 "pmaxub", Iop_Max8Ux16, False ); 10868 goto decode_success; 10869 } 10870 10871 /* 66 0F EA = PMINSW -- 16x8 signed min */ 10872 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEA) { 10873 delta = dis_SSEint_E_to_G( sorb, delta+2, 10874 "pminsw", Iop_Min16Sx8, False ); 10875 goto decode_success; 10876 } 10877 10878 /* 66 0F DA = PMINUB -- 8x16 unsigned min */ 10879 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDA) { 10880 delta = dis_SSEint_E_to_G( sorb, delta+2, 10881 "pminub", Iop_Min8Ux16, False ); 10882 goto decode_success; 10883 } 10884 10885 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes in 10886 xmm(G), turn them into a byte, and put zero-extend of it in 10887 ireg(G). Doing this directly is just too cumbersome; give up 10888 therefore and call a helper. */ 10889 /* UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */ 10890 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD7) { 10891 modrm = insn[2]; 10892 if (epartIsReg(modrm)) { 10893 t0 = newTemp(Ity_I64); 10894 t1 = newTemp(Ity_I64); 10895 assign(t0, getXMMRegLane64(eregOfRM(modrm), 0)); 10896 assign(t1, getXMMRegLane64(eregOfRM(modrm), 1)); 10897 t5 = newTemp(Ity_I32); 10898 assign(t5, mkIRExprCCall( 10899 Ity_I32, 0/*regparms*/, 10900 "x86g_calculate_sse_pmovmskb", 10901 &x86g_calculate_sse_pmovmskb, 10902 mkIRExprVec_2( mkexpr(t1), mkexpr(t0) ))); 10903 putIReg(4, gregOfRM(modrm), mkexpr(t5)); 10904 DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10905 nameIReg(4,gregOfRM(modrm))); 10906 delta += 3; 10907 goto decode_success; 10908 } 10909 /* else fall through */ 10910 } 10911 10912 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */ 10913 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE4) { 10914 delta = dis_SSEint_E_to_G( sorb, delta+2, 10915 "pmulhuw", Iop_MulHi16Ux8, False ); 10916 goto decode_success; 10917 } 10918 10919 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */ 10920 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE5) { 10921 delta = dis_SSEint_E_to_G( sorb, delta+2, 10922 "pmulhw", Iop_MulHi16Sx8, False ); 10923 goto decode_success; 10924 } 10925 10926 /* 66 0F D5 = PMULHL -- 16x8 multiply */ 10927 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD5) { 10928 delta = dis_SSEint_E_to_G( sorb, delta+2, 10929 "pmullw", Iop_Mul16x8, False ); 10930 goto decode_success; 10931 } 10932 10933 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 10934 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x 10935 0 to form 64-bit result */ 10936 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF4) { 10937 IRTemp sV = newTemp(Ity_I64); 10938 IRTemp dV = newTemp(Ity_I64); 10939 t1 = newTemp(Ity_I32); 10940 t0 = newTemp(Ity_I32); 10941 modrm = insn[2]; 10942 10943 do_MMX_preamble(); 10944 assign( dV, getMMXReg(gregOfRM(modrm)) ); 10945 10946 if (epartIsReg(modrm)) { 10947 assign( sV, getMMXReg(eregOfRM(modrm)) ); 10948 delta += 2+1; 10949 DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm)), 10950 nameMMXReg(gregOfRM(modrm))); 10951 } else { 10952 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10953 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 10954 delta += 2+alen; 10955 DIP("pmuludq %s,%s\n", dis_buf, 10956 nameMMXReg(gregOfRM(modrm))); 10957 } 10958 10959 assign( t0, unop(Iop_64to32, mkexpr(dV)) ); 10960 assign( t1, unop(Iop_64to32, mkexpr(sV)) ); 10961 putMMXReg( gregOfRM(modrm), 10962 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) ); 10963 goto decode_success; 10964 } 10965 10966 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x 10967 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit 10968 half */ 10969 /* This is a really poor translation -- could be improved if 10970 performance critical */ 10971 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF4) { 10972 IRTemp sV, dV; 10973 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 10974 sV = newTemp(Ity_V128); 10975 dV = newTemp(Ity_V128); 10976 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 10977 t1 = newTemp(Ity_I64); 10978 t0 = newTemp(Ity_I64); 10979 modrm = insn[2]; 10980 assign( dV, getXMMReg(gregOfRM(modrm)) ); 10981 10982 if (epartIsReg(modrm)) { 10983 assign( sV, getXMMReg(eregOfRM(modrm)) ); 10984 delta += 2+1; 10985 DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10986 nameXMMReg(gregOfRM(modrm))); 10987 } else { 10988 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10989 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 10990 delta += 2+alen; 10991 DIP("pmuludq %s,%s\n", dis_buf, 10992 nameXMMReg(gregOfRM(modrm))); 10993 } 10994 10995 breakup128to32s( dV, &d3, &d2, &d1, &d0 ); 10996 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 10997 10998 assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ); 10999 putXMMRegLane64( gregOfRM(modrm), 0, mkexpr(t0) ); 11000 assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) ); 11001 putXMMRegLane64( gregOfRM(modrm), 1, mkexpr(t1) ); 11002 goto decode_success; 11003 } 11004 11005 /* 66 0F EB = POR */ 11006 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEB) { 11007 delta = dis_SSE_E_to_G_all( sorb, delta+2, "por", Iop_OrV128 ); 11008 goto decode_success; 11009 } 11010 11011 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs 11012 from E(xmm or mem) to G(xmm) */ 11013 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF6) { 11014 IRTemp s1V = newTemp(Ity_V128); 11015 IRTemp s2V = newTemp(Ity_V128); 11016 IRTemp dV = newTemp(Ity_V128); 11017 IRTemp s1Hi = newTemp(Ity_I64); 11018 IRTemp s1Lo = newTemp(Ity_I64); 11019 IRTemp s2Hi = newTemp(Ity_I64); 11020 IRTemp s2Lo = newTemp(Ity_I64); 11021 IRTemp dHi = newTemp(Ity_I64); 11022 IRTemp dLo = newTemp(Ity_I64); 11023 modrm = insn[2]; 11024 if (epartIsReg(modrm)) { 11025 assign( s1V, getXMMReg(eregOfRM(modrm)) ); 11026 delta += 2+1; 11027 DIP("psadbw %s,%s\n", nameXMMReg(eregOfRM(modrm)), 11028 nameXMMReg(gregOfRM(modrm))); 11029 } else { 11030 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 11031 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) ); 11032 delta += 2+alen; 11033 DIP("psadbw %s,%s\n", dis_buf, 11034 nameXMMReg(gregOfRM(modrm))); 11035 } 11036 assign( s2V, getXMMReg(gregOfRM(modrm)) ); 11037 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) ); 11038 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) ); 11039 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) ); 11040 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) ); 11041 assign( dHi, mkIRExprCCall( 11042 Ity_I64, 0/*regparms*/, 11043 "x86g_calculate_mmx_psadbw", 11044 &x86g_calculate_mmx_psadbw, 11045 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi)) 11046 )); 11047 assign( dLo, mkIRExprCCall( 11048 Ity_I64, 0/*regparms*/, 11049 "x86g_calculate_mmx_psadbw", 11050 &x86g_calculate_mmx_psadbw, 11051 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo)) 11052 )); 11053 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ; 11054 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 11055 goto decode_success; 11056 } 11057 11058 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */ 11059 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x70) { 11060 Int order; 11061 IRTemp sV, dV, s3, s2, s1, s0; 11062 s3 = s2 = s1 = s0 = IRTemp_INVALID; 11063 sV = newTemp(Ity_V128); 11064 dV = newTemp(Ity_V128); 11065 modrm = insn[2]; 11066 if (epartIsReg(modrm)) { 11067 assign( sV, getXMMReg(eregOfRM(modrm)) ); 11068 order = (Int)insn[3]; 11069 delta += 2+2; 11070 DIP("pshufd $%d,%s,%s\n", order, 11071 nameXMMReg(eregOfRM(modrm)), 11072 nameXMMReg(gregOfRM(modrm))); 11073 } else { 11074 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 11075 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11076 order = (Int)insn[2+alen]; 11077 delta += 3+alen; 11078 DIP("pshufd $%d,%s,%s\n", order, 11079 dis_buf, 11080 nameXMMReg(gregOfRM(modrm))); 11081 } 11082 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 11083 11084# define SEL(n) \ 11085 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 11086 assign(dV, 11087 mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3), 11088 SEL((order>>2)&3), SEL((order>>0)&3) ) 11089 ); 11090 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 11091# undef SEL 11092 goto decode_success; 11093 } 11094 11095 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or 11096 mem) to G(xmm), and copy lower half */ 11097 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x70) { 11098 Int order; 11099 IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0; 11100 s3 = s2 = s1 = s0 = IRTemp_INVALID; 11101 sV = newTemp(Ity_V128); 11102 dV = newTemp(Ity_V128); 11103 sVhi = newTemp(Ity_I64); 11104 dVhi = newTemp(Ity_I64); 11105 modrm = insn[3]; 11106 if (epartIsReg(modrm)) { 11107 assign( sV, getXMMReg(eregOfRM(modrm)) ); 11108 order = (Int)insn[4]; 11109 delta += 4+1; 11110 DIP("pshufhw $%d,%s,%s\n", order, 11111 nameXMMReg(eregOfRM(modrm)), 11112 nameXMMReg(gregOfRM(modrm))); 11113 } else { 11114 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11115 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11116 order = (Int)insn[3+alen]; 11117 delta += 4+alen; 11118 DIP("pshufhw $%d,%s,%s\n", order, 11119 dis_buf, 11120 nameXMMReg(gregOfRM(modrm))); 11121 } 11122 assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) ); 11123 breakup64to16s( sVhi, &s3, &s2, &s1, &s0 ); 11124 11125# define SEL(n) \ 11126 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 11127 assign(dVhi, 11128 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 11129 SEL((order>>2)&3), SEL((order>>0)&3) ) 11130 ); 11131 assign(dV, binop( Iop_64HLtoV128, 11132 mkexpr(dVhi), 11133 unop(Iop_V128to64, mkexpr(sV))) ); 11134 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 11135# undef SEL 11136 goto decode_success; 11137 } 11138 11139 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or 11140 mem) to G(xmm), and copy upper half */ 11141 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x70) { 11142 Int order; 11143 IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0; 11144 s3 = s2 = s1 = s0 = IRTemp_INVALID; 11145 sV = newTemp(Ity_V128); 11146 dV = newTemp(Ity_V128); 11147 sVlo = newTemp(Ity_I64); 11148 dVlo = newTemp(Ity_I64); 11149 modrm = insn[3]; 11150 if (epartIsReg(modrm)) { 11151 assign( sV, getXMMReg(eregOfRM(modrm)) ); 11152 order = (Int)insn[4]; 11153 delta += 4+1; 11154 DIP("pshuflw $%d,%s,%s\n", order, 11155 nameXMMReg(eregOfRM(modrm)), 11156 nameXMMReg(gregOfRM(modrm))); 11157 } else { 11158 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11159 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11160 order = (Int)insn[3+alen]; 11161 delta += 4+alen; 11162 DIP("pshuflw $%d,%s,%s\n", order, 11163 dis_buf, 11164 nameXMMReg(gregOfRM(modrm))); 11165 } 11166 assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) ); 11167 breakup64to16s( sVlo, &s3, &s2, &s1, &s0 ); 11168 11169# define SEL(n) \ 11170 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 11171 assign(dVlo, 11172 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 11173 SEL((order>>2)&3), SEL((order>>0)&3) ) 11174 ); 11175 assign(dV, binop( Iop_64HLtoV128, 11176 unop(Iop_V128HIto64, mkexpr(sV)), 11177 mkexpr(dVlo) ) ); 11178 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 11179# undef SEL 11180 goto decode_success; 11181 } 11182 11183 /* 66 0F 72 /6 ib = PSLLD by immediate */ 11184 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72 11185 && epartIsReg(insn[2]) 11186 && gregOfRM(insn[2]) == 6) { 11187 delta = dis_SSE_shiftE_imm( delta+2, "pslld", Iop_ShlN32x4 ); 11188 goto decode_success; 11189 } 11190 11191 /* 66 0F F2 = PSLLD by E */ 11192 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF2) { 11193 delta = dis_SSE_shiftG_byE( sorb, delta+2, "pslld", Iop_ShlN32x4 ); 11194 goto decode_success; 11195 } 11196 11197 /* 66 0F 73 /7 ib = PSLLDQ by immediate */ 11198 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11199 && epartIsReg(insn[2]) 11200 && gregOfRM(insn[2]) == 7) { 11201 IRTemp sV, dV, hi64, lo64, hi64r, lo64r; 11202 Int imm = (Int)insn[3]; 11203 Int reg = eregOfRM(insn[2]); 11204 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg)); 11205 vassert(imm >= 0 && imm <= 255); 11206 delta += 4; 11207 11208 sV = newTemp(Ity_V128); 11209 dV = newTemp(Ity_V128); 11210 hi64 = newTemp(Ity_I64); 11211 lo64 = newTemp(Ity_I64); 11212 hi64r = newTemp(Ity_I64); 11213 lo64r = newTemp(Ity_I64); 11214 11215 if (imm >= 16) { 11216 putXMMReg(reg, mkV128(0x0000)); 11217 goto decode_success; 11218 } 11219 11220 assign( sV, getXMMReg(reg) ); 11221 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 11222 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 11223 11224 if (imm == 0) { 11225 assign( lo64r, mkexpr(lo64) ); 11226 assign( hi64r, mkexpr(hi64) ); 11227 } 11228 else 11229 if (imm == 8) { 11230 assign( lo64r, mkU64(0) ); 11231 assign( hi64r, mkexpr(lo64) ); 11232 } 11233 else 11234 if (imm > 8) { 11235 assign( lo64r, mkU64(0) ); 11236 assign( hi64r, binop( Iop_Shl64, 11237 mkexpr(lo64), 11238 mkU8( 8*(imm-8) ) )); 11239 } else { 11240 assign( lo64r, binop( Iop_Shl64, 11241 mkexpr(lo64), 11242 mkU8(8 * imm) )); 11243 assign( hi64r, 11244 binop( Iop_Or64, 11245 binop(Iop_Shl64, mkexpr(hi64), 11246 mkU8(8 * imm)), 11247 binop(Iop_Shr64, mkexpr(lo64), 11248 mkU8(8 * (8 - imm)) ) 11249 ) 11250 ); 11251 } 11252 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 11253 putXMMReg(reg, mkexpr(dV)); 11254 goto decode_success; 11255 } 11256 11257 /* 66 0F 73 /6 ib = PSLLQ by immediate */ 11258 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11259 && epartIsReg(insn[2]) 11260 && gregOfRM(insn[2]) == 6) { 11261 delta = dis_SSE_shiftE_imm( delta+2, "psllq", Iop_ShlN64x2 ); 11262 goto decode_success; 11263 } 11264 11265 /* 66 0F F3 = PSLLQ by E */ 11266 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF3) { 11267 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllq", Iop_ShlN64x2 ); 11268 goto decode_success; 11269 } 11270 11271 /* 66 0F 71 /6 ib = PSLLW by immediate */ 11272 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71 11273 && epartIsReg(insn[2]) 11274 && gregOfRM(insn[2]) == 6) { 11275 delta = dis_SSE_shiftE_imm( delta+2, "psllw", Iop_ShlN16x8 ); 11276 goto decode_success; 11277 } 11278 11279 /* 66 0F F1 = PSLLW by E */ 11280 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF1) { 11281 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllw", Iop_ShlN16x8 ); 11282 goto decode_success; 11283 } 11284 11285 /* 66 0F 72 /4 ib = PSRAD by immediate */ 11286 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72 11287 && epartIsReg(insn[2]) 11288 && gregOfRM(insn[2]) == 4) { 11289 delta = dis_SSE_shiftE_imm( delta+2, "psrad", Iop_SarN32x4 ); 11290 goto decode_success; 11291 } 11292 11293 /* 66 0F E2 = PSRAD by E */ 11294 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE2) { 11295 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrad", Iop_SarN32x4 ); 11296 goto decode_success; 11297 } 11298 11299 /* 66 0F 71 /4 ib = PSRAW by immediate */ 11300 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71 11301 && epartIsReg(insn[2]) 11302 && gregOfRM(insn[2]) == 4) { 11303 delta = dis_SSE_shiftE_imm( delta+2, "psraw", Iop_SarN16x8 ); 11304 goto decode_success; 11305 } 11306 11307 /* 66 0F E1 = PSRAW by E */ 11308 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE1) { 11309 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psraw", Iop_SarN16x8 ); 11310 goto decode_success; 11311 } 11312 11313 /* 66 0F 72 /2 ib = PSRLD by immediate */ 11314 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72 11315 && epartIsReg(insn[2]) 11316 && gregOfRM(insn[2]) == 2) { 11317 delta = dis_SSE_shiftE_imm( delta+2, "psrld", Iop_ShrN32x4 ); 11318 goto decode_success; 11319 } 11320 11321 /* 66 0F D2 = PSRLD by E */ 11322 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD2) { 11323 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrld", Iop_ShrN32x4 ); 11324 goto decode_success; 11325 } 11326 11327 /* 66 0F 73 /3 ib = PSRLDQ by immediate */ 11328 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11329 && epartIsReg(insn[2]) 11330 && gregOfRM(insn[2]) == 3) { 11331 IRTemp sV, dV, hi64, lo64, hi64r, lo64r; 11332 Int imm = (Int)insn[3]; 11333 Int reg = eregOfRM(insn[2]); 11334 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg)); 11335 vassert(imm >= 0 && imm <= 255); 11336 delta += 4; 11337 11338 sV = newTemp(Ity_V128); 11339 dV = newTemp(Ity_V128); 11340 hi64 = newTemp(Ity_I64); 11341 lo64 = newTemp(Ity_I64); 11342 hi64r = newTemp(Ity_I64); 11343 lo64r = newTemp(Ity_I64); 11344 11345 if (imm >= 16) { 11346 putXMMReg(reg, mkV128(0x0000)); 11347 goto decode_success; 11348 } 11349 11350 assign( sV, getXMMReg(reg) ); 11351 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 11352 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 11353 11354 if (imm == 0) { 11355 assign( lo64r, mkexpr(lo64) ); 11356 assign( hi64r, mkexpr(hi64) ); 11357 } 11358 else 11359 if (imm == 8) { 11360 assign( hi64r, mkU64(0) ); 11361 assign( lo64r, mkexpr(hi64) ); 11362 } 11363 else 11364 if (imm > 8) { 11365 assign( hi64r, mkU64(0) ); 11366 assign( lo64r, binop( Iop_Shr64, 11367 mkexpr(hi64), 11368 mkU8( 8*(imm-8) ) )); 11369 } else { 11370 assign( hi64r, binop( Iop_Shr64, 11371 mkexpr(hi64), 11372 mkU8(8 * imm) )); 11373 assign( lo64r, 11374 binop( Iop_Or64, 11375 binop(Iop_Shr64, mkexpr(lo64), 11376 mkU8(8 * imm)), 11377 binop(Iop_Shl64, mkexpr(hi64), 11378 mkU8(8 * (8 - imm)) ) 11379 ) 11380 ); 11381 } 11382 11383 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 11384 putXMMReg(reg, mkexpr(dV)); 11385 goto decode_success; 11386 } 11387 11388 /* 66 0F 73 /2 ib = PSRLQ by immediate */ 11389 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11390 && epartIsReg(insn[2]) 11391 && gregOfRM(insn[2]) == 2) { 11392 delta = dis_SSE_shiftE_imm( delta+2, "psrlq", Iop_ShrN64x2 ); 11393 goto decode_success; 11394 } 11395 11396 /* 66 0F D3 = PSRLQ by E */ 11397 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD3) { 11398 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlq", Iop_ShrN64x2 ); 11399 goto decode_success; 11400 } 11401 11402 /* 66 0F 71 /2 ib = PSRLW by immediate */ 11403 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71 11404 && epartIsReg(insn[2]) 11405 && gregOfRM(insn[2]) == 2) { 11406 delta = dis_SSE_shiftE_imm( delta+2, "psrlw", Iop_ShrN16x8 ); 11407 goto decode_success; 11408 } 11409 11410 /* 66 0F D1 = PSRLW by E */ 11411 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD1) { 11412 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlw", Iop_ShrN16x8 ); 11413 goto decode_success; 11414 } 11415 11416 /* 66 0F F8 = PSUBB */ 11417 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF8) { 11418 delta = dis_SSEint_E_to_G( sorb, delta+2, 11419 "psubb", Iop_Sub8x16, False ); 11420 goto decode_success; 11421 } 11422 11423 /* 66 0F FA = PSUBD */ 11424 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFA) { 11425 delta = dis_SSEint_E_to_G( sorb, delta+2, 11426 "psubd", Iop_Sub32x4, False ); 11427 goto decode_success; 11428 } 11429 11430 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 11431 /* 0F FB = PSUBQ -- sub 64x1 */ 11432 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xFB) { 11433 do_MMX_preamble(); 11434 delta = dis_MMXop_regmem_to_reg ( 11435 sorb, delta+2, insn[1], "psubq", False ); 11436 goto decode_success; 11437 } 11438 11439 /* 66 0F FB = PSUBQ */ 11440 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFB) { 11441 delta = dis_SSEint_E_to_G( sorb, delta+2, 11442 "psubq", Iop_Sub64x2, False ); 11443 goto decode_success; 11444 } 11445 11446 /* 66 0F F9 = PSUBW */ 11447 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF9) { 11448 delta = dis_SSEint_E_to_G( sorb, delta+2, 11449 "psubw", Iop_Sub16x8, False ); 11450 goto decode_success; 11451 } 11452 11453 /* 66 0F E8 = PSUBSB */ 11454 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE8) { 11455 delta = dis_SSEint_E_to_G( sorb, delta+2, 11456 "psubsb", Iop_QSub8Sx16, False ); 11457 goto decode_success; 11458 } 11459 11460 /* 66 0F E9 = PSUBSW */ 11461 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE9) { 11462 delta = dis_SSEint_E_to_G( sorb, delta+2, 11463 "psubsw", Iop_QSub16Sx8, False ); 11464 goto decode_success; 11465 } 11466 11467 /* 66 0F D8 = PSUBSB */ 11468 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD8) { 11469 delta = dis_SSEint_E_to_G( sorb, delta+2, 11470 "psubusb", Iop_QSub8Ux16, False ); 11471 goto decode_success; 11472 } 11473 11474 /* 66 0F D9 = PSUBSW */ 11475 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD9) { 11476 delta = dis_SSEint_E_to_G( sorb, delta+2, 11477 "psubusw", Iop_QSub16Ux8, False ); 11478 goto decode_success; 11479 } 11480 11481 /* 66 0F 68 = PUNPCKHBW */ 11482 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x68) { 11483 delta = dis_SSEint_E_to_G( sorb, delta+2, 11484 "punpckhbw", 11485 Iop_InterleaveHI8x16, True ); 11486 goto decode_success; 11487 } 11488 11489 /* 66 0F 6A = PUNPCKHDQ */ 11490 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6A) { 11491 delta = dis_SSEint_E_to_G( sorb, delta+2, 11492 "punpckhdq", 11493 Iop_InterleaveHI32x4, True ); 11494 goto decode_success; 11495 } 11496 11497 /* 66 0F 6D = PUNPCKHQDQ */ 11498 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6D) { 11499 delta = dis_SSEint_E_to_G( sorb, delta+2, 11500 "punpckhqdq", 11501 Iop_InterleaveHI64x2, True ); 11502 goto decode_success; 11503 } 11504 11505 /* 66 0F 69 = PUNPCKHWD */ 11506 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x69) { 11507 delta = dis_SSEint_E_to_G( sorb, delta+2, 11508 "punpckhwd", 11509 Iop_InterleaveHI16x8, True ); 11510 goto decode_success; 11511 } 11512 11513 /* 66 0F 60 = PUNPCKLBW */ 11514 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x60) { 11515 delta = dis_SSEint_E_to_G( sorb, delta+2, 11516 "punpcklbw", 11517 Iop_InterleaveLO8x16, True ); 11518 goto decode_success; 11519 } 11520 11521 /* 66 0F 62 = PUNPCKLDQ */ 11522 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x62) { 11523 delta = dis_SSEint_E_to_G( sorb, delta+2, 11524 "punpckldq", 11525 Iop_InterleaveLO32x4, True ); 11526 goto decode_success; 11527 } 11528 11529 /* 66 0F 6C = PUNPCKLQDQ */ 11530 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6C) { 11531 delta = dis_SSEint_E_to_G( sorb, delta+2, 11532 "punpcklqdq", 11533 Iop_InterleaveLO64x2, True ); 11534 goto decode_success; 11535 } 11536 11537 /* 66 0F 61 = PUNPCKLWD */ 11538 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x61) { 11539 delta = dis_SSEint_E_to_G( sorb, delta+2, 11540 "punpcklwd", 11541 Iop_InterleaveLO16x8, True ); 11542 goto decode_success; 11543 } 11544 11545 /* 66 0F EF = PXOR */ 11546 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEF) { 11547 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pxor", Iop_XorV128 ); 11548 goto decode_success; 11549 } 11550 11551//-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */ 11552//-- if (insn[0] == 0x0F && insn[1] == 0xAE 11553//-- && (!epartIsReg(insn[2])) 11554//-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) { 11555//-- Bool store = gregOfRM(insn[2]) == 0; 11556//-- vg_assert(sz == 4); 11557//-- pair = disAMode ( cb, sorb, eip+2, dis_buf ); 11558//-- t1 = LOW24(pair); 11559//-- eip += 2+HI8(pair); 11560//-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512, 11561//-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1], 11562//-- Lit16, (UShort)insn[2], 11563//-- TempReg, t1 ); 11564//-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf ); 11565//-- goto decode_success; 11566//-- } 11567 11568 /* 0F AE /7 = CLFLUSH -- flush cache line */ 11569 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE 11570 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) { 11571 11572 /* This is something of a hack. We need to know the size of the 11573 cache line containing addr. Since we don't (easily), assume 11574 256 on the basis that no real cache would have a line that 11575 big. It's safe to invalidate more stuff than we need, just 11576 inefficient. */ 11577 UInt lineszB = 256; 11578 11579 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 11580 delta += 2+alen; 11581 11582 /* Round addr down to the start of the containing block. */ 11583 stmt( IRStmt_Put( 11584 OFFB_TISTART, 11585 binop( Iop_And32, 11586 mkexpr(addr), 11587 mkU32( ~(lineszB-1) ))) ); 11588 11589 stmt( IRStmt_Put(OFFB_TILEN, mkU32(lineszB) ) ); 11590 11591 jmp_lit(&dres, Ijk_TInval, (Addr32)(guest_EIP_bbstart+delta)); 11592 11593 DIP("clflush %s\n", dis_buf); 11594 goto decode_success; 11595 } 11596 11597 /* ---------------------------------------------------- */ 11598 /* --- end of the SSE2 decoder. --- */ 11599 /* ---------------------------------------------------- */ 11600 11601 /* ---------------------------------------------------- */ 11602 /* --- start of the SSE3 decoder. --- */ 11603 /* ---------------------------------------------------- */ 11604 11605 /* Skip parts of the decoder which don't apply given the stated 11606 guest subarchitecture. */ 11607 /* if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3)) */ 11608 /* In fact this is highly bogus; we accept SSE3 insns even on a 11609 SSE2-only guest since they turn into IR which can be re-emitted 11610 successfully on an SSE2 host. */ 11611 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2)) 11612 goto after_sse_decoders; /* no SSE3 capabilities */ 11613 11614 insn = (UChar*)&guest_code[delta]; 11615 11616 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm), 11617 duplicating some lanes (2:2:0:0). */ 11618 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm), 11619 duplicating some lanes (3:3:1:1). */ 11620 if (sz == 4 && insn[0] == 0xF3 && insn[1] == 0x0F 11621 && (insn[2] == 0x12 || insn[2] == 0x16)) { 11622 IRTemp s3, s2, s1, s0; 11623 IRTemp sV = newTemp(Ity_V128); 11624 Bool isH = insn[2] == 0x16; 11625 s3 = s2 = s1 = s0 = IRTemp_INVALID; 11626 11627 modrm = insn[3]; 11628 if (epartIsReg(modrm)) { 11629 assign( sV, getXMMReg( eregOfRM(modrm)) ); 11630 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l', 11631 nameXMMReg(eregOfRM(modrm)), 11632 nameXMMReg(gregOfRM(modrm))); 11633 delta += 3+1; 11634 } else { 11635 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11636 gen_SEGV_if_not_16_aligned( addr ); 11637 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11638 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l', 11639 dis_buf, 11640 nameXMMReg(gregOfRM(modrm))); 11641 delta += 3+alen; 11642 } 11643 11644 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 11645 putXMMReg( gregOfRM(modrm), 11646 isH ? mk128from32s( s3, s3, s1, s1 ) 11647 : mk128from32s( s2, s2, s0, s0 ) ); 11648 goto decode_success; 11649 } 11650 11651 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm), 11652 duplicating some lanes (0:1:0:1). */ 11653 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x12) { 11654 IRTemp sV = newTemp(Ity_V128); 11655 IRTemp d0 = newTemp(Ity_I64); 11656 11657 modrm = insn[3]; 11658 if (epartIsReg(modrm)) { 11659 assign( sV, getXMMReg( eregOfRM(modrm)) ); 11660 DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm)), 11661 nameXMMReg(gregOfRM(modrm))); 11662 delta += 3+1; 11663 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) ); 11664 } else { 11665 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11666 assign( d0, loadLE(Ity_I64, mkexpr(addr)) ); 11667 DIP("movddup %s,%s\n", dis_buf, 11668 nameXMMReg(gregOfRM(modrm))); 11669 delta += 3+alen; 11670 } 11671 11672 putXMMReg( gregOfRM(modrm), binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) ); 11673 goto decode_success; 11674 } 11675 11676 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */ 11677 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD0) { 11678 IRTemp a3, a2, a1, a0, s3, s2, s1, s0; 11679 IRTemp eV = newTemp(Ity_V128); 11680 IRTemp gV = newTemp(Ity_V128); 11681 IRTemp addV = newTemp(Ity_V128); 11682 IRTemp subV = newTemp(Ity_V128); 11683 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID; 11684 11685 modrm = insn[3]; 11686 if (epartIsReg(modrm)) { 11687 assign( eV, getXMMReg( eregOfRM(modrm)) ); 11688 DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm)), 11689 nameXMMReg(gregOfRM(modrm))); 11690 delta += 3+1; 11691 } else { 11692 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11693 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 11694 DIP("addsubps %s,%s\n", dis_buf, 11695 nameXMMReg(gregOfRM(modrm))); 11696 delta += 3+alen; 11697 } 11698 11699 assign( gV, getXMMReg(gregOfRM(modrm)) ); 11700 11701 assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) ); 11702 assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) ); 11703 11704 breakup128to32s( addV, &a3, &a2, &a1, &a0 ); 11705 breakup128to32s( subV, &s3, &s2, &s1, &s0 ); 11706 11707 putXMMReg( gregOfRM(modrm), mk128from32s( a3, s2, a1, s0 )); 11708 goto decode_success; 11709 } 11710 11711 /* 66 0F D0 = ADDSUBPD -- 64x4 +/- from E (mem or xmm) to G (xmm). */ 11712 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD0) { 11713 IRTemp eV = newTemp(Ity_V128); 11714 IRTemp gV = newTemp(Ity_V128); 11715 IRTemp addV = newTemp(Ity_V128); 11716 IRTemp subV = newTemp(Ity_V128); 11717 IRTemp a1 = newTemp(Ity_I64); 11718 IRTemp s0 = newTemp(Ity_I64); 11719 11720 modrm = insn[2]; 11721 if (epartIsReg(modrm)) { 11722 assign( eV, getXMMReg( eregOfRM(modrm)) ); 11723 DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 11724 nameXMMReg(gregOfRM(modrm))); 11725 delta += 2+1; 11726 } else { 11727 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 11728 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 11729 DIP("addsubpd %s,%s\n", dis_buf, 11730 nameXMMReg(gregOfRM(modrm))); 11731 delta += 2+alen; 11732 } 11733 11734 assign( gV, getXMMReg(gregOfRM(modrm)) ); 11735 11736 assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) ); 11737 assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) ); 11738 11739 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) )); 11740 assign( s0, unop(Iop_V128to64, mkexpr(subV) )); 11741 11742 putXMMReg( gregOfRM(modrm), 11743 binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) ); 11744 goto decode_success; 11745 } 11746 11747 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */ 11748 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */ 11749 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F 11750 && (insn[2] == 0x7C || insn[2] == 0x7D)) { 11751 IRTemp e3, e2, e1, e0, g3, g2, g1, g0; 11752 IRTemp eV = newTemp(Ity_V128); 11753 IRTemp gV = newTemp(Ity_V128); 11754 IRTemp leftV = newTemp(Ity_V128); 11755 IRTemp rightV = newTemp(Ity_V128); 11756 Bool isAdd = insn[2] == 0x7C; 11757 HChar* str = isAdd ? "add" : "sub"; 11758 e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID; 11759 11760 modrm = insn[3]; 11761 if (epartIsReg(modrm)) { 11762 assign( eV, getXMMReg( eregOfRM(modrm)) ); 11763 DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), 11764 nameXMMReg(gregOfRM(modrm))); 11765 delta += 3+1; 11766 } else { 11767 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11768 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 11769 DIP("h%sps %s,%s\n", str, dis_buf, 11770 nameXMMReg(gregOfRM(modrm))); 11771 delta += 3+alen; 11772 } 11773 11774 assign( gV, getXMMReg(gregOfRM(modrm)) ); 11775 11776 breakup128to32s( eV, &e3, &e2, &e1, &e0 ); 11777 breakup128to32s( gV, &g3, &g2, &g1, &g0 ); 11778 11779 assign( leftV, mk128from32s( e2, e0, g2, g0 ) ); 11780 assign( rightV, mk128from32s( e3, e1, g3, g1 ) ); 11781 11782 putXMMReg( gregOfRM(modrm), 11783 binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4, 11784 mkexpr(leftV), mkexpr(rightV) ) ); 11785 goto decode_success; 11786 } 11787 11788 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */ 11789 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */ 11790 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) { 11791 IRTemp e1 = newTemp(Ity_I64); 11792 IRTemp e0 = newTemp(Ity_I64); 11793 IRTemp g1 = newTemp(Ity_I64); 11794 IRTemp g0 = newTemp(Ity_I64); 11795 IRTemp eV = newTemp(Ity_V128); 11796 IRTemp gV = newTemp(Ity_V128); 11797 IRTemp leftV = newTemp(Ity_V128); 11798 IRTemp rightV = newTemp(Ity_V128); 11799 Bool isAdd = insn[1] == 0x7C; 11800 HChar* str = isAdd ? "add" : "sub"; 11801 11802 modrm = insn[2]; 11803 if (epartIsReg(modrm)) { 11804 assign( eV, getXMMReg( eregOfRM(modrm)) ); 11805 DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), 11806 nameXMMReg(gregOfRM(modrm))); 11807 delta += 2+1; 11808 } else { 11809 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 11810 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 11811 DIP("h%spd %s,%s\n", str, dis_buf, 11812 nameXMMReg(gregOfRM(modrm))); 11813 delta += 2+alen; 11814 } 11815 11816 assign( gV, getXMMReg(gregOfRM(modrm)) ); 11817 11818 assign( e1, unop(Iop_V128HIto64, mkexpr(eV) )); 11819 assign( e0, unop(Iop_V128to64, mkexpr(eV) )); 11820 assign( g1, unop(Iop_V128HIto64, mkexpr(gV) )); 11821 assign( g0, unop(Iop_V128to64, mkexpr(gV) )); 11822 11823 assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) ); 11824 assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) ); 11825 11826 putXMMReg( gregOfRM(modrm), 11827 binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2, 11828 mkexpr(leftV), mkexpr(rightV) ) ); 11829 goto decode_success; 11830 } 11831 11832 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */ 11833 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xF0) { 11834 modrm = getIByte(delta+3); 11835 if (epartIsReg(modrm)) { 11836 goto decode_failure; 11837 } else { 11838 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11839 putXMMReg( gregOfRM(modrm), 11840 loadLE(Ity_V128, mkexpr(addr)) ); 11841 DIP("lddqu %s,%s\n", dis_buf, 11842 nameXMMReg(gregOfRM(modrm))); 11843 delta += 3+alen; 11844 } 11845 goto decode_success; 11846 } 11847 11848 /* ---------------------------------------------------- */ 11849 /* --- end of the SSE3 decoder. --- */ 11850 /* ---------------------------------------------------- */ 11851 11852 /* ---------------------------------------------------- */ 11853 /* --- start of the SSSE3 decoder. --- */ 11854 /* ---------------------------------------------------- */ 11855 11856 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 11857 Unsigned Bytes (MMX) */ 11858 if (sz == 4 11859 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { 11860 IRTemp sV = newTemp(Ity_I64); 11861 IRTemp dV = newTemp(Ity_I64); 11862 IRTemp sVoddsSX = newTemp(Ity_I64); 11863 IRTemp sVevensSX = newTemp(Ity_I64); 11864 IRTemp dVoddsZX = newTemp(Ity_I64); 11865 IRTemp dVevensZX = newTemp(Ity_I64); 11866 11867 modrm = insn[3]; 11868 do_MMX_preamble(); 11869 assign( dV, getMMXReg(gregOfRM(modrm)) ); 11870 11871 if (epartIsReg(modrm)) { 11872 assign( sV, getMMXReg(eregOfRM(modrm)) ); 11873 delta += 3+1; 11874 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm)), 11875 nameMMXReg(gregOfRM(modrm))); 11876 } else { 11877 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11878 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 11879 delta += 3+alen; 11880 DIP("pmaddubsw %s,%s\n", dis_buf, 11881 nameMMXReg(gregOfRM(modrm))); 11882 } 11883 11884 /* compute dV unsigned x sV signed */ 11885 assign( sVoddsSX, 11886 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) ); 11887 assign( sVevensSX, 11888 binop(Iop_SarN16x4, 11889 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)), 11890 mkU8(8)) ); 11891 assign( dVoddsZX, 11892 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) ); 11893 assign( dVevensZX, 11894 binop(Iop_ShrN16x4, 11895 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)), 11896 mkU8(8)) ); 11897 11898 putMMXReg( 11899 gregOfRM(modrm), 11900 binop(Iop_QAdd16Sx4, 11901 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), 11902 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX)) 11903 ) 11904 ); 11905 goto decode_success; 11906 } 11907 11908 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 11909 Unsigned Bytes (XMM) */ 11910 if (sz == 2 11911 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { 11912 IRTemp sV = newTemp(Ity_V128); 11913 IRTemp dV = newTemp(Ity_V128); 11914 IRTemp sVoddsSX = newTemp(Ity_V128); 11915 IRTemp sVevensSX = newTemp(Ity_V128); 11916 IRTemp dVoddsZX = newTemp(Ity_V128); 11917 IRTemp dVevensZX = newTemp(Ity_V128); 11918 11919 modrm = insn[3]; 11920 assign( dV, getXMMReg(gregOfRM(modrm)) ); 11921 11922 if (epartIsReg(modrm)) { 11923 assign( sV, getXMMReg(eregOfRM(modrm)) ); 11924 delta += 3+1; 11925 DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm)), 11926 nameXMMReg(gregOfRM(modrm))); 11927 } else { 11928 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11929 gen_SEGV_if_not_16_aligned( addr ); 11930 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11931 delta += 3+alen; 11932 DIP("pmaddubsw %s,%s\n", dis_buf, 11933 nameXMMReg(gregOfRM(modrm))); 11934 } 11935 11936 /* compute dV unsigned x sV signed */ 11937 assign( sVoddsSX, 11938 binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) ); 11939 assign( sVevensSX, 11940 binop(Iop_SarN16x8, 11941 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)), 11942 mkU8(8)) ); 11943 assign( dVoddsZX, 11944 binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) ); 11945 assign( dVevensZX, 11946 binop(Iop_ShrN16x8, 11947 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)), 11948 mkU8(8)) ); 11949 11950 putXMMReg( 11951 gregOfRM(modrm), 11952 binop(Iop_QAdd16Sx8, 11953 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), 11954 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX)) 11955 ) 11956 ); 11957 goto decode_success; 11958 } 11959 11960 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */ 11961 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or 11962 mmx) and G to G (mmx). */ 11963 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or 11964 mmx) and G to G (mmx). */ 11965 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G 11966 to G (mmx). */ 11967 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G 11968 to G (mmx). */ 11969 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G 11970 to G (mmx). */ 11971 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G 11972 to G (mmx). */ 11973 11974 if (sz == 4 11975 && insn[0] == 0x0F && insn[1] == 0x38 11976 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01 11977 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) { 11978 HChar* str = "???"; 11979 IROp opV64 = Iop_INVALID; 11980 IROp opCatO = Iop_CatOddLanes16x4; 11981 IROp opCatE = Iop_CatEvenLanes16x4; 11982 IRTemp sV = newTemp(Ity_I64); 11983 IRTemp dV = newTemp(Ity_I64); 11984 11985 modrm = insn[3]; 11986 11987 switch (insn[2]) { 11988 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; 11989 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; 11990 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; 11991 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; 11992 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; 11993 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; 11994 default: vassert(0); 11995 } 11996 if (insn[2] == 0x02 || insn[2] == 0x06) { 11997 opCatO = Iop_InterleaveHI32x2; 11998 opCatE = Iop_InterleaveLO32x2; 11999 } 12000 12001 do_MMX_preamble(); 12002 assign( dV, getMMXReg(gregOfRM(modrm)) ); 12003 12004 if (epartIsReg(modrm)) { 12005 assign( sV, getMMXReg(eregOfRM(modrm)) ); 12006 delta += 3+1; 12007 DIP("ph%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)), 12008 nameMMXReg(gregOfRM(modrm))); 12009 } else { 12010 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12011 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 12012 delta += 3+alen; 12013 DIP("ph%s %s,%s\n", str, dis_buf, 12014 nameMMXReg(gregOfRM(modrm))); 12015 } 12016 12017 putMMXReg( 12018 gregOfRM(modrm), 12019 binop(opV64, 12020 binop(opCatE,mkexpr(sV),mkexpr(dV)), 12021 binop(opCatO,mkexpr(sV),mkexpr(dV)) 12022 ) 12023 ); 12024 goto decode_success; 12025 } 12026 12027 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or 12028 xmm) and G to G (xmm). */ 12029 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or 12030 xmm) and G to G (xmm). */ 12031 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and 12032 G to G (xmm). */ 12033 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and 12034 G to G (xmm). */ 12035 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and 12036 G to G (xmm). */ 12037 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and 12038 G to G (xmm). */ 12039 12040 if (sz == 2 12041 && insn[0] == 0x0F && insn[1] == 0x38 12042 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01 12043 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) { 12044 HChar* str = "???"; 12045 IROp opV64 = Iop_INVALID; 12046 IROp opCatO = Iop_CatOddLanes16x4; 12047 IROp opCatE = Iop_CatEvenLanes16x4; 12048 IRTemp sV = newTemp(Ity_V128); 12049 IRTemp dV = newTemp(Ity_V128); 12050 IRTemp sHi = newTemp(Ity_I64); 12051 IRTemp sLo = newTemp(Ity_I64); 12052 IRTemp dHi = newTemp(Ity_I64); 12053 IRTemp dLo = newTemp(Ity_I64); 12054 12055 modrm = insn[3]; 12056 12057 switch (insn[2]) { 12058 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; 12059 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; 12060 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; 12061 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; 12062 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; 12063 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; 12064 default: vassert(0); 12065 } 12066 if (insn[2] == 0x02 || insn[2] == 0x06) { 12067 opCatO = Iop_InterleaveHI32x2; 12068 opCatE = Iop_InterleaveLO32x2; 12069 } 12070 12071 assign( dV, getXMMReg(gregOfRM(modrm)) ); 12072 12073 if (epartIsReg(modrm)) { 12074 assign( sV, getXMMReg( eregOfRM(modrm)) ); 12075 DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), 12076 nameXMMReg(gregOfRM(modrm))); 12077 delta += 3+1; 12078 } else { 12079 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12080 gen_SEGV_if_not_16_aligned( addr ); 12081 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12082 DIP("ph%s %s,%s\n", str, dis_buf, 12083 nameXMMReg(gregOfRM(modrm))); 12084 delta += 3+alen; 12085 } 12086 12087 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 12088 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 12089 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 12090 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 12091 12092 /* This isn't a particularly efficient way to compute the 12093 result, but at least it avoids a proliferation of IROps, 12094 hence avoids complication all the backends. */ 12095 putXMMReg( 12096 gregOfRM(modrm), 12097 binop(Iop_64HLtoV128, 12098 binop(opV64, 12099 binop(opCatE,mkexpr(sHi),mkexpr(sLo)), 12100 binop(opCatO,mkexpr(sHi),mkexpr(sLo)) 12101 ), 12102 binop(opV64, 12103 binop(opCatE,mkexpr(dHi),mkexpr(dLo)), 12104 binop(opCatO,mkexpr(dHi),mkexpr(dLo)) 12105 ) 12106 ) 12107 ); 12108 goto decode_success; 12109 } 12110 12111 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale 12112 (MMX) */ 12113 if (sz == 4 12114 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) { 12115 IRTemp sV = newTemp(Ity_I64); 12116 IRTemp dV = newTemp(Ity_I64); 12117 12118 modrm = insn[3]; 12119 do_MMX_preamble(); 12120 assign( dV, getMMXReg(gregOfRM(modrm)) ); 12121 12122 if (epartIsReg(modrm)) { 12123 assign( sV, getMMXReg(eregOfRM(modrm)) ); 12124 delta += 3+1; 12125 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm)), 12126 nameMMXReg(gregOfRM(modrm))); 12127 } else { 12128 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12129 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 12130 delta += 3+alen; 12131 DIP("pmulhrsw %s,%s\n", dis_buf, 12132 nameMMXReg(gregOfRM(modrm))); 12133 } 12134 12135 putMMXReg( 12136 gregOfRM(modrm), 12137 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) ) 12138 ); 12139 goto decode_success; 12140 } 12141 12142 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and 12143 Scale (XMM) */ 12144 if (sz == 2 12145 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) { 12146 IRTemp sV = newTemp(Ity_V128); 12147 IRTemp dV = newTemp(Ity_V128); 12148 IRTemp sHi = newTemp(Ity_I64); 12149 IRTemp sLo = newTemp(Ity_I64); 12150 IRTemp dHi = newTemp(Ity_I64); 12151 IRTemp dLo = newTemp(Ity_I64); 12152 12153 modrm = insn[3]; 12154 assign( dV, getXMMReg(gregOfRM(modrm)) ); 12155 12156 if (epartIsReg(modrm)) { 12157 assign( sV, getXMMReg(eregOfRM(modrm)) ); 12158 delta += 3+1; 12159 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm)), 12160 nameXMMReg(gregOfRM(modrm))); 12161 } else { 12162 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12163 gen_SEGV_if_not_16_aligned( addr ); 12164 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12165 delta += 3+alen; 12166 DIP("pmulhrsw %s,%s\n", dis_buf, 12167 nameXMMReg(gregOfRM(modrm))); 12168 } 12169 12170 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 12171 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 12172 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 12173 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 12174 12175 putXMMReg( 12176 gregOfRM(modrm), 12177 binop(Iop_64HLtoV128, 12178 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ), 12179 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) ) 12180 ) 12181 ); 12182 goto decode_success; 12183 } 12184 12185 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */ 12186 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */ 12187 /* 0F 38 09 = PSIGND -- Packed Sign 32x2 (MMX) */ 12188 if (sz == 4 12189 && insn[0] == 0x0F && insn[1] == 0x38 12190 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) { 12191 IRTemp sV = newTemp(Ity_I64); 12192 IRTemp dV = newTemp(Ity_I64); 12193 HChar* str = "???"; 12194 Int laneszB = 0; 12195 12196 switch (insn[2]) { 12197 case 0x08: laneszB = 1; str = "b"; break; 12198 case 0x09: laneszB = 2; str = "w"; break; 12199 case 0x0A: laneszB = 4; str = "d"; break; 12200 default: vassert(0); 12201 } 12202 12203 modrm = insn[3]; 12204 do_MMX_preamble(); 12205 assign( dV, getMMXReg(gregOfRM(modrm)) ); 12206 12207 if (epartIsReg(modrm)) { 12208 assign( sV, getMMXReg(eregOfRM(modrm)) ); 12209 delta += 3+1; 12210 DIP("psign%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)), 12211 nameMMXReg(gregOfRM(modrm))); 12212 } else { 12213 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12214 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 12215 delta += 3+alen; 12216 DIP("psign%s %s,%s\n", str, dis_buf, 12217 nameMMXReg(gregOfRM(modrm))); 12218 } 12219 12220 putMMXReg( 12221 gregOfRM(modrm), 12222 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB ) 12223 ); 12224 goto decode_success; 12225 } 12226 12227 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */ 12228 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */ 12229 /* 66 0F 38 09 = PSIGND -- Packed Sign 32x4 (XMM) */ 12230 if (sz == 2 12231 && insn[0] == 0x0F && insn[1] == 0x38 12232 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) { 12233 IRTemp sV = newTemp(Ity_V128); 12234 IRTemp dV = newTemp(Ity_V128); 12235 IRTemp sHi = newTemp(Ity_I64); 12236 IRTemp sLo = newTemp(Ity_I64); 12237 IRTemp dHi = newTemp(Ity_I64); 12238 IRTemp dLo = newTemp(Ity_I64); 12239 HChar* str = "???"; 12240 Int laneszB = 0; 12241 12242 switch (insn[2]) { 12243 case 0x08: laneszB = 1; str = "b"; break; 12244 case 0x09: laneszB = 2; str = "w"; break; 12245 case 0x0A: laneszB = 4; str = "d"; break; 12246 default: vassert(0); 12247 } 12248 12249 modrm = insn[3]; 12250 assign( dV, getXMMReg(gregOfRM(modrm)) ); 12251 12252 if (epartIsReg(modrm)) { 12253 assign( sV, getXMMReg(eregOfRM(modrm)) ); 12254 delta += 3+1; 12255 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), 12256 nameXMMReg(gregOfRM(modrm))); 12257 } else { 12258 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12259 gen_SEGV_if_not_16_aligned( addr ); 12260 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12261 delta += 3+alen; 12262 DIP("psign%s %s,%s\n", str, dis_buf, 12263 nameXMMReg(gregOfRM(modrm))); 12264 } 12265 12266 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 12267 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 12268 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 12269 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 12270 12271 putXMMReg( 12272 gregOfRM(modrm), 12273 binop(Iop_64HLtoV128, 12274 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ), 12275 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB ) 12276 ) 12277 ); 12278 goto decode_success; 12279 } 12280 12281 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */ 12282 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */ 12283 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */ 12284 if (sz == 4 12285 && insn[0] == 0x0F && insn[1] == 0x38 12286 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) { 12287 IRTemp sV = newTemp(Ity_I64); 12288 HChar* str = "???"; 12289 Int laneszB = 0; 12290 12291 switch (insn[2]) { 12292 case 0x1C: laneszB = 1; str = "b"; break; 12293 case 0x1D: laneszB = 2; str = "w"; break; 12294 case 0x1E: laneszB = 4; str = "d"; break; 12295 default: vassert(0); 12296 } 12297 12298 modrm = insn[3]; 12299 do_MMX_preamble(); 12300 12301 if (epartIsReg(modrm)) { 12302 assign( sV, getMMXReg(eregOfRM(modrm)) ); 12303 delta += 3+1; 12304 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)), 12305 nameMMXReg(gregOfRM(modrm))); 12306 } else { 12307 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12308 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 12309 delta += 3+alen; 12310 DIP("pabs%s %s,%s\n", str, dis_buf, 12311 nameMMXReg(gregOfRM(modrm))); 12312 } 12313 12314 putMMXReg( 12315 gregOfRM(modrm), 12316 dis_PABS_helper( mkexpr(sV), laneszB ) 12317 ); 12318 goto decode_success; 12319 } 12320 12321 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */ 12322 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */ 12323 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */ 12324 if (sz == 2 12325 && insn[0] == 0x0F && insn[1] == 0x38 12326 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) { 12327 IRTemp sV = newTemp(Ity_V128); 12328 IRTemp sHi = newTemp(Ity_I64); 12329 IRTemp sLo = newTemp(Ity_I64); 12330 HChar* str = "???"; 12331 Int laneszB = 0; 12332 12333 switch (insn[2]) { 12334 case 0x1C: laneszB = 1; str = "b"; break; 12335 case 0x1D: laneszB = 2; str = "w"; break; 12336 case 0x1E: laneszB = 4; str = "d"; break; 12337 default: vassert(0); 12338 } 12339 12340 modrm = insn[3]; 12341 12342 if (epartIsReg(modrm)) { 12343 assign( sV, getXMMReg(eregOfRM(modrm)) ); 12344 delta += 3+1; 12345 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), 12346 nameXMMReg(gregOfRM(modrm))); 12347 } else { 12348 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12349 gen_SEGV_if_not_16_aligned( addr ); 12350 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12351 delta += 3+alen; 12352 DIP("pabs%s %s,%s\n", str, dis_buf, 12353 nameXMMReg(gregOfRM(modrm))); 12354 } 12355 12356 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 12357 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 12358 12359 putXMMReg( 12360 gregOfRM(modrm), 12361 binop(Iop_64HLtoV128, 12362 dis_PABS_helper( mkexpr(sHi), laneszB ), 12363 dis_PABS_helper( mkexpr(sLo), laneszB ) 12364 ) 12365 ); 12366 goto decode_success; 12367 } 12368 12369 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */ 12370 if (sz == 4 12371 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) { 12372 IRTemp sV = newTemp(Ity_I64); 12373 IRTemp dV = newTemp(Ity_I64); 12374 IRTemp res = newTemp(Ity_I64); 12375 12376 modrm = insn[3]; 12377 do_MMX_preamble(); 12378 assign( dV, getMMXReg(gregOfRM(modrm)) ); 12379 12380 if (epartIsReg(modrm)) { 12381 assign( sV, getMMXReg(eregOfRM(modrm)) ); 12382 d32 = (UInt)insn[3+1]; 12383 delta += 3+1+1; 12384 DIP("palignr $%d,%s,%s\n", (Int)d32, 12385 nameMMXReg(eregOfRM(modrm)), 12386 nameMMXReg(gregOfRM(modrm))); 12387 } else { 12388 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12389 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 12390 d32 = (UInt)insn[3+alen]; 12391 delta += 3+alen+1; 12392 DIP("palignr $%d%s,%s\n", (Int)d32, 12393 dis_buf, 12394 nameMMXReg(gregOfRM(modrm))); 12395 } 12396 12397 if (d32 == 0) { 12398 assign( res, mkexpr(sV) ); 12399 } 12400 else if (d32 >= 1 && d32 <= 7) { 12401 assign(res, 12402 binop(Iop_Or64, 12403 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d32)), 12404 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d32)) 12405 ))); 12406 } 12407 else if (d32 == 8) { 12408 assign( res, mkexpr(dV) ); 12409 } 12410 else if (d32 >= 9 && d32 <= 15) { 12411 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d32-8))) ); 12412 } 12413 else if (d32 >= 16 && d32 <= 255) { 12414 assign( res, mkU64(0) ); 12415 } 12416 else 12417 vassert(0); 12418 12419 putMMXReg( gregOfRM(modrm), mkexpr(res) ); 12420 goto decode_success; 12421 } 12422 12423 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */ 12424 if (sz == 2 12425 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) { 12426 IRTemp sV = newTemp(Ity_V128); 12427 IRTemp dV = newTemp(Ity_V128); 12428 IRTemp sHi = newTemp(Ity_I64); 12429 IRTemp sLo = newTemp(Ity_I64); 12430 IRTemp dHi = newTemp(Ity_I64); 12431 IRTemp dLo = newTemp(Ity_I64); 12432 IRTemp rHi = newTemp(Ity_I64); 12433 IRTemp rLo = newTemp(Ity_I64); 12434 12435 modrm = insn[3]; 12436 assign( dV, getXMMReg(gregOfRM(modrm)) ); 12437 12438 if (epartIsReg(modrm)) { 12439 assign( sV, getXMMReg(eregOfRM(modrm)) ); 12440 d32 = (UInt)insn[3+1]; 12441 delta += 3+1+1; 12442 DIP("palignr $%d,%s,%s\n", (Int)d32, 12443 nameXMMReg(eregOfRM(modrm)), 12444 nameXMMReg(gregOfRM(modrm))); 12445 } else { 12446 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12447 gen_SEGV_if_not_16_aligned( addr ); 12448 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12449 d32 = (UInt)insn[3+alen]; 12450 delta += 3+alen+1; 12451 DIP("palignr $%d,%s,%s\n", (Int)d32, 12452 dis_buf, 12453 nameXMMReg(gregOfRM(modrm))); 12454 } 12455 12456 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 12457 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 12458 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 12459 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 12460 12461 if (d32 == 0) { 12462 assign( rHi, mkexpr(sHi) ); 12463 assign( rLo, mkexpr(sLo) ); 12464 } 12465 else if (d32 >= 1 && d32 <= 7) { 12466 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d32) ); 12467 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d32) ); 12468 } 12469 else if (d32 == 8) { 12470 assign( rHi, mkexpr(dLo) ); 12471 assign( rLo, mkexpr(sHi) ); 12472 } 12473 else if (d32 >= 9 && d32 <= 15) { 12474 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d32-8) ); 12475 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d32-8) ); 12476 } 12477 else if (d32 == 16) { 12478 assign( rHi, mkexpr(dHi) ); 12479 assign( rLo, mkexpr(dLo) ); 12480 } 12481 else if (d32 >= 17 && d32 <= 23) { 12482 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-16))) ); 12483 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d32-16) ); 12484 } 12485 else if (d32 == 24) { 12486 assign( rHi, mkU64(0) ); 12487 assign( rLo, mkexpr(dHi) ); 12488 } 12489 else if (d32 >= 25 && d32 <= 31) { 12490 assign( rHi, mkU64(0) ); 12491 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-24))) ); 12492 } 12493 else if (d32 >= 32 && d32 <= 255) { 12494 assign( rHi, mkU64(0) ); 12495 assign( rLo, mkU64(0) ); 12496 } 12497 else 12498 vassert(0); 12499 12500 putXMMReg( 12501 gregOfRM(modrm), 12502 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) 12503 ); 12504 goto decode_success; 12505 } 12506 12507 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */ 12508 if (sz == 4 12509 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { 12510 IRTemp sV = newTemp(Ity_I64); 12511 IRTemp dV = newTemp(Ity_I64); 12512 12513 modrm = insn[3]; 12514 do_MMX_preamble(); 12515 assign( dV, getMMXReg(gregOfRM(modrm)) ); 12516 12517 if (epartIsReg(modrm)) { 12518 assign( sV, getMMXReg(eregOfRM(modrm)) ); 12519 delta += 3+1; 12520 DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm)), 12521 nameMMXReg(gregOfRM(modrm))); 12522 } else { 12523 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12524 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 12525 delta += 3+alen; 12526 DIP("pshufb %s,%s\n", dis_buf, 12527 nameMMXReg(gregOfRM(modrm))); 12528 } 12529 12530 putMMXReg( 12531 gregOfRM(modrm), 12532 binop( 12533 Iop_And64, 12534 /* permute the lanes */ 12535 binop( 12536 Iop_Perm8x8, 12537 mkexpr(dV), 12538 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL)) 12539 ), 12540 /* mask off lanes which have (index & 0x80) == 0x80 */ 12541 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7))) 12542 ) 12543 ); 12544 goto decode_success; 12545 } 12546 12547 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */ 12548 if (sz == 2 12549 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { 12550 IRTemp sV = newTemp(Ity_V128); 12551 IRTemp dV = newTemp(Ity_V128); 12552 IRTemp sHi = newTemp(Ity_I64); 12553 IRTemp sLo = newTemp(Ity_I64); 12554 IRTemp dHi = newTemp(Ity_I64); 12555 IRTemp dLo = newTemp(Ity_I64); 12556 IRTemp rHi = newTemp(Ity_I64); 12557 IRTemp rLo = newTemp(Ity_I64); 12558 IRTemp sevens = newTemp(Ity_I64); 12559 IRTemp mask0x80hi = newTemp(Ity_I64); 12560 IRTemp mask0x80lo = newTemp(Ity_I64); 12561 IRTemp maskBit3hi = newTemp(Ity_I64); 12562 IRTemp maskBit3lo = newTemp(Ity_I64); 12563 IRTemp sAnd7hi = newTemp(Ity_I64); 12564 IRTemp sAnd7lo = newTemp(Ity_I64); 12565 IRTemp permdHi = newTemp(Ity_I64); 12566 IRTemp permdLo = newTemp(Ity_I64); 12567 12568 modrm = insn[3]; 12569 assign( dV, getXMMReg(gregOfRM(modrm)) ); 12570 12571 if (epartIsReg(modrm)) { 12572 assign( sV, getXMMReg(eregOfRM(modrm)) ); 12573 delta += 3+1; 12574 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm)), 12575 nameXMMReg(gregOfRM(modrm))); 12576 } else { 12577 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12578 gen_SEGV_if_not_16_aligned( addr ); 12579 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12580 delta += 3+alen; 12581 DIP("pshufb %s,%s\n", dis_buf, 12582 nameXMMReg(gregOfRM(modrm))); 12583 } 12584 12585 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 12586 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 12587 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 12588 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 12589 12590 assign( sevens, mkU64(0x0707070707070707ULL) ); 12591 12592 /* 12593 mask0x80hi = Not(SarN8x8(sHi,7)) 12594 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7) 12595 sAnd7hi = And(sHi,sevens) 12596 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi), 12597 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) ) 12598 rHi = And(permdHi,mask0x80hi) 12599 */ 12600 assign( 12601 mask0x80hi, 12602 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7)))); 12603 12604 assign( 12605 maskBit3hi, 12606 binop(Iop_SarN8x8, 12607 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)), 12608 mkU8(7))); 12609 12610 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens))); 12611 12612 assign( 12613 permdHi, 12614 binop( 12615 Iop_Or64, 12616 binop(Iop_And64, 12617 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)), 12618 mkexpr(maskBit3hi)), 12619 binop(Iop_And64, 12620 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)), 12621 unop(Iop_Not64,mkexpr(maskBit3hi))) )); 12622 12623 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) ); 12624 12625 /* And the same for the lower half of the result. What fun. */ 12626 12627 assign( 12628 mask0x80lo, 12629 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7)))); 12630 12631 assign( 12632 maskBit3lo, 12633 binop(Iop_SarN8x8, 12634 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)), 12635 mkU8(7))); 12636 12637 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens))); 12638 12639 assign( 12640 permdLo, 12641 binop( 12642 Iop_Or64, 12643 binop(Iop_And64, 12644 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)), 12645 mkexpr(maskBit3lo)), 12646 binop(Iop_And64, 12647 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)), 12648 unop(Iop_Not64,mkexpr(maskBit3lo))) )); 12649 12650 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) ); 12651 12652 putXMMReg( 12653 gregOfRM(modrm), 12654 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) 12655 ); 12656 goto decode_success; 12657 } 12658 12659 /* 0F 38 F0 = MOVBE m16/32(E), r16/32(G) */ 12660 /* 0F 38 F1 = MOVBE r16/32(G), m16/32(E) */ 12661 if ((sz == 2 || sz == 4) 12662 && insn[0] == 0x0F && insn[1] == 0x38 12663 && (insn[2] == 0xF0 || insn[2] == 0xF1) 12664 && !epartIsReg(insn[3])) { 12665 12666 modrm = insn[3]; 12667 addr = disAMode(&alen, sorb, delta + 3, dis_buf); 12668 delta += 3 + alen; 12669 ty = szToITy(sz); 12670 IRTemp src = newTemp(ty); 12671 12672 if (insn[2] == 0xF0) { /* LOAD */ 12673 assign(src, loadLE(ty, mkexpr(addr))); 12674 IRTemp dst = math_BSWAP(src, ty); 12675 putIReg(sz, gregOfRM(modrm), mkexpr(dst)); 12676 DIP("movbe %s,%s\n", dis_buf, nameIReg(sz, gregOfRM(modrm))); 12677 } else { /* STORE */ 12678 assign(src, getIReg(sz, gregOfRM(modrm))); 12679 IRTemp dst = math_BSWAP(src, ty); 12680 storeLE(mkexpr(addr), mkexpr(dst)); 12681 DIP("movbe %s,%s\n", nameIReg(sz, gregOfRM(modrm)), dis_buf); 12682 } 12683 goto decode_success; 12684 } 12685 12686 /* ---------------------------------------------------- */ 12687 /* --- end of the SSSE3 decoder. --- */ 12688 /* ---------------------------------------------------- */ 12689 12690 /* ---------------------------------------------------- */ 12691 /* --- start of the SSE4 decoder --- */ 12692 /* ---------------------------------------------------- */ 12693 12694 /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 12695 (Partial implementation only -- only deal with cases where 12696 the rounding mode is specified directly by the immediate byte.) 12697 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1 12698 (Limitations ditto) 12699 */ 12700 if (sz == 2 12701 && insn[0] == 0x0F && insn[1] == 0x3A 12702 && (/*insn[2] == 0x0B || */insn[2] == 0x0A)) { 12703 12704 Bool isD = insn[2] == 0x0B; 12705 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32); 12706 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32); 12707 Int imm = 0; 12708 12709 modrm = insn[3]; 12710 12711 if (epartIsReg(modrm)) { 12712 assign( src, 12713 isD ? getXMMRegLane64F( eregOfRM(modrm), 0 ) 12714 : getXMMRegLane32F( eregOfRM(modrm), 0 ) ); 12715 imm = insn[3+1]; 12716 if (imm & ~3) goto decode_failure; 12717 delta += 3+1+1; 12718 DIP( "rounds%c $%d,%s,%s\n", 12719 isD ? 'd' : 's', 12720 imm, nameXMMReg( eregOfRM(modrm) ), 12721 nameXMMReg( gregOfRM(modrm) ) ); 12722 } else { 12723 addr = disAMode( &alen, sorb, delta+3, dis_buf ); 12724 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) )); 12725 imm = insn[3+alen]; 12726 if (imm & ~3) goto decode_failure; 12727 delta += 3+alen+1; 12728 DIP( "roundsd $%d,%s,%s\n", 12729 imm, dis_buf, nameXMMReg( gregOfRM(modrm) ) ); 12730 } 12731 12732 /* (imm & 3) contains an Intel-encoded rounding mode. Because 12733 that encoding is the same as the encoding for IRRoundingMode, 12734 we can use that value directly in the IR as a rounding 12735 mode. */ 12736 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt, 12737 mkU32(imm & 3), mkexpr(src)) ); 12738 12739 if (isD) 12740 putXMMRegLane64F( gregOfRM(modrm), 0, mkexpr(res) ); 12741 else 12742 putXMMRegLane32F( gregOfRM(modrm), 0, mkexpr(res) ); 12743 12744 goto decode_success; 12745 } 12746 12747 /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension, 12748 which we can only decode if we're sure this is an AMD cpu that 12749 supports LZCNT, since otherwise it's BSR, which behaves 12750 differently. */ 12751 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD 12752 && 0 != (archinfo->hwcaps & VEX_HWCAPS_X86_LZCNT)) { 12753 vassert(sz == 2 || sz == 4); 12754 /*IRType*/ ty = szToITy(sz); 12755 IRTemp src = newTemp(ty); 12756 modrm = insn[3]; 12757 if (epartIsReg(modrm)) { 12758 assign(src, getIReg(sz, eregOfRM(modrm))); 12759 delta += 3+1; 12760 DIP("lzcnt%c %s, %s\n", nameISize(sz), 12761 nameIReg(sz, eregOfRM(modrm)), 12762 nameIReg(sz, gregOfRM(modrm))); 12763 } else { 12764 addr = disAMode( &alen, sorb, delta+3, dis_buf ); 12765 assign(src, loadLE(ty, mkexpr(addr))); 12766 delta += 3+alen; 12767 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf, 12768 nameIReg(sz, gregOfRM(modrm))); 12769 } 12770 12771 IRTemp res = gen_LZCNT(ty, src); 12772 putIReg(sz, gregOfRM(modrm), mkexpr(res)); 12773 12774 // Update flags. This is pretty lame .. perhaps can do better 12775 // if this turns out to be performance critical. 12776 // O S A P are cleared. Z is set if RESULT == 0. 12777 // C is set if SRC is zero. 12778 IRTemp src32 = newTemp(Ity_I32); 12779 IRTemp res32 = newTemp(Ity_I32); 12780 assign(src32, widenUto32(mkexpr(src))); 12781 assign(res32, widenUto32(mkexpr(res))); 12782 12783 IRTemp oszacp = newTemp(Ity_I32); 12784 assign( 12785 oszacp, 12786 binop(Iop_Or32, 12787 binop(Iop_Shl32, 12788 unop(Iop_1Uto32, 12789 binop(Iop_CmpEQ32, mkexpr(res32), mkU32(0))), 12790 mkU8(X86G_CC_SHIFT_Z)), 12791 binop(Iop_Shl32, 12792 unop(Iop_1Uto32, 12793 binop(Iop_CmpEQ32, mkexpr(src32), mkU32(0))), 12794 mkU8(X86G_CC_SHIFT_C)) 12795 ) 12796 ); 12797 12798 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 12799 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 12800 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 12801 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) )); 12802 12803 goto decode_success; 12804 } 12805 12806 /* ---------------------------------------------------- */ 12807 /* --- end of the SSE4 decoder --- */ 12808 /* ---------------------------------------------------- */ 12809 12810 after_sse_decoders: 12811 12812 /* ---------------------------------------------------- */ 12813 /* --- deal with misc 0x67 pfxs (addr size override) -- */ 12814 /* ---------------------------------------------------- */ 12815 12816 /* 67 E3 = JCXZ (for JECXZ see below) */ 12817 if (insn[0] == 0x67 && insn[1] == 0xE3 && sz == 4) { 12818 delta += 2; 12819 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta); 12820 delta ++; 12821 stmt( IRStmt_Exit( 12822 binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)), 12823 Ijk_Boring, 12824 IRConst_U32(d32), 12825 OFFB_EIP 12826 )); 12827 DIP("jcxz 0x%x\n", d32); 12828 goto decode_success; 12829 } 12830 12831 /* ---------------------------------------------------- */ 12832 /* --- start of the baseline insn decoder -- */ 12833 /* ---------------------------------------------------- */ 12834 12835 /* Get the primary opcode. */ 12836 opc = getIByte(delta); delta++; 12837 12838 /* We get here if the current insn isn't SSE, or this CPU doesn't 12839 support SSE. */ 12840 12841 switch (opc) { 12842 12843 /* ------------------------ Control flow --------------- */ 12844 12845 case 0xC2: /* RET imm16 */ 12846 d32 = getUDisp16(delta); 12847 delta += 2; 12848 dis_ret(&dres, d32); 12849 DIP("ret %d\n", (Int)d32); 12850 break; 12851 case 0xC3: /* RET */ 12852 dis_ret(&dres, 0); 12853 DIP("ret\n"); 12854 break; 12855 12856 case 0xCF: /* IRET */ 12857 /* Note, this is an extremely kludgey and limited implementation 12858 of iret. All it really does is: 12859 popl %EIP; popl %CS; popl %EFLAGS. 12860 %CS is set but ignored (as it is in (eg) popw %cs)". */ 12861 t1 = newTemp(Ity_I32); /* ESP */ 12862 t2 = newTemp(Ity_I32); /* new EIP */ 12863 t3 = newTemp(Ity_I32); /* new CS */ 12864 t4 = newTemp(Ity_I32); /* new EFLAGS */ 12865 assign(t1, getIReg(4,R_ESP)); 12866 assign(t2, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(0) ))); 12867 assign(t3, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(4) ))); 12868 assign(t4, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(8) ))); 12869 /* Get stuff off stack */ 12870 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(12))); 12871 /* set %CS (which is ignored anyway) */ 12872 putSReg( R_CS, unop(Iop_32to16, mkexpr(t3)) ); 12873 /* set %EFLAGS */ 12874 set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ ); 12875 /* goto new EIP value */ 12876 jmp_treg(&dres, Ijk_Ret, t2); 12877 vassert(dres.whatNext == Dis_StopHere); 12878 DIP("iret (very kludgey)\n"); 12879 break; 12880 12881 case 0xE8: /* CALL J4 */ 12882 d32 = getUDisp32(delta); delta += 4; 12883 d32 += (guest_EIP_bbstart+delta); 12884 /* (guest_eip_bbstart+delta) == return-to addr, d32 == call-to addr */ 12885 if (d32 == guest_EIP_bbstart+delta && getIByte(delta) >= 0x58 12886 && getIByte(delta) <= 0x5F) { 12887 /* Specially treat the position-independent-code idiom 12888 call X 12889 X: popl %reg 12890 as 12891 movl %eip, %reg. 12892 since this generates better code, but for no other reason. */ 12893 Int archReg = getIByte(delta) - 0x58; 12894 /* vex_printf("-- fPIC thingy\n"); */ 12895 putIReg(4, archReg, mkU32(guest_EIP_bbstart+delta)); 12896 delta++; /* Step over the POP */ 12897 DIP("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg)); 12898 } else { 12899 /* The normal sequence for a call. */ 12900 t1 = newTemp(Ity_I32); 12901 assign(t1, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); 12902 putIReg(4, R_ESP, mkexpr(t1)); 12903 storeLE( mkexpr(t1), mkU32(guest_EIP_bbstart+delta)); 12904 if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32 )) { 12905 /* follow into the call target. */ 12906 dres.whatNext = Dis_ResteerU; 12907 dres.continueAt = (Addr64)(Addr32)d32; 12908 } else { 12909 jmp_lit(&dres, Ijk_Call, d32); 12910 vassert(dres.whatNext == Dis_StopHere); 12911 } 12912 DIP("call 0x%x\n",d32); 12913 } 12914 break; 12915 12916//-- case 0xC8: /* ENTER */ 12917//-- d32 = getUDisp16(eip); eip += 2; 12918//-- abyte = getIByte(delta); delta++; 12919//-- 12920//-- vg_assert(sz == 4); 12921//-- vg_assert(abyte == 0); 12922//-- 12923//-- t1 = newTemp(cb); t2 = newTemp(cb); 12924//-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1); 12925//-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); 12926//-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); 12927//-- uLiteral(cb, sz); 12928//-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); 12929//-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2); 12930//-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP); 12931//-- if (d32) { 12932//-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); 12933//-- uLiteral(cb, d32); 12934//-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); 12935//-- } 12936//-- DIP("enter 0x%x, 0x%x", d32, abyte); 12937//-- break; 12938 12939 case 0xC9: /* LEAVE */ 12940 vassert(sz == 4); 12941 t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32); 12942 assign(t1, getIReg(4,R_EBP)); 12943 /* First PUT ESP looks redundant, but need it because ESP must 12944 always be up-to-date for Memcheck to work... */ 12945 putIReg(4, R_ESP, mkexpr(t1)); 12946 assign(t2, loadLE(Ity_I32,mkexpr(t1))); 12947 putIReg(4, R_EBP, mkexpr(t2)); 12948 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(4)) ); 12949 DIP("leave\n"); 12950 break; 12951 12952 /* ---------------- Misc weird-ass insns --------------- */ 12953 12954 case 0x27: /* DAA */ 12955 case 0x2F: /* DAS */ 12956 case 0x37: /* AAA */ 12957 case 0x3F: /* AAS */ 12958 /* An ugly implementation for some ugly instructions. Oh 12959 well. */ 12960 if (sz != 4) goto decode_failure; 12961 t1 = newTemp(Ity_I32); 12962 t2 = newTemp(Ity_I32); 12963 /* Make up a 32-bit value (t1), with the old value of AX in the 12964 bottom 16 bits, and the old OSZACP bitmask in the upper 16 12965 bits. */ 12966 assign(t1, 12967 binop(Iop_16HLto32, 12968 unop(Iop_32to16, 12969 mk_x86g_calculate_eflags_all()), 12970 getIReg(2, R_EAX) 12971 )); 12972 /* Call the helper fn, to get a new AX and OSZACP value, and 12973 poke both back into the guest state. Also pass the helper 12974 the actual opcode so it knows which of the 4 instructions it 12975 is doing the computation for. */ 12976 vassert(opc == 0x27 || opc == 0x2F || opc == 0x37 || opc == 0x3F); 12977 assign(t2, 12978 mkIRExprCCall( 12979 Ity_I32, 0/*regparm*/, "x86g_calculate_daa_das_aaa_aas", 12980 &x86g_calculate_daa_das_aaa_aas, 12981 mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) ) 12982 )); 12983 putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) )); 12984 12985 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 12986 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 12987 stmt( IRStmt_Put( OFFB_CC_DEP1, 12988 binop(Iop_And32, 12989 binop(Iop_Shr32, mkexpr(t2), mkU8(16)), 12990 mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P 12991 | X86G_CC_MASK_A | X86G_CC_MASK_Z 12992 | X86G_CC_MASK_S| X86G_CC_MASK_O ) 12993 ) 12994 ) 12995 ); 12996 /* Set NDEP even though it isn't used. This makes redundant-PUT 12997 elimination of previous stores to this field work better. */ 12998 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 12999 switch (opc) { 13000 case 0x27: DIP("daa\n"); break; 13001 case 0x2F: DIP("das\n"); break; 13002 case 0x37: DIP("aaa\n"); break; 13003 case 0x3F: DIP("aas\n"); break; 13004 default: vassert(0); 13005 } 13006 break; 13007 13008 case 0xD4: /* AAM */ 13009 case 0xD5: /* AAD */ 13010 d32 = getIByte(delta); delta++; 13011 if (sz != 4 || d32 != 10) goto decode_failure; 13012 t1 = newTemp(Ity_I32); 13013 t2 = newTemp(Ity_I32); 13014 /* Make up a 32-bit value (t1), with the old value of AX in the 13015 bottom 16 bits, and the old OSZACP bitmask in the upper 16 13016 bits. */ 13017 assign(t1, 13018 binop(Iop_16HLto32, 13019 unop(Iop_32to16, 13020 mk_x86g_calculate_eflags_all()), 13021 getIReg(2, R_EAX) 13022 )); 13023 /* Call the helper fn, to get a new AX and OSZACP value, and 13024 poke both back into the guest state. Also pass the helper 13025 the actual opcode so it knows which of the 2 instructions it 13026 is doing the computation for. */ 13027 assign(t2, 13028 mkIRExprCCall( 13029 Ity_I32, 0/*regparm*/, "x86g_calculate_aad_aam", 13030 &x86g_calculate_aad_aam, 13031 mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) ) 13032 )); 13033 putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) )); 13034 13035 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 13036 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 13037 stmt( IRStmt_Put( OFFB_CC_DEP1, 13038 binop(Iop_And32, 13039 binop(Iop_Shr32, mkexpr(t2), mkU8(16)), 13040 mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P 13041 | X86G_CC_MASK_A | X86G_CC_MASK_Z 13042 | X86G_CC_MASK_S| X86G_CC_MASK_O ) 13043 ) 13044 ) 13045 ); 13046 /* Set NDEP even though it isn't used. This makes 13047 redundant-PUT elimination of previous stores to this field 13048 work better. */ 13049 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 13050 13051 DIP(opc == 0xD4 ? "aam\n" : "aad\n"); 13052 break; 13053 13054 /* ------------------------ CWD/CDQ -------------------- */ 13055 13056 case 0x98: /* CBW */ 13057 if (sz == 4) { 13058 putIReg(4, R_EAX, unop(Iop_16Sto32, getIReg(2, R_EAX))); 13059 DIP("cwde\n"); 13060 } else { 13061 vassert(sz == 2); 13062 putIReg(2, R_EAX, unop(Iop_8Sto16, getIReg(1, R_EAX))); 13063 DIP("cbw\n"); 13064 } 13065 break; 13066 13067 case 0x99: /* CWD/CDQ */ 13068 ty = szToITy(sz); 13069 putIReg(sz, R_EDX, 13070 binop(mkSizedOp(ty,Iop_Sar8), 13071 getIReg(sz, R_EAX), 13072 mkU8(sz == 2 ? 15 : 31)) ); 13073 DIP(sz == 2 ? "cwdq\n" : "cdqq\n"); 13074 break; 13075 13076 /* ------------------------ FPU ops -------------------- */ 13077 13078 case 0x9E: /* SAHF */ 13079 codegen_SAHF(); 13080 DIP("sahf\n"); 13081 break; 13082 13083 case 0x9F: /* LAHF */ 13084 codegen_LAHF(); 13085 DIP("lahf\n"); 13086 break; 13087 13088 case 0x9B: /* FWAIT */ 13089 /* ignore? */ 13090 DIP("fwait\n"); 13091 break; 13092 13093 case 0xD8: 13094 case 0xD9: 13095 case 0xDA: 13096 case 0xDB: 13097 case 0xDC: 13098 case 0xDD: 13099 case 0xDE: 13100 case 0xDF: { 13101 Int delta0 = delta; 13102 Bool decode_OK = False; 13103 delta = dis_FPU ( &decode_OK, sorb, delta ); 13104 if (!decode_OK) { 13105 delta = delta0; 13106 goto decode_failure; 13107 } 13108 break; 13109 } 13110 13111 /* ------------------------ INC & DEC ------------------ */ 13112 13113 case 0x40: /* INC eAX */ 13114 case 0x41: /* INC eCX */ 13115 case 0x42: /* INC eDX */ 13116 case 0x43: /* INC eBX */ 13117 case 0x44: /* INC eSP */ 13118 case 0x45: /* INC eBP */ 13119 case 0x46: /* INC eSI */ 13120 case 0x47: /* INC eDI */ 13121 vassert(sz == 2 || sz == 4); 13122 ty = szToITy(sz); 13123 t1 = newTemp(ty); 13124 assign( t1, binop(mkSizedOp(ty,Iop_Add8), 13125 getIReg(sz, (UInt)(opc - 0x40)), 13126 mkU(ty,1)) ); 13127 setFlags_INC_DEC( True, t1, ty ); 13128 putIReg(sz, (UInt)(opc - 0x40), mkexpr(t1)); 13129 DIP("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40)); 13130 break; 13131 13132 case 0x48: /* DEC eAX */ 13133 case 0x49: /* DEC eCX */ 13134 case 0x4A: /* DEC eDX */ 13135 case 0x4B: /* DEC eBX */ 13136 case 0x4C: /* DEC eSP */ 13137 case 0x4D: /* DEC eBP */ 13138 case 0x4E: /* DEC eSI */ 13139 case 0x4F: /* DEC eDI */ 13140 vassert(sz == 2 || sz == 4); 13141 ty = szToITy(sz); 13142 t1 = newTemp(ty); 13143 assign( t1, binop(mkSizedOp(ty,Iop_Sub8), 13144 getIReg(sz, (UInt)(opc - 0x48)), 13145 mkU(ty,1)) ); 13146 setFlags_INC_DEC( False, t1, ty ); 13147 putIReg(sz, (UInt)(opc - 0x48), mkexpr(t1)); 13148 DIP("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48)); 13149 break; 13150 13151 /* ------------------------ INT ------------------------ */ 13152 13153 case 0xCC: /* INT 3 */ 13154 jmp_lit(&dres, Ijk_SigTRAP, ((Addr32)guest_EIP_bbstart)+delta); 13155 vassert(dres.whatNext == Dis_StopHere); 13156 DIP("int $0x3\n"); 13157 break; 13158 13159 case 0xCD: /* INT imm8 */ 13160 d32 = getIByte(delta); delta++; 13161 13162 /* For any of the cases where we emit a jump (that is, for all 13163 currently handled cases), it's important that all ArchRegs 13164 carry their up-to-date value at this point. So we declare an 13165 end-of-block here, which forces any TempRegs caching ArchRegs 13166 to be flushed. */ 13167 13168 /* Handle int $0x3F .. $0x4F by synthesising a segfault and a 13169 restart of this instruction (hence the "-2" two lines below, 13170 to get the restart EIP to be this instruction. This is 13171 probably Linux-specific and it would be more correct to only 13172 do this if the VexAbiInfo says that is what we should do. 13173 This used to handle just 0x40-0x43; Jikes RVM uses a larger 13174 range (0x3F-0x49), and this allows some slack as well. */ 13175 if (d32 >= 0x3F && d32 <= 0x4F) { 13176 jmp_lit(&dres, Ijk_SigSEGV, ((Addr32)guest_EIP_bbstart)+delta-2); 13177 vassert(dres.whatNext == Dis_StopHere); 13178 DIP("int $0x%x\n", (Int)d32); 13179 break; 13180 } 13181 13182 /* Handle int $0x80 (linux syscalls), int $0x81 and $0x82 13183 (darwin syscalls). As part of this, note where we are, so we 13184 can back up the guest to this point if the syscall needs to 13185 be restarted. */ 13186 if (d32 == 0x80) { 13187 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL, 13188 mkU32(guest_EIP_curr_instr) ) ); 13189 jmp_lit(&dres, Ijk_Sys_int128, ((Addr32)guest_EIP_bbstart)+delta); 13190 vassert(dres.whatNext == Dis_StopHere); 13191 DIP("int $0x80\n"); 13192 break; 13193 } 13194 if (d32 == 0x81) { 13195 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL, 13196 mkU32(guest_EIP_curr_instr) ) ); 13197 jmp_lit(&dres, Ijk_Sys_int129, ((Addr32)guest_EIP_bbstart)+delta); 13198 vassert(dres.whatNext == Dis_StopHere); 13199 DIP("int $0x81\n"); 13200 break; 13201 } 13202 if (d32 == 0x82) { 13203 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL, 13204 mkU32(guest_EIP_curr_instr) ) ); 13205 jmp_lit(&dres, Ijk_Sys_int130, ((Addr32)guest_EIP_bbstart)+delta); 13206 vassert(dres.whatNext == Dis_StopHere); 13207 DIP("int $0x82\n"); 13208 break; 13209 } 13210 13211 /* none of the above */ 13212 goto decode_failure; 13213 13214 /* ------------------------ Jcond, byte offset --------- */ 13215 13216 case 0xEB: /* Jb (jump, byte offset) */ 13217 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta); 13218 delta++; 13219 if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) { 13220 dres.whatNext = Dis_ResteerU; 13221 dres.continueAt = (Addr64)(Addr32)d32; 13222 } else { 13223 jmp_lit(&dres, Ijk_Boring, d32); 13224 vassert(dres.whatNext == Dis_StopHere); 13225 } 13226 DIP("jmp-8 0x%x\n", d32); 13227 break; 13228 13229 case 0xE9: /* Jv (jump, 16/32 offset) */ 13230 vassert(sz == 4); /* JRS added 2004 July 11 */ 13231 d32 = (((Addr32)guest_EIP_bbstart)+delta+sz) + getSDisp(sz,delta); 13232 delta += sz; 13233 if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) { 13234 dres.whatNext = Dis_ResteerU; 13235 dres.continueAt = (Addr64)(Addr32)d32; 13236 } else { 13237 jmp_lit(&dres, Ijk_Boring, d32); 13238 vassert(dres.whatNext == Dis_StopHere); 13239 } 13240 DIP("jmp 0x%x\n", d32); 13241 break; 13242 13243 case 0x70: 13244 case 0x71: 13245 case 0x72: /* JBb/JNAEb (jump below) */ 13246 case 0x73: /* JNBb/JAEb (jump not below) */ 13247 case 0x74: /* JZb/JEb (jump zero) */ 13248 case 0x75: /* JNZb/JNEb (jump not zero) */ 13249 case 0x76: /* JBEb/JNAb (jump below or equal) */ 13250 case 0x77: /* JNBEb/JAb (jump not below or equal) */ 13251 case 0x78: /* JSb (jump negative) */ 13252 case 0x79: /* JSb (jump not negative) */ 13253 case 0x7A: /* JP (jump parity even) */ 13254 case 0x7B: /* JNP/JPO (jump parity odd) */ 13255 case 0x7C: /* JLb/JNGEb (jump less) */ 13256 case 0x7D: /* JGEb/JNLb (jump greater or equal) */ 13257 case 0x7E: /* JLEb/JNGb (jump less or equal) */ 13258 case 0x7F: /* JGb/JNLEb (jump greater) */ 13259 { Int jmpDelta; 13260 HChar* comment = ""; 13261 jmpDelta = (Int)getSDisp8(delta); 13262 vassert(-128 <= jmpDelta && jmpDelta < 128); 13263 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + jmpDelta; 13264 delta++; 13265 if (resteerCisOk 13266 && vex_control.guest_chase_cond 13267 && (Addr32)d32 != (Addr32)guest_EIP_bbstart 13268 && jmpDelta < 0 13269 && resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) { 13270 /* Speculation: assume this backward branch is taken. So we 13271 need to emit a side-exit to the insn following this one, 13272 on the negation of the condition, and continue at the 13273 branch target address (d32). If we wind up back at the 13274 first instruction of the trace, just stop; it's better to 13275 let the IR loop unroller handle that case. */ 13276 stmt( IRStmt_Exit( 13277 mk_x86g_calculate_condition((X86Condcode)(1 ^ (opc - 0x70))), 13278 Ijk_Boring, 13279 IRConst_U32(guest_EIP_bbstart+delta), 13280 OFFB_EIP ) ); 13281 dres.whatNext = Dis_ResteerC; 13282 dres.continueAt = (Addr64)(Addr32)d32; 13283 comment = "(assumed taken)"; 13284 } 13285 else 13286 if (resteerCisOk 13287 && vex_control.guest_chase_cond 13288 && (Addr32)d32 != (Addr32)guest_EIP_bbstart 13289 && jmpDelta >= 0 13290 && resteerOkFn( callback_opaque, 13291 (Addr64)(Addr32)(guest_EIP_bbstart+delta)) ) { 13292 /* Speculation: assume this forward branch is not taken. So 13293 we need to emit a side-exit to d32 (the dest) and continue 13294 disassembling at the insn immediately following this 13295 one. */ 13296 stmt( IRStmt_Exit( 13297 mk_x86g_calculate_condition((X86Condcode)(opc - 0x70)), 13298 Ijk_Boring, 13299 IRConst_U32(d32), 13300 OFFB_EIP ) ); 13301 dres.whatNext = Dis_ResteerC; 13302 dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta); 13303 comment = "(assumed not taken)"; 13304 } 13305 else { 13306 /* Conservative default translation - end the block at this 13307 point. */ 13308 jcc_01( &dres, (X86Condcode)(opc - 0x70), 13309 (Addr32)(guest_EIP_bbstart+delta), d32); 13310 vassert(dres.whatNext == Dis_StopHere); 13311 } 13312 DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc - 0x70), d32, comment); 13313 break; 13314 } 13315 13316 case 0xE3: /* JECXZ (for JCXZ see above) */ 13317 if (sz != 4) goto decode_failure; 13318 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta); 13319 delta ++; 13320 stmt( IRStmt_Exit( 13321 binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)), 13322 Ijk_Boring, 13323 IRConst_U32(d32), 13324 OFFB_EIP 13325 )); 13326 DIP("jecxz 0x%x\n", d32); 13327 break; 13328 13329 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */ 13330 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */ 13331 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */ 13332 { /* Again, the docs say this uses ECX/CX as a count depending on 13333 the address size override, not the operand one. Since we 13334 don't handle address size overrides, I guess that means 13335 ECX. */ 13336 IRExpr* zbit = NULL; 13337 IRExpr* count = NULL; 13338 IRExpr* cond = NULL; 13339 HChar* xtra = NULL; 13340 13341 if (sz != 4) goto decode_failure; 13342 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta); 13343 delta++; 13344 putIReg(4, R_ECX, binop(Iop_Sub32, getIReg(4,R_ECX), mkU32(1))); 13345 13346 count = getIReg(4,R_ECX); 13347 cond = binop(Iop_CmpNE32, count, mkU32(0)); 13348 switch (opc) { 13349 case 0xE2: 13350 xtra = ""; 13351 break; 13352 case 0xE1: 13353 xtra = "e"; 13354 zbit = mk_x86g_calculate_condition( X86CondZ ); 13355 cond = mkAnd1(cond, zbit); 13356 break; 13357 case 0xE0: 13358 xtra = "ne"; 13359 zbit = mk_x86g_calculate_condition( X86CondNZ ); 13360 cond = mkAnd1(cond, zbit); 13361 break; 13362 default: 13363 vassert(0); 13364 } 13365 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32), OFFB_EIP) ); 13366 13367 DIP("loop%s 0x%x\n", xtra, d32); 13368 break; 13369 } 13370 13371 /* ------------------------ IMUL ----------------------- */ 13372 13373 case 0x69: /* IMUL Iv, Ev, Gv */ 13374 delta = dis_imul_I_E_G ( sorb, sz, delta, sz ); 13375 break; 13376 case 0x6B: /* IMUL Ib, Ev, Gv */ 13377 delta = dis_imul_I_E_G ( sorb, sz, delta, 1 ); 13378 break; 13379 13380 /* ------------------------ MOV ------------------------ */ 13381 13382 case 0x88: /* MOV Gb,Eb */ 13383 delta = dis_mov_G_E(sorb, 1, delta); 13384 break; 13385 13386 case 0x89: /* MOV Gv,Ev */ 13387 delta = dis_mov_G_E(sorb, sz, delta); 13388 break; 13389 13390 case 0x8A: /* MOV Eb,Gb */ 13391 delta = dis_mov_E_G(sorb, 1, delta); 13392 break; 13393 13394 case 0x8B: /* MOV Ev,Gv */ 13395 delta = dis_mov_E_G(sorb, sz, delta); 13396 break; 13397 13398 case 0x8D: /* LEA M,Gv */ 13399 if (sz != 4) 13400 goto decode_failure; 13401 modrm = getIByte(delta); 13402 if (epartIsReg(modrm)) 13403 goto decode_failure; 13404 /* NOTE! this is the one place where a segment override prefix 13405 has no effect on the address calculation. Therefore we pass 13406 zero instead of sorb here. */ 13407 addr = disAMode ( &alen, /*sorb*/ 0, delta, dis_buf ); 13408 delta += alen; 13409 putIReg(sz, gregOfRM(modrm), mkexpr(addr)); 13410 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf, 13411 nameIReg(sz,gregOfRM(modrm))); 13412 break; 13413 13414 case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */ 13415 delta = dis_mov_Sw_Ew(sorb, sz, delta); 13416 break; 13417 13418 case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */ 13419 delta = dis_mov_Ew_Sw(sorb, delta); 13420 break; 13421 13422 case 0xA0: /* MOV Ob,AL */ 13423 sz = 1; 13424 /* Fall through ... */ 13425 case 0xA1: /* MOV Ov,eAX */ 13426 d32 = getUDisp32(delta); delta += 4; 13427 ty = szToITy(sz); 13428 addr = newTemp(Ity_I32); 13429 assign( addr, handleSegOverride(sorb, mkU32(d32)) ); 13430 putIReg(sz, R_EAX, loadLE(ty, mkexpr(addr))); 13431 DIP("mov%c %s0x%x, %s\n", nameISize(sz), sorbTxt(sorb), 13432 d32, nameIReg(sz,R_EAX)); 13433 break; 13434 13435 case 0xA2: /* MOV Ob,AL */ 13436 sz = 1; 13437 /* Fall through ... */ 13438 case 0xA3: /* MOV eAX,Ov */ 13439 d32 = getUDisp32(delta); delta += 4; 13440 ty = szToITy(sz); 13441 addr = newTemp(Ity_I32); 13442 assign( addr, handleSegOverride(sorb, mkU32(d32)) ); 13443 storeLE( mkexpr(addr), getIReg(sz,R_EAX) ); 13444 DIP("mov%c %s, %s0x%x\n", nameISize(sz), nameIReg(sz,R_EAX), 13445 sorbTxt(sorb), d32); 13446 break; 13447 13448 case 0xB0: /* MOV imm,AL */ 13449 case 0xB1: /* MOV imm,CL */ 13450 case 0xB2: /* MOV imm,DL */ 13451 case 0xB3: /* MOV imm,BL */ 13452 case 0xB4: /* MOV imm,AH */ 13453 case 0xB5: /* MOV imm,CH */ 13454 case 0xB6: /* MOV imm,DH */ 13455 case 0xB7: /* MOV imm,BH */ 13456 d32 = getIByte(delta); delta += 1; 13457 putIReg(1, opc-0xB0, mkU8(d32)); 13458 DIP("movb $0x%x,%s\n", d32, nameIReg(1,opc-0xB0)); 13459 break; 13460 13461 case 0xB8: /* MOV imm,eAX */ 13462 case 0xB9: /* MOV imm,eCX */ 13463 case 0xBA: /* MOV imm,eDX */ 13464 case 0xBB: /* MOV imm,eBX */ 13465 case 0xBC: /* MOV imm,eSP */ 13466 case 0xBD: /* MOV imm,eBP */ 13467 case 0xBE: /* MOV imm,eSI */ 13468 case 0xBF: /* MOV imm,eDI */ 13469 d32 = getUDisp(sz,delta); delta += sz; 13470 putIReg(sz, opc-0xB8, mkU(szToITy(sz), d32)); 13471 DIP("mov%c $0x%x,%s\n", nameISize(sz), d32, nameIReg(sz,opc-0xB8)); 13472 break; 13473 13474 case 0xC6: /* MOV Ib,Eb */ 13475 sz = 1; 13476 goto do_Mov_I_E; 13477 case 0xC7: /* MOV Iv,Ev */ 13478 goto do_Mov_I_E; 13479 13480 do_Mov_I_E: 13481 modrm = getIByte(delta); 13482 if (epartIsReg(modrm)) { 13483 delta++; /* mod/rm byte */ 13484 d32 = getUDisp(sz,delta); delta += sz; 13485 putIReg(sz, eregOfRM(modrm), mkU(szToITy(sz), d32)); 13486 DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, 13487 nameIReg(sz,eregOfRM(modrm))); 13488 } else { 13489 addr = disAMode ( &alen, sorb, delta, dis_buf ); 13490 delta += alen; 13491 d32 = getUDisp(sz,delta); delta += sz; 13492 storeLE(mkexpr(addr), mkU(szToITy(sz), d32)); 13493 DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf); 13494 } 13495 break; 13496 13497 /* ------------------------ opl imm, A ----------------- */ 13498 13499 case 0x04: /* ADD Ib, AL */ 13500 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" ); 13501 break; 13502 case 0x05: /* ADD Iv, eAX */ 13503 delta = dis_op_imm_A( sz, False, Iop_Add8, True, delta, "add" ); 13504 break; 13505 13506 case 0x0C: /* OR Ib, AL */ 13507 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" ); 13508 break; 13509 case 0x0D: /* OR Iv, eAX */ 13510 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" ); 13511 break; 13512 13513 case 0x14: /* ADC Ib, AL */ 13514 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" ); 13515 break; 13516 case 0x15: /* ADC Iv, eAX */ 13517 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" ); 13518 break; 13519 13520 case 0x1C: /* SBB Ib, AL */ 13521 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" ); 13522 break; 13523 case 0x1D: /* SBB Iv, eAX */ 13524 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" ); 13525 break; 13526 13527 case 0x24: /* AND Ib, AL */ 13528 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" ); 13529 break; 13530 case 0x25: /* AND Iv, eAX */ 13531 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" ); 13532 break; 13533 13534 case 0x2C: /* SUB Ib, AL */ 13535 delta = dis_op_imm_A( 1, False, Iop_Sub8, True, delta, "sub" ); 13536 break; 13537 case 0x2D: /* SUB Iv, eAX */ 13538 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" ); 13539 break; 13540 13541 case 0x34: /* XOR Ib, AL */ 13542 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" ); 13543 break; 13544 case 0x35: /* XOR Iv, eAX */ 13545 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" ); 13546 break; 13547 13548 case 0x3C: /* CMP Ib, AL */ 13549 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" ); 13550 break; 13551 case 0x3D: /* CMP Iv, eAX */ 13552 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" ); 13553 break; 13554 13555 case 0xA8: /* TEST Ib, AL */ 13556 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" ); 13557 break; 13558 case 0xA9: /* TEST Iv, eAX */ 13559 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" ); 13560 break; 13561 13562 /* ------------------------ opl Ev, Gv ----------------- */ 13563 13564 case 0x02: /* ADD Eb,Gb */ 13565 delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, 1, delta, "add" ); 13566 break; 13567 case 0x03: /* ADD Ev,Gv */ 13568 delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, sz, delta, "add" ); 13569 break; 13570 13571 case 0x0A: /* OR Eb,Gb */ 13572 delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, 1, delta, "or" ); 13573 break; 13574 case 0x0B: /* OR Ev,Gv */ 13575 delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, sz, delta, "or" ); 13576 break; 13577 13578 case 0x12: /* ADC Eb,Gb */ 13579 delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, 1, delta, "adc" ); 13580 break; 13581 case 0x13: /* ADC Ev,Gv */ 13582 delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, sz, delta, "adc" ); 13583 break; 13584 13585 case 0x1A: /* SBB Eb,Gb */ 13586 delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, 1, delta, "sbb" ); 13587 break; 13588 case 0x1B: /* SBB Ev,Gv */ 13589 delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, sz, delta, "sbb" ); 13590 break; 13591 13592 case 0x22: /* AND Eb,Gb */ 13593 delta = dis_op2_E_G ( sorb, False, Iop_And8, True, 1, delta, "and" ); 13594 break; 13595 case 0x23: /* AND Ev,Gv */ 13596 delta = dis_op2_E_G ( sorb, False, Iop_And8, True, sz, delta, "and" ); 13597 break; 13598 13599 case 0x2A: /* SUB Eb,Gb */ 13600 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, 1, delta, "sub" ); 13601 break; 13602 case 0x2B: /* SUB Ev,Gv */ 13603 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, sz, delta, "sub" ); 13604 break; 13605 13606 case 0x32: /* XOR Eb,Gb */ 13607 delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, 1, delta, "xor" ); 13608 break; 13609 case 0x33: /* XOR Ev,Gv */ 13610 delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, sz, delta, "xor" ); 13611 break; 13612 13613 case 0x3A: /* CMP Eb,Gb */ 13614 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, 1, delta, "cmp" ); 13615 break; 13616 case 0x3B: /* CMP Ev,Gv */ 13617 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, sz, delta, "cmp" ); 13618 break; 13619 13620 case 0x84: /* TEST Eb,Gb */ 13621 delta = dis_op2_E_G ( sorb, False, Iop_And8, False, 1, delta, "test" ); 13622 break; 13623 case 0x85: /* TEST Ev,Gv */ 13624 delta = dis_op2_E_G ( sorb, False, Iop_And8, False, sz, delta, "test" ); 13625 break; 13626 13627 /* ------------------------ opl Gv, Ev ----------------- */ 13628 13629 case 0x00: /* ADD Gb,Eb */ 13630 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13631 Iop_Add8, True, 1, delta, "add" ); 13632 break; 13633 case 0x01: /* ADD Gv,Ev */ 13634 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13635 Iop_Add8, True, sz, delta, "add" ); 13636 break; 13637 13638 case 0x08: /* OR Gb,Eb */ 13639 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13640 Iop_Or8, True, 1, delta, "or" ); 13641 break; 13642 case 0x09: /* OR Gv,Ev */ 13643 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13644 Iop_Or8, True, sz, delta, "or" ); 13645 break; 13646 13647 case 0x10: /* ADC Gb,Eb */ 13648 delta = dis_op2_G_E ( sorb, pfx_lock, True, 13649 Iop_Add8, True, 1, delta, "adc" ); 13650 break; 13651 case 0x11: /* ADC Gv,Ev */ 13652 delta = dis_op2_G_E ( sorb, pfx_lock, True, 13653 Iop_Add8, True, sz, delta, "adc" ); 13654 break; 13655 13656 case 0x18: /* SBB Gb,Eb */ 13657 delta = dis_op2_G_E ( sorb, pfx_lock, True, 13658 Iop_Sub8, True, 1, delta, "sbb" ); 13659 break; 13660 case 0x19: /* SBB Gv,Ev */ 13661 delta = dis_op2_G_E ( sorb, pfx_lock, True, 13662 Iop_Sub8, True, sz, delta, "sbb" ); 13663 break; 13664 13665 case 0x20: /* AND Gb,Eb */ 13666 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13667 Iop_And8, True, 1, delta, "and" ); 13668 break; 13669 case 0x21: /* AND Gv,Ev */ 13670 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13671 Iop_And8, True, sz, delta, "and" ); 13672 break; 13673 13674 case 0x28: /* SUB Gb,Eb */ 13675 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13676 Iop_Sub8, True, 1, delta, "sub" ); 13677 break; 13678 case 0x29: /* SUB Gv,Ev */ 13679 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13680 Iop_Sub8, True, sz, delta, "sub" ); 13681 break; 13682 13683 case 0x30: /* XOR Gb,Eb */ 13684 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13685 Iop_Xor8, True, 1, delta, "xor" ); 13686 break; 13687 case 0x31: /* XOR Gv,Ev */ 13688 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13689 Iop_Xor8, True, sz, delta, "xor" ); 13690 break; 13691 13692 case 0x38: /* CMP Gb,Eb */ 13693 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13694 Iop_Sub8, False, 1, delta, "cmp" ); 13695 break; 13696 case 0x39: /* CMP Gv,Ev */ 13697 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13698 Iop_Sub8, False, sz, delta, "cmp" ); 13699 break; 13700 13701 /* ------------------------ POP ------------------------ */ 13702 13703 case 0x58: /* POP eAX */ 13704 case 0x59: /* POP eCX */ 13705 case 0x5A: /* POP eDX */ 13706 case 0x5B: /* POP eBX */ 13707 case 0x5D: /* POP eBP */ 13708 case 0x5E: /* POP eSI */ 13709 case 0x5F: /* POP eDI */ 13710 case 0x5C: /* POP eSP */ 13711 vassert(sz == 2 || sz == 4); 13712 t1 = newTemp(szToITy(sz)); t2 = newTemp(Ity_I32); 13713 assign(t2, getIReg(4, R_ESP)); 13714 assign(t1, loadLE(szToITy(sz),mkexpr(t2))); 13715 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz))); 13716 putIReg(sz, opc-0x58, mkexpr(t1)); 13717 DIP("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58)); 13718 break; 13719 13720 case 0x9D: /* POPF */ 13721 vassert(sz == 2 || sz == 4); 13722 t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32); 13723 assign(t2, getIReg(4, R_ESP)); 13724 assign(t1, widenUto32(loadLE(szToITy(sz),mkexpr(t2)))); 13725 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz))); 13726 13727 /* Generate IR to set %EFLAGS{O,S,Z,A,C,P,D,ID,AC} from the 13728 value in t1. */ 13729 set_EFLAGS_from_value( t1, True/*emit_AC_emwarn*/, 13730 ((Addr32)guest_EIP_bbstart)+delta ); 13731 13732 DIP("popf%c\n", nameISize(sz)); 13733 break; 13734 13735 case 0x61: /* POPA */ 13736 /* This is almost certainly wrong for sz==2. So ... */ 13737 if (sz != 4) goto decode_failure; 13738 13739 /* t5 is the old %ESP value. */ 13740 t5 = newTemp(Ity_I32); 13741 assign( t5, getIReg(4, R_ESP) ); 13742 13743 /* Reload all the registers, except %esp. */ 13744 putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) )); 13745 putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) )); 13746 putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) )); 13747 putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) )); 13748 /* ignore saved %ESP */ 13749 putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) )); 13750 putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) )); 13751 putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) )); 13752 13753 /* and move %ESP back up */ 13754 putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) ); 13755 13756 DIP("popa%c\n", nameISize(sz)); 13757 break; 13758 13759 case 0x8F: /* POPL/POPW m32 */ 13760 { Int len; 13761 UChar rm = getIByte(delta); 13762 13763 /* make sure this instruction is correct POP */ 13764 if (epartIsReg(rm) || gregOfRM(rm) != 0) 13765 goto decode_failure; 13766 /* and has correct size */ 13767 if (sz != 4 && sz != 2) 13768 goto decode_failure; 13769 ty = szToITy(sz); 13770 13771 t1 = newTemp(Ity_I32); /* stack address */ 13772 t3 = newTemp(ty); /* data */ 13773 /* set t1 to ESP: t1 = ESP */ 13774 assign( t1, getIReg(4, R_ESP) ); 13775 /* load M[ESP] to virtual register t3: t3 = M[t1] */ 13776 assign( t3, loadLE(ty, mkexpr(t1)) ); 13777 13778 /* increase ESP; must be done before the STORE. Intel manual says: 13779 If the ESP register is used as a base register for addressing 13780 a destination operand in memory, the POP instruction computes 13781 the effective address of the operand after it increments the 13782 ESP register. 13783 */ 13784 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(sz)) ); 13785 13786 /* resolve MODR/M */ 13787 addr = disAMode ( &len, sorb, delta, dis_buf); 13788 storeLE( mkexpr(addr), mkexpr(t3) ); 13789 13790 DIP("pop%c %s\n", sz==2 ? 'w' : 'l', dis_buf); 13791 13792 delta += len; 13793 break; 13794 } 13795 13796 case 0x1F: /* POP %DS */ 13797 dis_pop_segreg( R_DS, sz ); break; 13798 case 0x07: /* POP %ES */ 13799 dis_pop_segreg( R_ES, sz ); break; 13800 case 0x17: /* POP %SS */ 13801 dis_pop_segreg( R_SS, sz ); break; 13802 13803 /* ------------------------ PUSH ----------------------- */ 13804 13805 case 0x50: /* PUSH eAX */ 13806 case 0x51: /* PUSH eCX */ 13807 case 0x52: /* PUSH eDX */ 13808 case 0x53: /* PUSH eBX */ 13809 case 0x55: /* PUSH eBP */ 13810 case 0x56: /* PUSH eSI */ 13811 case 0x57: /* PUSH eDI */ 13812 case 0x54: /* PUSH eSP */ 13813 /* This is the Right Way, in that the value to be pushed is 13814 established before %esp is changed, so that pushl %esp 13815 correctly pushes the old value. */ 13816 vassert(sz == 2 || sz == 4); 13817 ty = sz==2 ? Ity_I16 : Ity_I32; 13818 t1 = newTemp(ty); t2 = newTemp(Ity_I32); 13819 assign(t1, getIReg(sz, opc-0x50)); 13820 assign(t2, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz))); 13821 putIReg(4, R_ESP, mkexpr(t2) ); 13822 storeLE(mkexpr(t2),mkexpr(t1)); 13823 DIP("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50)); 13824 break; 13825 13826 13827 case 0x68: /* PUSH Iv */ 13828 d32 = getUDisp(sz,delta); delta += sz; 13829 goto do_push_I; 13830 case 0x6A: /* PUSH Ib, sign-extended to sz */ 13831 d32 = getSDisp8(delta); delta += 1; 13832 goto do_push_I; 13833 do_push_I: 13834 ty = szToITy(sz); 13835 t1 = newTemp(Ity_I32); t2 = newTemp(ty); 13836 assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); 13837 putIReg(4, R_ESP, mkexpr(t1) ); 13838 /* stop mkU16 asserting if d32 is a negative 16-bit number 13839 (bug #132813) */ 13840 if (ty == Ity_I16) 13841 d32 &= 0xFFFF; 13842 storeLE( mkexpr(t1), mkU(ty,d32) ); 13843 DIP("push%c $0x%x\n", nameISize(sz), d32); 13844 break; 13845 13846 case 0x9C: /* PUSHF */ { 13847 vassert(sz == 2 || sz == 4); 13848 13849 t1 = newTemp(Ity_I32); 13850 assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); 13851 putIReg(4, R_ESP, mkexpr(t1) ); 13852 13853 /* Calculate OSZACP, and patch in fixed fields as per 13854 Intel docs. 13855 - bit 1 is always 1 13856 - bit 9 is Interrupt Enable (should always be 1 in user mode?) 13857 */ 13858 t2 = newTemp(Ity_I32); 13859 assign( t2, binop(Iop_Or32, 13860 mk_x86g_calculate_eflags_all(), 13861 mkU32( (1<<1)|(1<<9) ) )); 13862 13863 /* Patch in the D flag. This can simply be a copy of bit 10 of 13864 baseBlock[OFFB_DFLAG]. */ 13865 t3 = newTemp(Ity_I32); 13866 assign( t3, binop(Iop_Or32, 13867 mkexpr(t2), 13868 binop(Iop_And32, 13869 IRExpr_Get(OFFB_DFLAG,Ity_I32), 13870 mkU32(1<<10))) 13871 ); 13872 13873 /* And patch in the ID flag. */ 13874 t4 = newTemp(Ity_I32); 13875 assign( t4, binop(Iop_Or32, 13876 mkexpr(t3), 13877 binop(Iop_And32, 13878 binop(Iop_Shl32, IRExpr_Get(OFFB_IDFLAG,Ity_I32), 13879 mkU8(21)), 13880 mkU32(1<<21))) 13881 ); 13882 13883 /* And patch in the AC flag. */ 13884 t5 = newTemp(Ity_I32); 13885 assign( t5, binop(Iop_Or32, 13886 mkexpr(t4), 13887 binop(Iop_And32, 13888 binop(Iop_Shl32, IRExpr_Get(OFFB_ACFLAG,Ity_I32), 13889 mkU8(18)), 13890 mkU32(1<<18))) 13891 ); 13892 13893 /* if sz==2, the stored value needs to be narrowed. */ 13894 if (sz == 2) 13895 storeLE( mkexpr(t1), unop(Iop_32to16,mkexpr(t5)) ); 13896 else 13897 storeLE( mkexpr(t1), mkexpr(t5) ); 13898 13899 DIP("pushf%c\n", nameISize(sz)); 13900 break; 13901 } 13902 13903 case 0x60: /* PUSHA */ 13904 /* This is almost certainly wrong for sz==2. So ... */ 13905 if (sz != 4) goto decode_failure; 13906 13907 /* This is the Right Way, in that the value to be pushed is 13908 established before %esp is changed, so that pusha 13909 correctly pushes the old %esp value. New value of %esp is 13910 pushed at start. */ 13911 /* t0 is the %ESP value we're going to push. */ 13912 t0 = newTemp(Ity_I32); 13913 assign( t0, getIReg(4, R_ESP) ); 13914 13915 /* t5 will be the new %ESP value. */ 13916 t5 = newTemp(Ity_I32); 13917 assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) ); 13918 13919 /* Update guest state before prodding memory. */ 13920 putIReg(4, R_ESP, mkexpr(t5)); 13921 13922 /* Dump all the registers. */ 13923 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) ); 13924 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) ); 13925 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) ); 13926 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) ); 13927 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/); 13928 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) ); 13929 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) ); 13930 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) ); 13931 13932 DIP("pusha%c\n", nameISize(sz)); 13933 break; 13934 13935 case 0x0E: /* PUSH %CS */ 13936 dis_push_segreg( R_CS, sz ); break; 13937 case 0x1E: /* PUSH %DS */ 13938 dis_push_segreg( R_DS, sz ); break; 13939 case 0x06: /* PUSH %ES */ 13940 dis_push_segreg( R_ES, sz ); break; 13941 case 0x16: /* PUSH %SS */ 13942 dis_push_segreg( R_SS, sz ); break; 13943 13944 /* ------------------------ SCAS et al ----------------- */ 13945 13946 case 0xA4: /* MOVS, no REP prefix */ 13947 case 0xA5: 13948 if (sorb != 0) 13949 goto decode_failure; /* else dis_string_op asserts */ 13950 dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb ); 13951 break; 13952 13953 case 0xA6: /* CMPSb, no REP prefix */ 13954 case 0xA7: 13955 if (sorb != 0) 13956 goto decode_failure; /* else dis_string_op asserts */ 13957 dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb ); 13958 break; 13959 13960 case 0xAA: /* STOS, no REP prefix */ 13961 case 0xAB: 13962 if (sorb != 0) 13963 goto decode_failure; /* else dis_string_op asserts */ 13964 dis_string_op( dis_STOS, ( opc == 0xAA ? 1 : sz ), "stos", sorb ); 13965 break; 13966 13967 case 0xAC: /* LODS, no REP prefix */ 13968 case 0xAD: 13969 if (sorb != 0) 13970 goto decode_failure; /* else dis_string_op asserts */ 13971 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", sorb ); 13972 break; 13973 13974 case 0xAE: /* SCAS, no REP prefix */ 13975 case 0xAF: 13976 if (sorb != 0) 13977 goto decode_failure; /* else dis_string_op asserts */ 13978 dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb ); 13979 break; 13980 13981 13982 case 0xFC: /* CLD */ 13983 stmt( IRStmt_Put( OFFB_DFLAG, mkU32(1)) ); 13984 DIP("cld\n"); 13985 break; 13986 13987 case 0xFD: /* STD */ 13988 stmt( IRStmt_Put( OFFB_DFLAG, mkU32(0xFFFFFFFF)) ); 13989 DIP("std\n"); 13990 break; 13991 13992 case 0xF8: /* CLC */ 13993 case 0xF9: /* STC */ 13994 case 0xF5: /* CMC */ 13995 t0 = newTemp(Ity_I32); 13996 t1 = newTemp(Ity_I32); 13997 assign( t0, mk_x86g_calculate_eflags_all() ); 13998 switch (opc) { 13999 case 0xF8: 14000 assign( t1, binop(Iop_And32, mkexpr(t0), 14001 mkU32(~X86G_CC_MASK_C))); 14002 DIP("clc\n"); 14003 break; 14004 case 0xF9: 14005 assign( t1, binop(Iop_Or32, mkexpr(t0), 14006 mkU32(X86G_CC_MASK_C))); 14007 DIP("stc\n"); 14008 break; 14009 case 0xF5: 14010 assign( t1, binop(Iop_Xor32, mkexpr(t0), 14011 mkU32(X86G_CC_MASK_C))); 14012 DIP("cmc\n"); 14013 break; 14014 default: 14015 vpanic("disInstr(x86)(clc/stc/cmc)"); 14016 } 14017 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 14018 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 14019 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) )); 14020 /* Set NDEP even though it isn't used. This makes redundant-PUT 14021 elimination of previous stores to this field work better. */ 14022 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 14023 break; 14024 14025 case 0xD6: /* SALC */ 14026 t0 = newTemp(Ity_I32); 14027 t1 = newTemp(Ity_I32); 14028 assign( t0, binop(Iop_And32, 14029 mk_x86g_calculate_eflags_c(), 14030 mkU32(1)) ); 14031 assign( t1, binop(Iop_Sar32, 14032 binop(Iop_Shl32, mkexpr(t0), mkU8(31)), 14033 mkU8(31)) ); 14034 putIReg(1, R_EAX, unop(Iop_32to8, mkexpr(t1)) ); 14035 DIP("salc\n"); 14036 break; 14037 14038 /* REPNE prefix insn */ 14039 case 0xF2: { 14040 Addr32 eip_orig = guest_EIP_bbstart + delta_start; 14041 if (sorb != 0) goto decode_failure; 14042 abyte = getIByte(delta); delta++; 14043 14044 if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; } 14045 14046 switch (abyte) { 14047 /* According to the Intel manual, "repne movs" should never occur, but 14048 * in practice it has happened, so allow for it here... */ 14049 case 0xA4: sz = 1; /* REPNE MOVS<sz> */ 14050 case 0xA5: 14051 dis_REP_op ( &dres, X86CondNZ, dis_MOVS, sz, eip_orig, 14052 guest_EIP_bbstart+delta, "repne movs" ); 14053 break; 14054 14055 case 0xA6: sz = 1; /* REPNE CMP<sz> */ 14056 case 0xA7: 14057 dis_REP_op ( &dres, X86CondNZ, dis_CMPS, sz, eip_orig, 14058 guest_EIP_bbstart+delta, "repne cmps" ); 14059 break; 14060 14061 case 0xAA: sz = 1; /* REPNE STOS<sz> */ 14062 case 0xAB: 14063 dis_REP_op ( &dres, X86CondNZ, dis_STOS, sz, eip_orig, 14064 guest_EIP_bbstart+delta, "repne stos" ); 14065 break; 14066 14067 case 0xAE: sz = 1; /* REPNE SCAS<sz> */ 14068 case 0xAF: 14069 dis_REP_op ( &dres, X86CondNZ, dis_SCAS, sz, eip_orig, 14070 guest_EIP_bbstart+delta, "repne scas" ); 14071 break; 14072 14073 default: 14074 goto decode_failure; 14075 } 14076 break; 14077 } 14078 14079 /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE, 14080 for the rest, it means REP) */ 14081 case 0xF3: { 14082 Addr32 eip_orig = guest_EIP_bbstart + delta_start; 14083 abyte = getIByte(delta); delta++; 14084 14085 if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; } 14086 14087 if (sorb != 0 && abyte != 0x0F) goto decode_failure; 14088 14089 switch (abyte) { 14090 case 0x0F: 14091 switch (getIByte(delta)) { 14092 /* On older CPUs, TZCNT behaves the same as BSF. */ 14093 case 0xBC: /* REP BSF Gv,Ev */ 14094 delta = dis_bs_E_G ( sorb, sz, delta + 1, True ); 14095 break; 14096 /* On older CPUs, LZCNT behaves the same as BSR. */ 14097 case 0xBD: /* REP BSR Gv,Ev */ 14098 delta = dis_bs_E_G ( sorb, sz, delta + 1, False ); 14099 break; 14100 default: 14101 goto decode_failure; 14102 } 14103 break; 14104 14105 case 0xA4: sz = 1; /* REP MOVS<sz> */ 14106 case 0xA5: 14107 dis_REP_op ( &dres, X86CondAlways, dis_MOVS, sz, eip_orig, 14108 guest_EIP_bbstart+delta, "rep movs" ); 14109 break; 14110 14111 case 0xA6: sz = 1; /* REPE CMP<sz> */ 14112 case 0xA7: 14113 dis_REP_op ( &dres, X86CondZ, dis_CMPS, sz, eip_orig, 14114 guest_EIP_bbstart+delta, "repe cmps" ); 14115 break; 14116 14117 case 0xAA: sz = 1; /* REP STOS<sz> */ 14118 case 0xAB: 14119 dis_REP_op ( &dres, X86CondAlways, dis_STOS, sz, eip_orig, 14120 guest_EIP_bbstart+delta, "rep stos" ); 14121 break; 14122 14123 case 0xAC: sz = 1; /* REP LODS<sz> */ 14124 case 0xAD: 14125 dis_REP_op ( &dres, X86CondAlways, dis_LODS, sz, eip_orig, 14126 guest_EIP_bbstart+delta, "rep lods" ); 14127 break; 14128 14129 case 0xAE: sz = 1; /* REPE SCAS<sz> */ 14130 case 0xAF: 14131 dis_REP_op ( &dres, X86CondZ, dis_SCAS, sz, eip_orig, 14132 guest_EIP_bbstart+delta, "repe scas" ); 14133 break; 14134 14135 case 0x90: /* REP NOP (PAUSE) */ 14136 /* a hint to the P4 re spin-wait loop */ 14137 DIP("rep nop (P4 pause)\n"); 14138 /* "observe" the hint. The Vex client needs to be careful not 14139 to cause very long delays as a result, though. */ 14140 jmp_lit(&dres, Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta); 14141 vassert(dres.whatNext == Dis_StopHere); 14142 break; 14143 14144 case 0xC3: /* REP RET -- same as normal ret? */ 14145 dis_ret(&dres, 0); 14146 DIP("rep ret\n"); 14147 break; 14148 14149 default: 14150 goto decode_failure; 14151 } 14152 break; 14153 } 14154 14155 /* ------------------------ XCHG ----------------------- */ 14156 14157 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK 14158 prefix; hence it must be translated with an IRCAS (at least, the 14159 memory variant). */ 14160 case 0x86: /* XCHG Gb,Eb */ 14161 sz = 1; 14162 /* Fall through ... */ 14163 case 0x87: /* XCHG Gv,Ev */ 14164 modrm = getIByte(delta); 14165 ty = szToITy(sz); 14166 t1 = newTemp(ty); t2 = newTemp(ty); 14167 if (epartIsReg(modrm)) { 14168 assign(t1, getIReg(sz, eregOfRM(modrm))); 14169 assign(t2, getIReg(sz, gregOfRM(modrm))); 14170 putIReg(sz, gregOfRM(modrm), mkexpr(t1)); 14171 putIReg(sz, eregOfRM(modrm), mkexpr(t2)); 14172 delta++; 14173 DIP("xchg%c %s, %s\n", 14174 nameISize(sz), nameIReg(sz,gregOfRM(modrm)), 14175 nameIReg(sz,eregOfRM(modrm))); 14176 } else { 14177 *expect_CAS = True; 14178 addr = disAMode ( &alen, sorb, delta, dis_buf ); 14179 assign( t1, loadLE(ty,mkexpr(addr)) ); 14180 assign( t2, getIReg(sz,gregOfRM(modrm)) ); 14181 casLE( mkexpr(addr), 14182 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr ); 14183 putIReg( sz, gregOfRM(modrm), mkexpr(t1) ); 14184 delta += alen; 14185 DIP("xchg%c %s, %s\n", nameISize(sz), 14186 nameIReg(sz,gregOfRM(modrm)), dis_buf); 14187 } 14188 break; 14189 14190 case 0x90: /* XCHG eAX,eAX */ 14191 DIP("nop\n"); 14192 break; 14193 case 0x91: /* XCHG eAX,eCX */ 14194 case 0x92: /* XCHG eAX,eDX */ 14195 case 0x93: /* XCHG eAX,eBX */ 14196 case 0x94: /* XCHG eAX,eSP */ 14197 case 0x95: /* XCHG eAX,eBP */ 14198 case 0x96: /* XCHG eAX,eSI */ 14199 case 0x97: /* XCHG eAX,eDI */ 14200 codegen_xchg_eAX_Reg ( sz, opc - 0x90 ); 14201 break; 14202 14203 /* ------------------------ XLAT ----------------------- */ 14204 14205 case 0xD7: /* XLAT */ 14206 if (sz != 4) goto decode_failure; /* sz == 2 is also allowed (0x66) */ 14207 putIReg( 14208 1, 14209 R_EAX/*AL*/, 14210 loadLE(Ity_I8, 14211 handleSegOverride( 14212 sorb, 14213 binop(Iop_Add32, 14214 getIReg(4, R_EBX), 14215 unop(Iop_8Uto32, getIReg(1, R_EAX/*AL*/)))))); 14216 14217 DIP("xlat%c [ebx]\n", nameISize(sz)); 14218 break; 14219 14220 /* ------------------------ IN / OUT ----------------------- */ 14221 14222 case 0xE4: /* IN imm8, AL */ 14223 sz = 1; 14224 t1 = newTemp(Ity_I32); 14225 abyte = getIByte(delta); delta++; 14226 assign(t1, mkU32( abyte & 0xFF )); 14227 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX)); 14228 goto do_IN; 14229 case 0xE5: /* IN imm8, eAX */ 14230 vassert(sz == 2 || sz == 4); 14231 t1 = newTemp(Ity_I32); 14232 abyte = getIByte(delta); delta++; 14233 assign(t1, mkU32( abyte & 0xFF )); 14234 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX)); 14235 goto do_IN; 14236 case 0xEC: /* IN %DX, AL */ 14237 sz = 1; 14238 t1 = newTemp(Ity_I32); 14239 assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX))); 14240 DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX), 14241 nameIReg(sz,R_EAX)); 14242 goto do_IN; 14243 case 0xED: /* IN %DX, eAX */ 14244 vassert(sz == 2 || sz == 4); 14245 t1 = newTemp(Ity_I32); 14246 assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX))); 14247 DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX), 14248 nameIReg(sz,R_EAX)); 14249 goto do_IN; 14250 do_IN: { 14251 /* At this point, sz indicates the width, and t1 is a 32-bit 14252 value giving port number. */ 14253 IRDirty* d; 14254 vassert(sz == 1 || sz == 2 || sz == 4); 14255 ty = szToITy(sz); 14256 t2 = newTemp(Ity_I32); 14257 d = unsafeIRDirty_1_N( 14258 t2, 14259 0/*regparms*/, 14260 "x86g_dirtyhelper_IN", 14261 &x86g_dirtyhelper_IN, 14262 mkIRExprVec_2( mkexpr(t1), mkU32(sz) ) 14263 ); 14264 /* do the call, dumping the result in t2. */ 14265 stmt( IRStmt_Dirty(d) ); 14266 putIReg(sz, R_EAX, narrowTo( ty, mkexpr(t2) ) ); 14267 break; 14268 } 14269 14270 case 0xE6: /* OUT AL, imm8 */ 14271 sz = 1; 14272 t1 = newTemp(Ity_I32); 14273 abyte = getIByte(delta); delta++; 14274 assign( t1, mkU32( abyte & 0xFF ) ); 14275 DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte); 14276 goto do_OUT; 14277 case 0xE7: /* OUT eAX, imm8 */ 14278 vassert(sz == 2 || sz == 4); 14279 t1 = newTemp(Ity_I32); 14280 abyte = getIByte(delta); delta++; 14281 assign( t1, mkU32( abyte & 0xFF ) ); 14282 DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte); 14283 goto do_OUT; 14284 case 0xEE: /* OUT AL, %DX */ 14285 sz = 1; 14286 t1 = newTemp(Ity_I32); 14287 assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) ); 14288 DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX), 14289 nameIReg(2,R_EDX)); 14290 goto do_OUT; 14291 case 0xEF: /* OUT eAX, %DX */ 14292 vassert(sz == 2 || sz == 4); 14293 t1 = newTemp(Ity_I32); 14294 assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) ); 14295 DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX), 14296 nameIReg(2,R_EDX)); 14297 goto do_OUT; 14298 do_OUT: { 14299 /* At this point, sz indicates the width, and t1 is a 32-bit 14300 value giving port number. */ 14301 IRDirty* d; 14302 vassert(sz == 1 || sz == 2 || sz == 4); 14303 ty = szToITy(sz); 14304 d = unsafeIRDirty_0_N( 14305 0/*regparms*/, 14306 "x86g_dirtyhelper_OUT", 14307 &x86g_dirtyhelper_OUT, 14308 mkIRExprVec_3( mkexpr(t1), 14309 widenUto32( getIReg(sz, R_EAX) ), 14310 mkU32(sz) ) 14311 ); 14312 stmt( IRStmt_Dirty(d) ); 14313 break; 14314 } 14315 14316 /* ------------------------ (Grp1 extensions) ---------- */ 14317 14318 case 0x82: /* Grp1 Ib,Eb too. Apparently this is the same as 14319 case 0x80, but only in 32-bit mode. */ 14320 /* fallthru */ 14321 case 0x80: /* Grp1 Ib,Eb */ 14322 modrm = getIByte(delta); 14323 am_sz = lengthAMode(delta); 14324 sz = 1; 14325 d_sz = 1; 14326 d32 = getUChar(delta + am_sz); 14327 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 ); 14328 break; 14329 14330 case 0x81: /* Grp1 Iv,Ev */ 14331 modrm = getIByte(delta); 14332 am_sz = lengthAMode(delta); 14333 d_sz = sz; 14334 d32 = getUDisp(d_sz, delta + am_sz); 14335 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 ); 14336 break; 14337 14338 case 0x83: /* Grp1 Ib,Ev */ 14339 modrm = getIByte(delta); 14340 am_sz = lengthAMode(delta); 14341 d_sz = 1; 14342 d32 = getSDisp8(delta + am_sz); 14343 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 ); 14344 break; 14345 14346 /* ------------------------ (Grp2 extensions) ---------- */ 14347 14348 case 0xC0: { /* Grp2 Ib,Eb */ 14349 Bool decode_OK = True; 14350 modrm = getIByte(delta); 14351 am_sz = lengthAMode(delta); 14352 d_sz = 1; 14353 d32 = getUChar(delta + am_sz); 14354 sz = 1; 14355 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14356 mkU8(d32 & 0xFF), NULL, &decode_OK ); 14357 if (!decode_OK) 14358 goto decode_failure; 14359 break; 14360 } 14361 case 0xC1: { /* Grp2 Ib,Ev */ 14362 Bool decode_OK = True; 14363 modrm = getIByte(delta); 14364 am_sz = lengthAMode(delta); 14365 d_sz = 1; 14366 d32 = getUChar(delta + am_sz); 14367 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14368 mkU8(d32 & 0xFF), NULL, &decode_OK ); 14369 if (!decode_OK) 14370 goto decode_failure; 14371 break; 14372 } 14373 case 0xD0: { /* Grp2 1,Eb */ 14374 Bool decode_OK = True; 14375 modrm = getIByte(delta); 14376 am_sz = lengthAMode(delta); 14377 d_sz = 0; 14378 d32 = 1; 14379 sz = 1; 14380 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14381 mkU8(d32), NULL, &decode_OK ); 14382 if (!decode_OK) 14383 goto decode_failure; 14384 break; 14385 } 14386 case 0xD1: { /* Grp2 1,Ev */ 14387 Bool decode_OK = True; 14388 modrm = getUChar(delta); 14389 am_sz = lengthAMode(delta); 14390 d_sz = 0; 14391 d32 = 1; 14392 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14393 mkU8(d32), NULL, &decode_OK ); 14394 if (!decode_OK) 14395 goto decode_failure; 14396 break; 14397 } 14398 case 0xD2: { /* Grp2 CL,Eb */ 14399 Bool decode_OK = True; 14400 modrm = getUChar(delta); 14401 am_sz = lengthAMode(delta); 14402 d_sz = 0; 14403 sz = 1; 14404 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14405 getIReg(1,R_ECX), "%cl", &decode_OK ); 14406 if (!decode_OK) 14407 goto decode_failure; 14408 break; 14409 } 14410 case 0xD3: { /* Grp2 CL,Ev */ 14411 Bool decode_OK = True; 14412 modrm = getIByte(delta); 14413 am_sz = lengthAMode(delta); 14414 d_sz = 0; 14415 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14416 getIReg(1,R_ECX), "%cl", &decode_OK ); 14417 if (!decode_OK) 14418 goto decode_failure; 14419 break; 14420 } 14421 14422 /* ------------------------ (Grp3 extensions) ---------- */ 14423 14424 case 0xF6: { /* Grp3 Eb */ 14425 Bool decode_OK = True; 14426 delta = dis_Grp3 ( sorb, pfx_lock, 1, delta, &decode_OK ); 14427 if (!decode_OK) 14428 goto decode_failure; 14429 break; 14430 } 14431 case 0xF7: { /* Grp3 Ev */ 14432 Bool decode_OK = True; 14433 delta = dis_Grp3 ( sorb, pfx_lock, sz, delta, &decode_OK ); 14434 if (!decode_OK) 14435 goto decode_failure; 14436 break; 14437 } 14438 14439 /* ------------------------ (Grp4 extensions) ---------- */ 14440 14441 case 0xFE: { /* Grp4 Eb */ 14442 Bool decode_OK = True; 14443 delta = dis_Grp4 ( sorb, pfx_lock, delta, &decode_OK ); 14444 if (!decode_OK) 14445 goto decode_failure; 14446 break; 14447 } 14448 14449 /* ------------------------ (Grp5 extensions) ---------- */ 14450 14451 case 0xFF: { /* Grp5 Ev */ 14452 Bool decode_OK = True; 14453 delta = dis_Grp5 ( sorb, pfx_lock, sz, delta, &dres, &decode_OK ); 14454 if (!decode_OK) 14455 goto decode_failure; 14456 break; 14457 } 14458 14459 /* ------------------------ Escapes to 2-byte opcodes -- */ 14460 14461 case 0x0F: { 14462 opc = getIByte(delta); delta++; 14463 switch (opc) { 14464 14465 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */ 14466 14467 case 0xBA: { /* Grp8 Ib,Ev */ 14468 Bool decode_OK = False; 14469 modrm = getUChar(delta); 14470 am_sz = lengthAMode(delta); 14471 d32 = getSDisp8(delta + am_sz); 14472 delta = dis_Grp8_Imm ( sorb, pfx_lock, delta, modrm, 14473 am_sz, sz, d32, &decode_OK ); 14474 if (!decode_OK) 14475 goto decode_failure; 14476 break; 14477 } 14478 14479 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */ 14480 14481 case 0xBC: /* BSF Gv,Ev */ 14482 delta = dis_bs_E_G ( sorb, sz, delta, True ); 14483 break; 14484 case 0xBD: /* BSR Gv,Ev */ 14485 delta = dis_bs_E_G ( sorb, sz, delta, False ); 14486 break; 14487 14488 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */ 14489 14490 case 0xC8: /* BSWAP %eax */ 14491 case 0xC9: 14492 case 0xCA: 14493 case 0xCB: 14494 case 0xCC: 14495 case 0xCD: 14496 case 0xCE: 14497 case 0xCF: /* BSWAP %edi */ 14498 /* AFAICS from the Intel docs, this only exists at size 4. */ 14499 if (sz != 4) goto decode_failure; 14500 14501 t1 = newTemp(Ity_I32); 14502 assign( t1, getIReg(4, opc-0xC8) ); 14503 t2 = math_BSWAP(t1, Ity_I32); 14504 14505 putIReg(4, opc-0xC8, mkexpr(t2)); 14506 DIP("bswapl %s\n", nameIReg(4, opc-0xC8)); 14507 break; 14508 14509 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */ 14510 14511 case 0xA3: /* BT Gv,Ev */ 14512 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpNone ); 14513 break; 14514 case 0xB3: /* BTR Gv,Ev */ 14515 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpReset ); 14516 break; 14517 case 0xAB: /* BTS Gv,Ev */ 14518 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpSet ); 14519 break; 14520 case 0xBB: /* BTC Gv,Ev */ 14521 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpComp ); 14522 break; 14523 14524 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */ 14525 14526 case 0x40: 14527 case 0x41: 14528 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */ 14529 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */ 14530 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */ 14531 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */ 14532 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */ 14533 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */ 14534 case 0x48: /* CMOVSb (cmov negative) */ 14535 case 0x49: /* CMOVSb (cmov not negative) */ 14536 case 0x4A: /* CMOVP (cmov parity even) */ 14537 case 0x4B: /* CMOVNP (cmov parity odd) */ 14538 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */ 14539 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */ 14540 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */ 14541 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */ 14542 delta = dis_cmov_E_G(sorb, sz, (X86Condcode)(opc - 0x40), delta); 14543 break; 14544 14545 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */ 14546 14547 case 0xB0: /* CMPXCHG Gb,Eb */ 14548 delta = dis_cmpxchg_G_E ( sorb, pfx_lock, 1, delta ); 14549 break; 14550 case 0xB1: /* CMPXCHG Gv,Ev */ 14551 delta = dis_cmpxchg_G_E ( sorb, pfx_lock, sz, delta ); 14552 break; 14553 14554 case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */ 14555 IRTemp expdHi = newTemp(Ity_I32); 14556 IRTemp expdLo = newTemp(Ity_I32); 14557 IRTemp dataHi = newTemp(Ity_I32); 14558 IRTemp dataLo = newTemp(Ity_I32); 14559 IRTemp oldHi = newTemp(Ity_I32); 14560 IRTemp oldLo = newTemp(Ity_I32); 14561 IRTemp flags_old = newTemp(Ity_I32); 14562 IRTemp flags_new = newTemp(Ity_I32); 14563 IRTemp success = newTemp(Ity_I1); 14564 14565 /* Translate this using a DCAS, even if there is no LOCK 14566 prefix. Life is too short to bother with generating two 14567 different translations for the with/without-LOCK-prefix 14568 cases. */ 14569 *expect_CAS = True; 14570 14571 /* Decode, and generate address. */ 14572 if (sz != 4) goto decode_failure; 14573 modrm = getIByte(delta); 14574 if (epartIsReg(modrm)) goto decode_failure; 14575 if (gregOfRM(modrm) != 1) goto decode_failure; 14576 addr = disAMode ( &alen, sorb, delta, dis_buf ); 14577 delta += alen; 14578 14579 /* Get the expected and new values. */ 14580 assign( expdHi, getIReg(4,R_EDX) ); 14581 assign( expdLo, getIReg(4,R_EAX) ); 14582 assign( dataHi, getIReg(4,R_ECX) ); 14583 assign( dataLo, getIReg(4,R_EBX) ); 14584 14585 /* Do the DCAS */ 14586 stmt( IRStmt_CAS( 14587 mkIRCAS( oldHi, oldLo, 14588 Iend_LE, mkexpr(addr), 14589 mkexpr(expdHi), mkexpr(expdLo), 14590 mkexpr(dataHi), mkexpr(dataLo) 14591 ))); 14592 14593 /* success when oldHi:oldLo == expdHi:expdLo */ 14594 assign( success, 14595 binop(Iop_CasCmpEQ32, 14596 binop(Iop_Or32, 14597 binop(Iop_Xor32, mkexpr(oldHi), mkexpr(expdHi)), 14598 binop(Iop_Xor32, mkexpr(oldLo), mkexpr(expdLo)) 14599 ), 14600 mkU32(0) 14601 )); 14602 14603 /* If the DCAS is successful, that is to say oldHi:oldLo == 14604 expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX, 14605 which is where they came from originally. Both the actual 14606 contents of these two regs, and any shadow values, are 14607 unchanged. If the DCAS fails then we're putting into 14608 EDX:EAX the value seen in memory. */ 14609 putIReg(4, R_EDX, 14610 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)), 14611 mkexpr(oldHi), 14612 mkexpr(expdHi) 14613 )); 14614 putIReg(4, R_EAX, 14615 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)), 14616 mkexpr(oldLo), 14617 mkexpr(expdLo) 14618 )); 14619 14620 /* Copy the success bit into the Z flag and leave the others 14621 unchanged */ 14622 assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all())); 14623 assign( 14624 flags_new, 14625 binop(Iop_Or32, 14626 binop(Iop_And32, mkexpr(flags_old), 14627 mkU32(~X86G_CC_MASK_Z)), 14628 binop(Iop_Shl32, 14629 binop(Iop_And32, 14630 unop(Iop_1Uto32, mkexpr(success)), mkU32(1)), 14631 mkU8(X86G_CC_SHIFT_Z)) )); 14632 14633 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 14634 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) )); 14635 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 14636 /* Set NDEP even though it isn't used. This makes 14637 redundant-PUT elimination of previous stores to this field 14638 work better. */ 14639 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 14640 14641 /* Sheesh. Aren't you glad it was me and not you that had to 14642 write and validate all this grunge? */ 14643 14644 DIP("cmpxchg8b %s\n", dis_buf); 14645 break; 14646 } 14647 14648 /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */ 14649 14650 case 0xA2: { /* CPUID */ 14651 /* Uses dirty helper: 14652 void dirtyhelper_CPUID_sse[012] ( VexGuestX86State* ) 14653 declared to mod eax, wr ebx, ecx, edx 14654 */ 14655 IRDirty* d = NULL; 14656 HChar* fName = NULL; 14657 void* fAddr = NULL; 14658 if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2) { 14659 fName = "x86g_dirtyhelper_CPUID_sse2"; 14660 fAddr = &x86g_dirtyhelper_CPUID_sse2; 14661 } 14662 else 14663 if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE1) { 14664 fName = "x86g_dirtyhelper_CPUID_sse1"; 14665 fAddr = &x86g_dirtyhelper_CPUID_sse1; 14666 } 14667 else 14668 if (archinfo->hwcaps == 0/*no SSE*/) { 14669 fName = "x86g_dirtyhelper_CPUID_sse0"; 14670 fAddr = &x86g_dirtyhelper_CPUID_sse0; 14671 } else 14672 vpanic("disInstr(x86)(cpuid)"); 14673 14674 vassert(fName); vassert(fAddr); 14675 d = unsafeIRDirty_0_N ( 0/*regparms*/, 14676 fName, fAddr, mkIRExprVec_0() ); 14677 /* declare guest state effects */ 14678 d->needsBBP = True; 14679 d->nFxState = 4; 14680 vex_bzero(&d->fxState, sizeof(d->fxState)); 14681 d->fxState[0].fx = Ifx_Modify; 14682 d->fxState[0].offset = OFFB_EAX; 14683 d->fxState[0].size = 4; 14684 d->fxState[1].fx = Ifx_Write; 14685 d->fxState[1].offset = OFFB_EBX; 14686 d->fxState[1].size = 4; 14687 d->fxState[2].fx = Ifx_Modify; 14688 d->fxState[2].offset = OFFB_ECX; 14689 d->fxState[2].size = 4; 14690 d->fxState[3].fx = Ifx_Write; 14691 d->fxState[3].offset = OFFB_EDX; 14692 d->fxState[3].size = 4; 14693 /* execute the dirty call, side-effecting guest state */ 14694 stmt( IRStmt_Dirty(d) ); 14695 /* CPUID is a serialising insn. So, just in case someone is 14696 using it as a memory fence ... */ 14697 stmt( IRStmt_MBE(Imbe_Fence) ); 14698 DIP("cpuid\n"); 14699 break; 14700 } 14701 14702//-- if (!VG_(cpu_has_feature)(VG_X86_FEAT_CPUID)) 14703//-- goto decode_failure; 14704//-- 14705//-- t1 = newTemp(cb); 14706//-- t2 = newTemp(cb); 14707//-- t3 = newTemp(cb); 14708//-- t4 = newTemp(cb); 14709//-- uInstr0(cb, CALLM_S, 0); 14710//-- 14711//-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t1); 14712//-- uInstr1(cb, PUSH, 4, TempReg, t1); 14713//-- 14714//-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); 14715//-- uLiteral(cb, 0); 14716//-- uInstr1(cb, PUSH, 4, TempReg, t2); 14717//-- 14718//-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t3); 14719//-- uLiteral(cb, 0); 14720//-- uInstr1(cb, PUSH, 4, TempReg, t3); 14721//-- 14722//-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4); 14723//-- uLiteral(cb, 0); 14724//-- uInstr1(cb, PUSH, 4, TempReg, t4); 14725//-- 14726//-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CPUID)); 14727//-- uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); 14728//-- 14729//-- uInstr1(cb, POP, 4, TempReg, t4); 14730//-- uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_EDX); 14731//-- 14732//-- uInstr1(cb, POP, 4, TempReg, t3); 14733//-- uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ECX); 14734//-- 14735//-- uInstr1(cb, POP, 4, TempReg, t2); 14736//-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBX); 14737//-- 14738//-- uInstr1(cb, POP, 4, TempReg, t1); 14739//-- uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX); 14740//-- 14741//-- uInstr0(cb, CALLM_E, 0); 14742//-- DIP("cpuid\n"); 14743//-- break; 14744//-- 14745 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */ 14746 14747 case 0xB6: /* MOVZXb Eb,Gv */ 14748 if (sz != 2 && sz != 4) 14749 goto decode_failure; 14750 delta = dis_movx_E_G ( sorb, delta, 1, sz, False ); 14751 break; 14752 14753 case 0xB7: /* MOVZXw Ew,Gv */ 14754 if (sz != 4) 14755 goto decode_failure; 14756 delta = dis_movx_E_G ( sorb, delta, 2, 4, False ); 14757 break; 14758 14759 case 0xBE: /* MOVSXb Eb,Gv */ 14760 if (sz != 2 && sz != 4) 14761 goto decode_failure; 14762 delta = dis_movx_E_G ( sorb, delta, 1, sz, True ); 14763 break; 14764 14765 case 0xBF: /* MOVSXw Ew,Gv */ 14766 if (sz != 4 && /* accept movsww, sigh, see #250799 */sz != 2) 14767 goto decode_failure; 14768 delta = dis_movx_E_G ( sorb, delta, 2, sz, True ); 14769 break; 14770 14771//-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */ 14772//-- 14773//-- case 0xC3: /* MOVNTI Gv,Ev */ 14774//-- vg_assert(sz == 4); 14775//-- modrm = getUChar(eip); 14776//-- vg_assert(!epartIsReg(modrm)); 14777//-- t1 = newTemp(cb); 14778//-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1); 14779//-- pair = disAMode ( cb, sorb, eip, dis_buf ); 14780//-- t2 = LOW24(pair); 14781//-- eip += HI8(pair); 14782//-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2); 14783//-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf); 14784//-- break; 14785 14786 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */ 14787 14788 case 0xAF: /* IMUL Ev, Gv */ 14789 delta = dis_mul_E_G ( sorb, sz, delta ); 14790 break; 14791 14792 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */ 14793 14794 case 0x1F: 14795 modrm = getUChar(delta); 14796 if (epartIsReg(modrm)) goto decode_failure; 14797 addr = disAMode ( &alen, sorb, delta, dis_buf ); 14798 delta += alen; 14799 DIP("nop%c %s\n", nameISize(sz), dis_buf); 14800 break; 14801 14802 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */ 14803 case 0x80: 14804 case 0x81: 14805 case 0x82: /* JBb/JNAEb (jump below) */ 14806 case 0x83: /* JNBb/JAEb (jump not below) */ 14807 case 0x84: /* JZb/JEb (jump zero) */ 14808 case 0x85: /* JNZb/JNEb (jump not zero) */ 14809 case 0x86: /* JBEb/JNAb (jump below or equal) */ 14810 case 0x87: /* JNBEb/JAb (jump not below or equal) */ 14811 case 0x88: /* JSb (jump negative) */ 14812 case 0x89: /* JSb (jump not negative) */ 14813 case 0x8A: /* JP (jump parity even) */ 14814 case 0x8B: /* JNP/JPO (jump parity odd) */ 14815 case 0x8C: /* JLb/JNGEb (jump less) */ 14816 case 0x8D: /* JGEb/JNLb (jump greater or equal) */ 14817 case 0x8E: /* JLEb/JNGb (jump less or equal) */ 14818 case 0x8F: /* JGb/JNLEb (jump greater) */ 14819 { Int jmpDelta; 14820 HChar* comment = ""; 14821 jmpDelta = (Int)getUDisp32(delta); 14822 d32 = (((Addr32)guest_EIP_bbstart)+delta+4) + jmpDelta; 14823 delta += 4; 14824 if (resteerCisOk 14825 && vex_control.guest_chase_cond 14826 && (Addr32)d32 != (Addr32)guest_EIP_bbstart 14827 && jmpDelta < 0 14828 && resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) { 14829 /* Speculation: assume this backward branch is taken. So 14830 we need to emit a side-exit to the insn following this 14831 one, on the negation of the condition, and continue at 14832 the branch target address (d32). If we wind up back at 14833 the first instruction of the trace, just stop; it's 14834 better to let the IR loop unroller handle that case.*/ 14835 stmt( IRStmt_Exit( 14836 mk_x86g_calculate_condition((X86Condcode) 14837 (1 ^ (opc - 0x80))), 14838 Ijk_Boring, 14839 IRConst_U32(guest_EIP_bbstart+delta), 14840 OFFB_EIP ) ); 14841 dres.whatNext = Dis_ResteerC; 14842 dres.continueAt = (Addr64)(Addr32)d32; 14843 comment = "(assumed taken)"; 14844 } 14845 else 14846 if (resteerCisOk 14847 && vex_control.guest_chase_cond 14848 && (Addr32)d32 != (Addr32)guest_EIP_bbstart 14849 && jmpDelta >= 0 14850 && resteerOkFn( callback_opaque, 14851 (Addr64)(Addr32)(guest_EIP_bbstart+delta)) ) { 14852 /* Speculation: assume this forward branch is not taken. 14853 So we need to emit a side-exit to d32 (the dest) and 14854 continue disassembling at the insn immediately 14855 following this one. */ 14856 stmt( IRStmt_Exit( 14857 mk_x86g_calculate_condition((X86Condcode)(opc - 0x80)), 14858 Ijk_Boring, 14859 IRConst_U32(d32), 14860 OFFB_EIP ) ); 14861 dres.whatNext = Dis_ResteerC; 14862 dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta); 14863 comment = "(assumed not taken)"; 14864 } 14865 else { 14866 /* Conservative default translation - end the block at 14867 this point. */ 14868 jcc_01( &dres, (X86Condcode)(opc - 0x80), 14869 (Addr32)(guest_EIP_bbstart+delta), d32); 14870 vassert(dres.whatNext == Dis_StopHere); 14871 } 14872 DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc - 0x80), d32, comment); 14873 break; 14874 } 14875 14876 /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */ 14877 case 0x31: { /* RDTSC */ 14878 IRTemp val = newTemp(Ity_I64); 14879 IRExpr** args = mkIRExprVec_0(); 14880 IRDirty* d = unsafeIRDirty_1_N ( 14881 val, 14882 0/*regparms*/, 14883 "x86g_dirtyhelper_RDTSC", 14884 &x86g_dirtyhelper_RDTSC, 14885 args 14886 ); 14887 /* execute the dirty call, dumping the result in val. */ 14888 stmt( IRStmt_Dirty(d) ); 14889 putIReg(4, R_EDX, unop(Iop_64HIto32, mkexpr(val))); 14890 putIReg(4, R_EAX, unop(Iop_64to32, mkexpr(val))); 14891 DIP("rdtsc\n"); 14892 break; 14893 } 14894 14895 /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */ 14896 14897 case 0xA1: /* POP %FS */ 14898 dis_pop_segreg( R_FS, sz ); break; 14899 case 0xA9: /* POP %GS */ 14900 dis_pop_segreg( R_GS, sz ); break; 14901 14902 case 0xA0: /* PUSH %FS */ 14903 dis_push_segreg( R_FS, sz ); break; 14904 case 0xA8: /* PUSH %GS */ 14905 dis_push_segreg( R_GS, sz ); break; 14906 14907 /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */ 14908 case 0x90: 14909 case 0x91: 14910 case 0x92: /* set-Bb/set-NAEb (jump below) */ 14911 case 0x93: /* set-NBb/set-AEb (jump not below) */ 14912 case 0x94: /* set-Zb/set-Eb (jump zero) */ 14913 case 0x95: /* set-NZb/set-NEb (jump not zero) */ 14914 case 0x96: /* set-BEb/set-NAb (jump below or equal) */ 14915 case 0x97: /* set-NBEb/set-Ab (jump not below or equal) */ 14916 case 0x98: /* set-Sb (jump negative) */ 14917 case 0x99: /* set-Sb (jump not negative) */ 14918 case 0x9A: /* set-P (jump parity even) */ 14919 case 0x9B: /* set-NP (jump parity odd) */ 14920 case 0x9C: /* set-Lb/set-NGEb (jump less) */ 14921 case 0x9D: /* set-GEb/set-NLb (jump greater or equal) */ 14922 case 0x9E: /* set-LEb/set-NGb (jump less or equal) */ 14923 case 0x9F: /* set-Gb/set-NLEb (jump greater) */ 14924 t1 = newTemp(Ity_I8); 14925 assign( t1, unop(Iop_1Uto8,mk_x86g_calculate_condition(opc-0x90)) ); 14926 modrm = getIByte(delta); 14927 if (epartIsReg(modrm)) { 14928 delta++; 14929 putIReg(1, eregOfRM(modrm), mkexpr(t1)); 14930 DIP("set%s %s\n", name_X86Condcode(opc-0x90), 14931 nameIReg(1,eregOfRM(modrm))); 14932 } else { 14933 addr = disAMode ( &alen, sorb, delta, dis_buf ); 14934 delta += alen; 14935 storeLE( mkexpr(addr), mkexpr(t1) ); 14936 DIP("set%s %s\n", name_X86Condcode(opc-0x90), dis_buf); 14937 } 14938 break; 14939 14940 /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */ 14941 14942 case 0xA4: /* SHLDv imm8,Gv,Ev */ 14943 modrm = getIByte(delta); 14944 d32 = delta + lengthAMode(delta); 14945 vex_sprintf(dis_buf, "$%d", getIByte(d32)); 14946 delta = dis_SHLRD_Gv_Ev ( 14947 sorb, delta, modrm, sz, 14948 mkU8(getIByte(d32)), True, /* literal */ 14949 dis_buf, True ); 14950 break; 14951 case 0xA5: /* SHLDv %cl,Gv,Ev */ 14952 modrm = getIByte(delta); 14953 delta = dis_SHLRD_Gv_Ev ( 14954 sorb, delta, modrm, sz, 14955 getIReg(1,R_ECX), False, /* not literal */ 14956 "%cl", True ); 14957 break; 14958 14959 case 0xAC: /* SHRDv imm8,Gv,Ev */ 14960 modrm = getIByte(delta); 14961 d32 = delta + lengthAMode(delta); 14962 vex_sprintf(dis_buf, "$%d", getIByte(d32)); 14963 delta = dis_SHLRD_Gv_Ev ( 14964 sorb, delta, modrm, sz, 14965 mkU8(getIByte(d32)), True, /* literal */ 14966 dis_buf, False ); 14967 break; 14968 case 0xAD: /* SHRDv %cl,Gv,Ev */ 14969 modrm = getIByte(delta); 14970 delta = dis_SHLRD_Gv_Ev ( 14971 sorb, delta, modrm, sz, 14972 getIReg(1,R_ECX), False, /* not literal */ 14973 "%cl", False ); 14974 break; 14975 14976 /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */ 14977 14978 case 0x34: 14979 /* Simple implementation needing a long explaination. 14980 14981 sysenter is a kind of syscall entry. The key thing here 14982 is that the return address is not known -- that is 14983 something that is beyond Vex's knowledge. So this IR 14984 forces a return to the scheduler, which can do what it 14985 likes to simulate the systenter, but it MUST set this 14986 thread's guest_EIP field with the continuation address 14987 before resuming execution. If that doesn't happen, the 14988 thread will jump to address zero, which is probably 14989 fatal. 14990 */ 14991 14992 /* Note where we are, so we can back up the guest to this 14993 point if the syscall needs to be restarted. */ 14994 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL, 14995 mkU32(guest_EIP_curr_instr) ) ); 14996 jmp_lit(&dres, Ijk_Sys_sysenter, 0/*bogus next EIP value*/); 14997 vassert(dres.whatNext == Dis_StopHere); 14998 DIP("sysenter"); 14999 break; 15000 15001 /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */ 15002 15003 case 0xC0: { /* XADD Gb,Eb */ 15004 Bool decodeOK; 15005 delta = dis_xadd_G_E ( sorb, pfx_lock, 1, delta, &decodeOK ); 15006 if (!decodeOK) goto decode_failure; 15007 break; 15008 } 15009 case 0xC1: { /* XADD Gv,Ev */ 15010 Bool decodeOK; 15011 delta = dis_xadd_G_E ( sorb, pfx_lock, sz, delta, &decodeOK ); 15012 if (!decodeOK) goto decode_failure; 15013 break; 15014 } 15015 15016 /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */ 15017 15018 case 0x71: 15019 case 0x72: 15020 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 15021 15022 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */ 15023 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */ 15024 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 15025 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 15026 15027 case 0xFC: 15028 case 0xFD: 15029 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 15030 15031 case 0xEC: 15032 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 15033 15034 case 0xDC: 15035 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 15036 15037 case 0xF8: 15038 case 0xF9: 15039 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 15040 15041 case 0xE8: 15042 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 15043 15044 case 0xD8: 15045 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 15046 15047 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 15048 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 15049 15050 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 15051 15052 case 0x74: 15053 case 0x75: 15054 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 15055 15056 case 0x64: 15057 case 0x65: 15058 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 15059 15060 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 15061 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 15062 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 15063 15064 case 0x68: 15065 case 0x69: 15066 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 15067 15068 case 0x60: 15069 case 0x61: 15070 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 15071 15072 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 15073 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 15074 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 15075 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 15076 15077 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 15078 case 0xF2: 15079 case 0xF3: 15080 15081 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 15082 case 0xD2: 15083 case 0xD3: 15084 15085 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 15086 case 0xE2: 15087 { 15088 Int delta0 = delta-1; 15089 Bool decode_OK = False; 15090 15091 /* If sz==2 this is SSE, and we assume sse idec has 15092 already spotted those cases by now. */ 15093 if (sz != 4) 15094 goto decode_failure; 15095 15096 delta = dis_MMX ( &decode_OK, sorb, sz, delta-1 ); 15097 if (!decode_OK) { 15098 delta = delta0; 15099 goto decode_failure; 15100 } 15101 break; 15102 } 15103 15104 case 0x0E: /* FEMMS */ 15105 case 0x77: /* EMMS */ 15106 if (sz != 4) 15107 goto decode_failure; 15108 do_EMMS_preamble(); 15109 DIP("{f}emms\n"); 15110 break; 15111 15112 /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */ 15113 case 0x01: /* 0F 01 /0 -- SGDT */ 15114 /* 0F 01 /1 -- SIDT */ 15115 { 15116 /* This is really revolting, but ... since each processor 15117 (core) only has one IDT and one GDT, just let the guest 15118 see it (pass-through semantics). I can't see any way to 15119 construct a faked-up value, so don't bother to try. */ 15120 modrm = getUChar(delta); 15121 addr = disAMode ( &alen, sorb, delta, dis_buf ); 15122 delta += alen; 15123 if (epartIsReg(modrm)) goto decode_failure; 15124 if (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1) 15125 goto decode_failure; 15126 switch (gregOfRM(modrm)) { 15127 case 0: DIP("sgdt %s\n", dis_buf); break; 15128 case 1: DIP("sidt %s\n", dis_buf); break; 15129 default: vassert(0); /*NOTREACHED*/ 15130 } 15131 15132 IRDirty* d = unsafeIRDirty_0_N ( 15133 0/*regparms*/, 15134 "x86g_dirtyhelper_SxDT", 15135 &x86g_dirtyhelper_SxDT, 15136 mkIRExprVec_2( mkexpr(addr), 15137 mkU32(gregOfRM(modrm)) ) 15138 ); 15139 /* declare we're writing memory */ 15140 d->mFx = Ifx_Write; 15141 d->mAddr = mkexpr(addr); 15142 d->mSize = 6; 15143 stmt( IRStmt_Dirty(d) ); 15144 break; 15145 } 15146 15147 /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */ 15148 15149 default: 15150 goto decode_failure; 15151 } /* switch (opc) for the 2-byte opcodes */ 15152 goto decode_success; 15153 } /* case 0x0F: of primary opcode */ 15154 15155 /* ------------------------ ??? ------------------------ */ 15156 15157 default: 15158 decode_failure: 15159 /* All decode failures end up here. */ 15160 vex_printf("vex x86->IR: unhandled instruction bytes: " 15161 "0x%x 0x%x 0x%x 0x%x\n", 15162 (Int)getIByte(delta_start+0), 15163 (Int)getIByte(delta_start+1), 15164 (Int)getIByte(delta_start+2), 15165 (Int)getIByte(delta_start+3) ); 15166 15167 /* Tell the dispatcher that this insn cannot be decoded, and so has 15168 not been executed, and (is currently) the next to be executed. 15169 EIP should be up-to-date since it made so at the start of each 15170 insn, but nevertheless be paranoid and update it again right 15171 now. */ 15172 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) ); 15173 jmp_lit(&dres, Ijk_NoDecode, guest_EIP_curr_instr); 15174 vassert(dres.whatNext == Dis_StopHere); 15175 dres.len = 0; 15176 /* We also need to say that a CAS is not expected now, regardless 15177 of what it might have been set to at the start of the function, 15178 since the IR that we've emitted just above (to synthesis a 15179 SIGILL) does not involve any CAS, and presumably no other IR has 15180 been emitted for this (non-decoded) insn. */ 15181 *expect_CAS = False; 15182 return dres; 15183 15184 } /* switch (opc) for the main (primary) opcode switch. */ 15185 15186 decode_success: 15187 /* All decode successes end up here. */ 15188 switch (dres.whatNext) { 15189 case Dis_Continue: 15190 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) ); 15191 break; 15192 case Dis_ResteerU: 15193 case Dis_ResteerC: 15194 stmt( IRStmt_Put( OFFB_EIP, mkU32(dres.continueAt) ) ); 15195 break; 15196 case Dis_StopHere: 15197 break; 15198 default: 15199 vassert(0); 15200 } 15201 15202 DIP("\n"); 15203 dres.len = delta - delta_start; 15204 return dres; 15205} 15206 15207#undef DIP 15208#undef DIS 15209 15210 15211/*------------------------------------------------------------*/ 15212/*--- Top-level fn ---*/ 15213/*------------------------------------------------------------*/ 15214 15215/* Disassemble a single instruction into IR. The instruction 15216 is located in host memory at &guest_code[delta]. */ 15217 15218DisResult disInstr_X86 ( IRSB* irsb_IN, 15219 Bool (*resteerOkFn) ( void*, Addr64 ), 15220 Bool resteerCisOk, 15221 void* callback_opaque, 15222 UChar* guest_code_IN, 15223 Long delta, 15224 Addr64 guest_IP, 15225 VexArch guest_arch, 15226 VexArchInfo* archinfo, 15227 VexAbiInfo* abiinfo, 15228 Bool host_bigendian_IN ) 15229{ 15230 Int i, x1, x2; 15231 Bool expect_CAS, has_CAS; 15232 DisResult dres; 15233 15234 /* Set globals (see top of this file) */ 15235 vassert(guest_arch == VexArchX86); 15236 guest_code = guest_code_IN; 15237 irsb = irsb_IN; 15238 host_is_bigendian = host_bigendian_IN; 15239 guest_EIP_curr_instr = (Addr32)guest_IP; 15240 guest_EIP_bbstart = (Addr32)toUInt(guest_IP - delta); 15241 15242 x1 = irsb_IN->stmts_used; 15243 expect_CAS = False; 15244 dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn, 15245 resteerCisOk, 15246 callback_opaque, 15247 delta, archinfo, abiinfo ); 15248 x2 = irsb_IN->stmts_used; 15249 vassert(x2 >= x1); 15250 15251 /* See comment at the top of disInstr_X86_WRK for meaning of 15252 expect_CAS. Here, we (sanity-)check for the presence/absence of 15253 IRCAS as directed by the returned expect_CAS value. */ 15254 has_CAS = False; 15255 for (i = x1; i < x2; i++) { 15256 if (irsb_IN->stmts[i]->tag == Ist_CAS) 15257 has_CAS = True; 15258 } 15259 15260 if (expect_CAS != has_CAS) { 15261 /* inconsistency detected. re-disassemble the instruction so as 15262 to generate a useful error message; then assert. */ 15263 vex_traceflags |= VEX_TRACE_FE; 15264 dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn, 15265 resteerCisOk, 15266 callback_opaque, 15267 delta, archinfo, abiinfo ); 15268 for (i = x1; i < x2; i++) { 15269 vex_printf("\t\t"); 15270 ppIRStmt(irsb_IN->stmts[i]); 15271 vex_printf("\n"); 15272 } 15273 /* Failure of this assertion is serious and denotes a bug in 15274 disInstr. */ 15275 vpanic("disInstr_X86: inconsistency in LOCK prefix handling"); 15276 } 15277 15278 return dres; 15279} 15280 15281 15282/*--------------------------------------------------------------------*/ 15283/*--- end guest_x86_toIR.c ---*/ 15284/*--------------------------------------------------------------------*/ 15285