/*--------------------------------------------------------------------*/
/*--- begin                                       guest_x86_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2015 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

/* Translates x86 code to IR. */

/* TODO:

   All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
   to ensure a 32-bit value is being written.

   FUCOMI(P): what happens to A and S flags?  Currently are forced
   to zero.

   x87 FP Limitations:

   * all arithmetic done at 64 bits

   * no FP exceptions, except for handling stack over/underflow

   * FP rounding mode observed only for float->int conversions
     and int->float conversions which could lose accuracy, and
     for float-to-float rounding.  For all other operations,
     round-to-nearest is used, regardless.

   * some of the FCOM cases could do with testing -- not convinced
     that the args are the right way round.

   * FSAVE does not re-initialise the FPU; it should do

   * FINIT not only initialises the FPU environment, it also
     zeroes all the FP registers.  It should leave the registers
     unchanged.

   SAHF should cause eflags[1] == 1, and in fact it produces 0.  As
   per Intel docs this bit has no meaning anyway.  Since PUSHF is the
   only way to observe eflags[1], a proper fix would be to make that
   bit be set by PUSHF.

   The state of %eflags.AC (alignment check, bit 18) is recorded by
   the simulation (viz, if you set it with popf then a pushf produces
   the value you set it to), but it is otherwise ignored.  In
   particular, setting it to 1 does NOT cause alignment checking to
   happen.  Programs that set it to 1 and then rely on the resulting
   SIGBUSs to inform them of misaligned accesses will not work.

   Implementation of sysenter is necessarily partial.  sysenter is a
   kind of system call entry.  When doing a sysenter, the return
   address is not known -- that is something that is beyond Vex's
   knowledge.  So the generated IR forces a return to the scheduler,
   which can do what it likes to simulate the sysenter, but it MUST
   set this thread's guest_EIP field with the continuation address
   before resuming execution.  If that doesn't happen, the thread will
   jump to address zero, which is probably fatal.

   This module uses global variables and so is not MT-safe (if that
   should ever become relevant).

   The delta values are 32-bit ints, not 64-bit ints.  That means
   this module may not work right if run on a 64-bit host.  That should
   be fixed properly, really -- if anyone ever wants to use Vex to
   translate x86 code for execution on a 64-bit host.

   casLE (implementation of lock-prefixed insns) and rep-prefixed
   insns: the side-exit back to the start of the insn is done with
   Ijk_Boring.  This is quite wrong, it should be done with
   Ijk_NoRedir, since otherwise the side exit, which is intended to
   restart the instruction for whatever reason, could go somewhere
   entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
   no-redir jumps performance critical, at least for rep-prefixed
   instructions, since all iterations thereof would involve such a
   jump.  It's not such a big deal with casLE since the side exit is
   only taken if the CAS fails, that is, the location is contended,
   which is relatively unlikely.

   XXXX: Nov 2009: handling of SWP on ARM suffers from the same
   problem.

   Note also, the test for CAS success vs failure is done using
   Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
   Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
   shouldn't definedness-check these comparisons.  See
   COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   background/rationale.
*/

/* Performance holes:

   - fcom ; fstsw %ax ; sahf
     sahf does not update the O flag (sigh) and so O needs to
     be computed.  This is done expensively; it would be better
     to have a calculate_eflags_o helper.

   - emwarns; some FP codes can generate huge numbers of these
     if the fpucw is changed in an inner loop.  It would be
     better for the guest state to have an emwarn-enable reg
     which can be set zero or nonzero.  If it is zero, emwarns
     are not flagged, and instead control just flows all the
     way through bbs as usual.
*/

/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by the 12-byte preamble C1C703 C1C70D C1C71D
   C1C713 (in the standard interpretation, that means: roll $3, %edi;
   roll $13, %edi; roll $29, %edi; roll $19, %edi).  Following that,
   one of the following 4 are allowed (standard interpretation in
   parentheses):

   87DB (xchgl %ebx,%ebx)   %EDX = client_request ( %EAX )
   87C9 (xchgl %ecx,%ecx)   %EAX = guest_NRADDR
   87D2 (xchgl %edx,%edx)   call-noredir *%EAX
   87FF (xchgl %edi,%edi)   IR injection

   Any other bytes following the 12-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.

   No prefixes may precede a "Special" instruction.
*/
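
/* Illustrative sketch (derived purely from the description above; the
   canonical encodings live in Valgrind's client-request headers, not
   here): the preamble plus the client-request marker, as raw bytes. */
#if 0
static const unsigned char special_preamble[12] = {
   0xC1, 0xC7, 0x03,   /* roll $3,  %edi */
   0xC1, 0xC7, 0x0D,   /* roll $13, %edi */
   0xC1, 0xC7, 0x1D,   /* roll $29, %edi */
   0xC1, 0xC7, 0x13    /* roll $19, %edi */
};
static const unsigned char special_client_request[2]
   = { 0x87, 0xDB };   /* xchgl %ebx,%ebx */
#endif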

/* LOCK prefixed instructions.  These are translated using IR-level
   CAS statements (IRCAS) and are believed to preserve atomicity, even
   from the point of view of some other process racing against a
   simulated one (presumably they communicate via a shared memory
   segment).

   Handlers which are aware of LOCK prefixes are:
      dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
      dis_cmpxchg_G_E  (cmpxchg)
      dis_Grp1         (add, or, adc, sbb, and, sub, xor)
      dis_Grp3         (not, neg)
      dis_Grp4         (inc, dec)
      dis_Grp5         (inc, dec)
      dis_Grp8_Imm     (bts, btc, btr)
      dis_bt_G_E       (bts, btc, btr)
      dis_xadd_G_E     (xadd)
*/


#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_x86.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_generic_x87.h"
#include "guest_x86_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an insn, right
   down in disInstr_X86, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* We need to know this to do sub-register accesses correctly. */
static VexEndness host_endness;

/* Pointer to the guest code area (points to start of BB, not to the
   insn being processed). */
static const UChar* guest_code;

/* The guest address corresponding to guest_code[0]. */
static Addr32 guest_EIP_bbstart;

/* The guest address for the instruction currently being
   translated. */
static Addr32 guest_EIP_curr_instr;

/* The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Offsets of various parts of the x86 guest state.     ---*/
/*------------------------------------------------------------*/

#define OFFB_EAX       offsetof(VexGuestX86State,guest_EAX)
#define OFFB_EBX       offsetof(VexGuestX86State,guest_EBX)
#define OFFB_ECX       offsetof(VexGuestX86State,guest_ECX)
#define OFFB_EDX       offsetof(VexGuestX86State,guest_EDX)
#define OFFB_ESP       offsetof(VexGuestX86State,guest_ESP)
#define OFFB_EBP       offsetof(VexGuestX86State,guest_EBP)
#define OFFB_ESI       offsetof(VexGuestX86State,guest_ESI)
#define OFFB_EDI       offsetof(VexGuestX86State,guest_EDI)

#define OFFB_EIP       offsetof(VexGuestX86State,guest_EIP)

#define OFFB_CC_OP     offsetof(VexGuestX86State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestX86State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestX86State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestX86State,guest_CC_NDEP)

#define OFFB_FPREGS    offsetof(VexGuestX86State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestX86State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestX86State,guest_DFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestX86State,guest_IDFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestX86State,guest_ACFLAG)
#define OFFB_FTOP      offsetof(VexGuestX86State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestX86State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestX86State,guest_FPROUND)

#define OFFB_CS        offsetof(VexGuestX86State,guest_CS)
#define OFFB_DS        offsetof(VexGuestX86State,guest_DS)
#define OFFB_ES        offsetof(VexGuestX86State,guest_ES)
#define OFFB_FS        offsetof(VexGuestX86State,guest_FS)
#define OFFB_GS        offsetof(VexGuestX86State,guest_GS)
#define OFFB_SS        offsetof(VexGuestX86State,guest_SS)
#define OFFB_LDT       offsetof(VexGuestX86State,guest_LDT)
#define OFFB_GDT       offsetof(VexGuestX86State,guest_GDT)

#define OFFB_SSEROUND  offsetof(VexGuestX86State,guest_SSEROUND)
#define OFFB_XMM0      offsetof(VexGuestX86State,guest_XMM0)
#define OFFB_XMM1      offsetof(VexGuestX86State,guest_XMM1)
#define OFFB_XMM2      offsetof(VexGuestX86State,guest_XMM2)
#define OFFB_XMM3      offsetof(VexGuestX86State,guest_XMM3)
#define OFFB_XMM4      offsetof(VexGuestX86State,guest_XMM4)
#define OFFB_XMM5      offsetof(VexGuestX86State,guest_XMM5)
#define OFFB_XMM6      offsetof(VexGuestX86State,guest_XMM6)
#define OFFB_XMM7      offsetof(VexGuestX86State,guest_XMM7)

#define OFFB_EMNOTE    offsetof(VexGuestX86State,guest_EMNOTE)

#define OFFB_CMSTART   offsetof(VexGuestX86State,guest_CMSTART)
#define OFFB_CMLEN     offsetof(VexGuestX86State,guest_CMLEN)
#define OFFB_NRADDR    offsetof(VexGuestX86State,guest_NRADDR)

#define OFFB_IP_AT_SYSCALL offsetof(VexGuestX86State,guest_IP_AT_SYSCALL)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- x86 insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* This is the Intel register encoding -- integer regs. */
#define R_EAX 0
#define R_ECX 1
#define R_EDX 2
#define R_EBX 3
#define R_ESP 4
#define R_EBP 5
#define R_ESI 6
#define R_EDI 7

#define R_AL (0+R_EAX)
#define R_AH (4+R_EAX)

/* This is the Intel register encoding -- segment regs. */
#define R_ES 0
#define R_CS 1
#define R_SS 2
#define R_DS 3
#define R_FS 4
#define R_GS 5

/* Add a statement to the list held by "irbb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* Various simple conversions */

static UInt extend_s_8to32( UInt x )
{
   return (UInt)((Int)(x << 24) >> 24);
}

static UInt extend_s_16to32 ( UInt x )
{
   return (UInt)((Int)(x << 16) >> 16);
}

/* Fetch a byte from the guest insn stream. */
static UChar getIByte ( Int delta )
{
   return guest_code[delta];
}

/* Extract the reg field from a modRM byte. */
static Int gregOfRM ( UChar mod_reg_rm )
{
   return (Int)( (mod_reg_rm >> 3) & 7 );
}

/* Figure out whether the mod and rm parts of a modRM byte refer to a
   register or memory.  If so, the byte will have the form 11XXXYYY,
   where YYY is the register number. */
static Bool epartIsReg ( UChar mod_reg_rm )
{
   return toBool(0xC0 == (mod_reg_rm & 0xC0));
}

/* ... and extract the register number ... */
static Int eregOfRM ( UChar mod_reg_rm )
{
   return (Int)(mod_reg_rm & 0x7);
}

/* Get a 8/16/32-bit unsigned value out of the insn stream. */

static UChar getUChar ( Int delta )
{
   UChar v = guest_code[delta+0];
   return toUChar(v);
}

static UInt getUDisp16 ( Int delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v & 0xFFFF;
}

static UInt getUDisp32 ( Int delta )
{
   UInt v = guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v;
}

static UInt getUDisp ( Int size, Int delta )
{
   switch (size) {
      case 4: return getUDisp32(delta);
      case 2: return getUDisp16(delta);
      case 1: return (UInt)getUChar(delta);
      default: vpanic("getUDisp(x86)");
   }
   return 0; /*notreached*/
}


/* Get a byte value out of the insn stream and sign-extend to 32
   bits. */
static UInt getSDisp8 ( Int delta )
{
   return extend_s_8to32( (UInt) (guest_code[delta]) );
}

static UInt getSDisp16 ( Int delta0 )
{
   const UChar* eip = &guest_code[delta0];
   UInt d = *eip++;
   d |= ((*eip++) << 8);
   return extend_s_16to32(d);
}

static UInt getSDisp ( Int size, Int delta )
{
   switch (size) {
      case 4: return getUDisp32(delta);
      case 2: return getSDisp16(delta);
      case 1: return getSDisp8(delta);
      default: vpanic("getSDisp(x86)");
   }
   return 0; /*notreached*/
}
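
/* Worked example (a sketch; the byte value is hypothetical): the modRM
   byte 0xD8 is 11 011 000 in binary, so mod == 11 means the E part
   denotes a register rather than memory, the G (reg) field is 3
   (%ebx), and the E (rm) field is 0 (%eax). */
#if 0
static void example_modrm_decode ( void )
{
   UChar modrm = 0xD8;
   vassert(epartIsReg(modrm));          /* mod == 11: E is a register */
   vassert(gregOfRM(modrm) == R_EBX);   /* reg field */
   vassert(eregOfRM(modrm) == R_EAX);   /* rm field  */
}
#endif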


/*------------------------------------------------------------*/
/*--- Helpers for constructing IR.                         ---*/
/*------------------------------------------------------------*/

/* Create a 1/2/4 byte read of an x86 integer register.  For 16/8 bit
   register references, we need to take the host endianness into
   account.  Supplied value is 0 .. 7 and in the Intel instruction
   encoding. */

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      default: vpanic("szToITy(x86)");
   }
}

/* On a little-endian host, less significant bits of the guest
   registers are at lower addresses.  Therefore, a reference to a
   register low half has the same guest state offset as a reference to
   the full register.
*/
static Int integerGuestRegOffset ( Int sz, UInt archreg )
{
   vassert(archreg < 8);

   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);

   if (sz == 4 || sz == 2 || (sz == 1 && archreg < 4)) {
      switch (archreg) {
         case R_EAX: return OFFB_EAX;
         case R_EBX: return OFFB_EBX;
         case R_ECX: return OFFB_ECX;
         case R_EDX: return OFFB_EDX;
         case R_ESI: return OFFB_ESI;
         case R_EDI: return OFFB_EDI;
         case R_ESP: return OFFB_ESP;
         case R_EBP: return OFFB_EBP;
         default: vpanic("integerGuestRegOffset(x86,le)(4,2)");
      }
   }

   vassert(archreg >= 4 && archreg < 8 && sz == 1);
   switch (archreg-4) {
      case R_EAX: return 1+ OFFB_EAX;
      case R_EBX: return 1+ OFFB_EBX;
      case R_ECX: return 1+ OFFB_ECX;
      case R_EDX: return 1+ OFFB_EDX;
      default: vpanic("integerGuestRegOffset(x86,le)(1h)");
   }

   /* NOTREACHED */
   vpanic("integerGuestRegOffset(x86,le)");
}

static Int segmentGuestRegOffset ( UInt sreg )
{
   switch (sreg) {
      case R_ES: return OFFB_ES;
      case R_CS: return OFFB_CS;
      case R_SS: return OFFB_SS;
      case R_DS: return OFFB_DS;
      case R_FS: return OFFB_FS;
      case R_GS: return OFFB_GS;
      default: vpanic("segmentGuestRegOffset(x86)");
   }
}

static Int xmmGuestRegOffset ( UInt xmmreg )
{
   switch (xmmreg) {
      case 0: return OFFB_XMM0;
      case 1: return OFFB_XMM1;
      case 2: return OFFB_XMM2;
      case 3: return OFFB_XMM3;
      case 4: return OFFB_XMM4;
      case 5: return OFFB_XMM5;
      case 6: return OFFB_XMM6;
      case 7: return OFFB_XMM7;
      default: vpanic("xmmGuestRegOffset");
   }
}

/* Lanes of vector registers are always numbered from zero being the
   least significant lane (rightmost in the register). */

static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 8);
   return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
}

static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 4);
   return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
}

static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 2);
   return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
}

static IRExpr* getIReg ( Int sz, UInt archreg )
{
   vassert(sz == 1 || sz == 2 || sz == 4);
   vassert(archreg < 8);
   return IRExpr_Get( integerGuestRegOffset(sz,archreg),
                      szToITy(sz) );
}
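
/* Example (a sketch, not called anywhere): on a little-endian host the
   "high byte" registers sit one byte above the base of the containing
   32-bit register, so %al and %ah both resolve into guest_EAX. */
#if 0
static void example_subreg_offsets ( void )
{
   vassert(integerGuestRegOffset(1, R_AL) == OFFB_EAX);
   vassert(integerGuestRegOffset(1, R_AH) == OFFB_EAX + 1);
}
#endif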
/* Ditto, but write to a reg instead. */
static void putIReg ( Int sz, UInt archreg, IRExpr* e )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   switch (sz) {
      case 1: vassert(ty == Ity_I8);  break;
      case 2: vassert(ty == Ity_I16); break;
      case 4: vassert(ty == Ity_I32); break;
      default: vpanic("putIReg(x86)");
   }
   vassert(archreg < 8);
   stmt( IRStmt_Put(integerGuestRegOffset(sz,archreg), e) );
}

static IRExpr* getSReg ( UInt sreg )
{
   return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
}

static void putSReg ( UInt sreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) );
}

static IRExpr* getXMMReg ( UInt xmmreg )
{
   return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
}

static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
}

static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
}

static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
}

static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
}

static void putXMMReg ( UInt xmmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
}

static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16( (UShort)i ));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU ( IRType ty, UInt i )
{
   if (ty == Ity_I8)  return mkU8(i);
   if (ty == Ity_I16) return mkU16(i);
   if (ty == Ity_I32) return mkU32(i);
   /* If this panics, it usually means you passed a size (1,2,4)
      value as the IRType, rather than a real IRType. */
   vpanic("mkU(x86)");
}

static IRExpr* mkV128 ( UShort mask )
{
   return IRExpr_Const(IRConst_V128(mask));
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

static IROp mkSizedOp ( IRType ty, IROp op8 )
{
   Int adj;
   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
   vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
           || op8 == Iop_Mul8
           || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
           || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
           || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
           || op8 == Iop_CasCmpNE8
           || op8 == Iop_ExpCmpNE8
           || op8 == Iop_Not8);
   adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
   return adj + op8;
}

static IROp mkWidenOp ( Int szSmall, Int szBig, Bool signd )
{
   if (szSmall == 1 && szBig == 4) {
      return signd ? Iop_8Sto32 : Iop_8Uto32;
   }
   if (szSmall == 1 && szBig == 2) {
      return signd ? Iop_8Sto16 : Iop_8Uto16;
   }
   if (szSmall == 2 && szBig == 4) {
      return signd ? Iop_16Sto32 : Iop_16Uto32;
   }
   vpanic("mkWidenOp(x86,guest)");
}

static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
{
   vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
   vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
   return unop(Iop_32to1,
               binop(Iop_And32,
                     unop(Iop_1Uto32,x),
                     unop(Iop_1Uto32,y)));
}

/* Generate a compare-and-swap operation, operating on memory at
   'addr'.  The expected value is 'expVal' and the new value is
   'newVal'.  If the operation fails, then transfer control (with a
   no-redir jump (XXX no -- see comment at top of this file)) to
   'restart_point', which is presumably the address of the guest
   instruction again -- retrying, essentially. */
static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
                    Addr32 restart_point )
{
   IRCAS* cas;
   IRType tyE    = typeOfIRExpr(irsb->tyenv, expVal);
   IRType tyN    = typeOfIRExpr(irsb->tyenv, newVal);
   IRTemp oldTmp = newTemp(tyE);
   IRTemp expTmp = newTemp(tyE);
   vassert(tyE == tyN);
   vassert(tyE == Ity_I32 || tyE == Ity_I16 || tyE == Ity_I8);
   assign(expTmp, expVal);
   cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
                  NULL, mkexpr(expTmp), NULL, newVal );
   stmt( IRStmt_CAS(cas) );
   stmt( IRStmt_Exit(
            binop( mkSizedOp(tyE,Iop_CasCmpNE8),
                   mkexpr(oldTmp), mkexpr(expTmp) ),
            Ijk_Boring, /*Ijk_NoRedir*/
            IRConst_U32( restart_point ),
            OFFB_EIP
         ));
}
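
/* Usage sketch (hypothetical; simplified from the LOCK-aware handlers
   later in this file): a locked 32-bit increment of memory.  The store
   only takes effect if the location still holds the value originally
   read; otherwise the side exit restarts the whole instruction.  Flag
   updates are omitted for brevity. */
#if 0
static void example_locked_incl ( IRTemp addr, Addr32 restart_point )
{
   IRTemp old = newTemp(Ity_I32);
   IRTemp nyu = newTemp(Ity_I32);
   assign( old, loadLE(Ity_I32, mkexpr(addr)) );
   assign( nyu, binop(Iop_Add32, mkexpr(old), mkU32(1)) );
   casLE( mkexpr(addr), mkexpr(old)/*expected*/,
          mkexpr(nyu)/*new*/, restart_point );
}
#endif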


/*------------------------------------------------------------*/
/*--- Helpers for %eflags.                                 ---*/
/*------------------------------------------------------------*/

/* -------------- Evaluating the flags-thunk. -------------- */

/* Build IR to calculate all the eflags from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I32. */
static IRExpr* mk_x86g_calculate_eflags_all ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "x86g_calculate_eflags_all", &x86g_calculate_eflags_all,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_Bit. */
static IRExpr* mk_x86g_calculate_condition ( X86Condcode cond )
{
   IRExpr** args
      = mkIRExprVec_5( mkU32(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "x86g_calculate_condition", &x86g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   return unop(Iop_32to1, call);
}

/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I32. */
static IRExpr* mk_x86g_calculate_eflags_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           3/*regparm*/,
           "x86g_calculate_eflags_c", &x86g_calculate_eflags_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* -------------- Building the flags-thunk. -------------- */

/* The machinery in this section builds the flag-thunk following a
   flag-setting operation.  Hence the various setFlags_* functions.
*/

static Bool isAddSub ( IROp op8 )
{
   return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
}

static Bool isLogic ( IROp op8 )
{
   return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
}

/* U-widen 8/16/32 bit int expr to 32. */
static IRExpr* widenUto32 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I32: return e;
      case Ity_I16: return unop(Iop_16Uto32,e);
      case Ity_I8:  return unop(Iop_8Uto32,e);
      default: vpanic("widenUto32");
   }
}

/* S-widen 8/16/32 bit int expr to 32. */
static IRExpr* widenSto32 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I32: return e;
      case Ity_I16: return unop(Iop_16Sto32,e);
      case Ity_I8:  return unop(Iop_8Sto32,e);
      default: vpanic("widenSto32");
   }
}

/* Narrow 8/16/32 bit int expr to 8/16/32.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
{
   IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
   if (src_ty == dst_ty)
      return e;
   if (src_ty == Ity_I32 && dst_ty == Ity_I16)
      return unop(Iop_32to16, e);
   if (src_ty == Ity_I32 && dst_ty == Ity_I8)
      return unop(Iop_32to8, e);

   vex_printf("\nsrc, dst tys are: ");
   ppIRType(src_ty);
   vex_printf(", ");
   ppIRType(dst_ty);
   vex_printf("\n");
   vpanic("narrowTo(x86)");
}


/* Set the flags thunk OP, DEP1 and DEP2 fields.  The supplied op is
   auto-sized up to the real op. */

static
void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
{
   Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   switch (op8) {
      case Iop_Add8: ccOp += X86G_CC_OP_ADDB; break;
      case Iop_Sub8: ccOp += X86G_CC_OP_SUBB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1_DEP2(x86)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(dep2))) );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}


/* Set the OP and DEP1 fields only, and write zero to DEP2. */

static
void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
{
   Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   switch (op8) {
      case Iop_Or8:
      case Iop_And8:
      case Iop_Xor8: ccOp += X86G_CC_OP_LOGICB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1(x86)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}
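
/* Worked example (a sketch, not called anywhere): translating
   "addl %ebx,%eax" would compute the sum and then record OP =
   X86G_CC_OP_ADDL, DEP1 = old %eax, DEP2 = %ebx in the thunk; the
   actual eflags are only materialised if a later instruction asks
   for them. */
#if 0
static void example_addl_ebx_eax ( void )
{
   IRTemp dst0 = newTemp(Ity_I32);
   IRTemp src  = newTemp(Ity_I32);
   IRTemp dst1 = newTemp(Ity_I32);
   assign( dst0, getIReg(4,R_EAX) );
   assign( src,  getIReg(4,R_EBX) );
   assign( dst1, binop(Iop_Add32, mkexpr(dst0), mkexpr(src)) );
   putIReg( 4, R_EAX, mkexpr(dst1) );
   setFlags_DEP1_DEP2( Iop_Add8, dst0, src, Ity_I32 );
}
#endif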


/* For shift operations, we put in the result and the undershifted
   result.  Except if the shift amount is zero, the thunk is left
   unchanged. */

static void setFlags_DEP1_DEP2_shift ( IROp    op32,
                                       IRTemp  res,
                                       IRTemp  resUS,
                                       IRType  ty,
                                       IRTemp  guard )
{
   Int ccOp = ty==Ity_I8 ? 2 : (ty==Ity_I16 ? 1 : 0);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
   vassert(guard);

   /* Both kinds of right shifts are handled by the same thunk
      operation. */
   switch (op32) {
      case Iop_Shr32:
      case Iop_Sar32: ccOp = X86G_CC_OP_SHRL - ccOp; break;
      case Iop_Shl32: ccOp = X86G_CC_OP_SHLL - ccOp; break;
      default:        ppIROp(op32);
                      vpanic("setFlags_DEP1_DEP2_shift(x86)");
   }

   /* guard :: Ity_I8.  We need to convert it to I1. */
   IRTemp guardB = newTemp(Ity_I1);
   assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );

   /* DEP1 contains the result, DEP2 contains the undershifted value. */
   stmt( IRStmt_Put( OFFB_CC_OP,
                     IRExpr_ITE( mkexpr(guardB),
                                 mkU32(ccOp),
                                 IRExpr_Get(OFFB_CC_OP,Ity_I32) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto32(mkexpr(res)),
                                 IRExpr_Get(OFFB_CC_DEP1,Ity_I32) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2,
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto32(mkexpr(resUS)),
                                 IRExpr_Get(OFFB_CC_DEP2,Ity_I32) ) ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP,
                     IRExpr_ITE( mkexpr(guardB),
                                 mkU32(0),
                                 IRExpr_Get(OFFB_CC_NDEP,Ity_I32) ) ));
}


/* For the inc/dec case, we store in DEP1 the result value and in NDEP
   the former value of the carry flag, which unfortunately we have to
   compute. */

static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
{
   Int ccOp = inc ? X86G_CC_OP_INCB : X86G_CC_OP_DECB;

   ccOp += ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   /* This has to come first, because calculating the C flag
      may require reading all four thunk fields. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mk_x86g_calculate_eflags_c()) );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(res))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
}


/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
   two arguments. */

static
void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, UInt base_op )
{
   switch (ty) {
      case Ity_I8:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+0) ) );
         break;
      case Ity_I16:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+1) ) );
         break;
      case Ity_I32:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+2) ) );
         break;
      default:
         vpanic("setFlags_MUL(x86)");
   }
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(arg1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(arg2)) ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}


/* -------------- Condition codes. -------------- */

/* Condition codes, using the Intel encoding. */

static const HChar* name_X86Condcode ( X86Condcode cond )
{
   switch (cond) {
      case X86CondO:      return "o";
      case X86CondNO:     return "no";
      case X86CondB:      return "b";
      case X86CondNB:     return "nb";
      case X86CondZ:      return "z";
      case X86CondNZ:     return "nz";
      case X86CondBE:     return "be";
      case X86CondNBE:    return "nbe";
      case X86CondS:      return "s";
      case X86CondNS:     return "ns";
      case X86CondP:      return "p";
      case X86CondNP:     return "np";
      case X86CondL:      return "l";
      case X86CondNL:     return "nl";
      case X86CondLE:     return "le";
      case X86CondNLE:    return "nle";
      case X86CondAlways: return "ALWAYS";
      default: vpanic("name_X86Condcode");
   }
}

static
X86Condcode positiveIse_X86Condcode ( X86Condcode  cond,
                                      Bool*        needInvert )
{
   vassert(cond >= X86CondO && cond <= X86CondNLE);
   if (cond & 1) {
      *needInvert = True;
      return cond-1;
   } else {
      *needInvert = False;
      return cond;
   }
}
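
/* Example (a sketch): only the even-numbered ("positive") conditions
   are ever evaluated; an odd one such as "nz" is turned into "z" plus
   an inversion flag. */
#if 0
static void example_positive_ise ( void )
{
   Bool invert = False;
   X86Condcode c = positiveIse_X86Condcode( X86CondNZ, &invert );
   vassert(c == X86CondZ && invert == True);
}
#endif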


/* -------------- Helpers for ADD/SUB with carry. -------------- */

/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
   appropriately.

   Optionally, generate a store for the 'tres' value.  This can either
   be a normal store, or it can be a cas-with-possible-failure style
   store:

   if taddr is IRTemp_INVALID, then no store is generated.

   if taddr is not IRTemp_INVALID, then a store (using taddr as
   the address) is generated:

     if texpVal is IRTemp_INVALID then a normal store is
     generated, and restart_point must be zero (it is irrelevant).

     if texpVal is not IRTemp_INVALID then a cas-style store is
     generated.  texpVal is the expected value, restart_point
     is the restart point if the store fails, and texpVal must
     have the same type as tres.
*/
static void helper_ADC ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt thunkOp;
   IRType ty    = szToITy(sz);
   IRTemp oldc  = newTemp(Ity_I32);
   IRTemp oldcn = newTemp(ty);
   IROp   plus  = mkSizedOp(ty, Iop_Add8);
   IROp   xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
   vassert(sz == 1 || sz == 2 || sz == 4);
   thunkOp = sz==4 ? X86G_CC_OP_ADCL
                   : (sz==2 ? X86G_CC_OP_ADCW : X86G_CC_OP_ADCB);

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And32,
                       mk_x86g_calculate_eflags_c(),
                       mkU32(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}


/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
   appropriately.  As with helper_ADC, possibly generate a store of
   the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt thunkOp;
   IRType ty    = szToITy(sz);
   IRTemp oldc  = newTemp(Ity_I32);
   IRTemp oldcn = newTemp(ty);
   IROp   minus = mkSizedOp(ty, Iop_Sub8);
   IROp   xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
   vassert(sz == 1 || sz == 2 || sz == 4);
   thunkOp = sz==4 ? X86G_CC_OP_SBBL
                   : (sz==2 ? X86G_CC_OP_SBBW : X86G_CC_OP_SBBB);

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And32,
                       mk_x86g_calculate_eflags_c(),
                       mkU32(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   assign( tres, binop(minus,
                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1) )) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}


/* -------------- Helpers for disassembly printing. -------------- */

static const HChar* nameGrp1 ( Int opc_aux )
{
   static const HChar* grp1_names[8]
     = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(x86)");
   return grp1_names[opc_aux];
}

static const HChar* nameGrp2 ( Int opc_aux )
{
   static const HChar* grp2_names[8]
     = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(x86)");
   return grp2_names[opc_aux];
}

static const HChar* nameGrp4 ( Int opc_aux )
{
   static const HChar* grp4_names[8]
     = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
   if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(x86)");
   return grp4_names[opc_aux];
}

static const HChar* nameGrp5 ( Int opc_aux )
{
   static const HChar* grp5_names[8]
     = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
   if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(x86)");
   return grp5_names[opc_aux];
}

static const HChar* nameGrp8 ( Int opc_aux )
{
   static const HChar* grp8_names[8]
     = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
   if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(x86)");
   return grp8_names[opc_aux];
}

static const HChar* nameIReg ( Int size, Int reg )
{
   static const HChar* ireg32_names[8]
     = { "%eax", "%ecx", "%edx", "%ebx",
         "%esp", "%ebp", "%esi", "%edi" };
   static const HChar* ireg16_names[8]
     = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" };
   static const HChar* ireg8_names[8]
     = { "%al", "%cl", "%dl", "%bl",
         "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" };
   if (reg < 0 || reg > 7) goto bad;
   switch (size) {
      case 4: return ireg32_names[reg];
      case 2: return ireg16_names[reg];
      case 1: return ireg8_names[reg];
   }
  bad:
   vpanic("nameIReg(X86)");
   return NULL; /*notreached*/
}

static const HChar* nameSReg ( UInt sreg )
{
   switch (sreg) {
      case R_ES: return "%es";
      case R_CS: return "%cs";
      case R_SS: return "%ss";
      case R_DS: return "%ds";
      case R_FS: return "%fs";
      case R_GS: return "%gs";
      default: vpanic("nameSReg(x86)");
   }
}

static const HChar* nameMMXReg ( Int mmxreg )
{
   static const HChar* mmx_names[8]
     = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
   if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(x86,guest)");
   return mmx_names[mmxreg];
}

static const HChar* nameXMMReg ( Int xmmreg )
{
   static const HChar* xmm_names[8]
     = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
         "%xmm4", "%xmm5", "%xmm6", "%xmm7" };
   if (xmmreg < 0 || xmmreg > 7) vpanic("name_of_xmm_reg");
   return xmm_names[xmmreg];
}

static const HChar* nameMMXGran ( Int gran )
{
   switch (gran) {
      case 0: return "b";
      case 1: return "w";
      case 2: return "d";
      case 3: return "q";
      default: vpanic("nameMMXGran(x86,guest)");
   }
}

static HChar nameISize ( Int size )
{
   switch (size) {
      case 4: return 'l';
      case 2: return 'w';
      case 1: return 'b';
      default: vpanic("nameISize(x86)");
   }
}


/*------------------------------------------------------------*/
/*--- JMP helpers                                          ---*/
/*------------------------------------------------------------*/

static void jmp_lit( /*MOD*/DisResult* dres,
                     IRJumpKind kind, Addr32 d32 )
{
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_EIP, mkU32(d32) ) );
}

static void jmp_treg( /*MOD*/DisResult* dres,
                      IRJumpKind kind, IRTemp t )
{
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_EIP, mkexpr(t) ) );
}

static
void jcc_01( /*MOD*/DisResult* dres,
             X86Condcode cond, Addr32 d32_false, Addr32 d32_true )
{
   Bool        invert;
   X86Condcode condPos;
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = Ijk_Boring;
   condPos = positiveIse_X86Condcode ( cond, &invert );
   if (invert) {
      stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U32(d32_false),
                         OFFB_EIP ) );
      stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_true) ) );
   } else {
      stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U32(d32_true),
                         OFFB_EIP ) );
      stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_false) ) );
   }
}


/*------------------------------------------------------------*/
/*--- Disassembling addressing modes                       ---*/
/*------------------------------------------------------------*/

static
const HChar* sorbTxt ( UChar sorb )
{
   switch (sorb) {
      case 0:    return ""; /* no override */
      case 0x3E: return "%ds:";
      case 0x26: return "%es:";
      case 0x64: return "%fs:";
      case 0x65: return "%gs:";
      default: vpanic("sorbTxt(x86,guest)");
   }
}


/* 'virtual' is an IRExpr* holding a virtual address.  Convert it to a
   linear address by adding any required segment override as indicated
   by sorb. */
static
IRExpr* handleSegOverride ( UChar sorb, IRExpr* virtual )
{
   Int    sreg;
   IRType hWordTy;
   IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;

   if (sorb == 0)
      /* the common case - no override */
      return virtual;

   switch (sorb) {
      case 0x3E: sreg = R_DS; break;
      case 0x26: sreg = R_ES; break;
      case 0x64: sreg = R_FS; break;
      case 0x65: sreg = R_GS; break;
      default: vpanic("handleSegOverride(x86,guest)");
   }

   hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;

   seg_selector = newTemp(Ity_I32);
   ldt_ptr      = newTemp(hWordTy);
   gdt_ptr      = newTemp(hWordTy);
   r64          = newTemp(Ity_I64);

   assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
   assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
   assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));

   /*
   Call this to do the translation and limit checks:
   ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
                                 UInt seg_selector, UInt virtual_addr )
   */
   assign(
      r64,
      mkIRExprCCall(
         Ity_I64,
         0/*regparms*/,
         "x86g_use_seg_selector",
         &x86g_use_seg_selector,
         mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
                        mkexpr(seg_selector), virtual)
      )
   );

   /* If the high 32 of the result are non-zero, there was a
      failure in address translation.  In which case, make a
      quick exit.
   */
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
         Ijk_MapFail,
         IRConst_U32( guest_EIP_curr_instr ),
         OFFB_EIP
      )
   );

   /* otherwise, here's the translated result. */
   return unop(Iop_64to32, mkexpr(r64));
}
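
/* Usage sketch (hypothetical, mirroring what disAMode does below): to
   form the linear address for an access such as %fs:0x10(%eax), build
   the virtual address first and then wrap it with the override. */
#if 0
static IRExpr* example_fs_linear_addr ( void )
{
   IRExpr* virt = binop(Iop_Add32, getIReg(4,R_EAX), mkU32(0x10));
   return handleSegOverride( 0x64/*%fs:*/, virt );
}
#endif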

/* Generate IR to calculate an address indicated by a ModRM and
   following SIB bytes.  The expression, and the number of bytes in
   the address mode, are returned.  Note that this fn should not be
   called if the R/M part of the address denotes a register instead of
   memory.  If print_codegen is true, text of the addressing mode is
   placed in buf.

   The computed address is stored in a new tempreg, and the
   identity of the tempreg is returned. */

static IRTemp disAMode_copy2tmp ( IRExpr* addr32 )
{
   IRTemp tmp = newTemp(Ity_I32);
   assign( tmp, addr32 );
   return tmp;
}

static
IRTemp disAMode ( Int* len, UChar sorb, Int delta, HChar* buf )
{
   UChar mod_reg_rm = getIByte(delta);
   delta++;

   buf[0] = (UChar)0;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                      /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                            /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                      /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* (%eax) .. (%edi), not including (%esp) or (%ebp).
         --> GET %reg, t
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         { UChar rm = mod_reg_rm;
           DIS(buf, "%s(%s)", sorbTxt(sorb), nameIReg(4,rm));
           *len = 1;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb, getIReg(4,rm)));
         }

      /* d8(%eax) ... d8(%edi), not including d8(%esp)
         --> GET %reg, t ; ADDL d8, t
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         { UChar rm = toUChar(mod_reg_rm & 7);
           UInt  d  = getSDisp8(delta);
           DIS(buf, "%s%d(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
           *len = 2;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
         }

      /* d32(%eax) ... d32(%edi), not including d32(%esp)
         --> GET %reg, t ; ADDL d32, t
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         { UChar rm = toUChar(mod_reg_rm & 7);
           UInt  d  = getUDisp32(delta);
           DIS(buf, "%s0x%x(%s)", sorbTxt(sorb), d, nameIReg(4,rm));
           *len = 5;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
         }

      /* a register, %eax .. %edi.  This shouldn't happen. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         vpanic("disAMode(x86): not an addr!");

      /* a 32-bit literal address
         --> MOV d32, tmp
      */
      case 0x05:
         { UInt d = getUDisp32(delta);
           *len = 5;
           DIS(buf, "%s(0x%x)", sorbTxt(sorb), d);
           return disAMode_copy2tmp(
                     handleSegOverride(sorb, mkU32(d)));
         }

      case 0x04: {
         /* SIB, with no displacement.  Special cases:
            -- %esp cannot act as an index value.
               If index_r indicates %esp, zero is used for the index.
            -- when mod is zero and base indicates EBP, base is instead
               a 32-bit literal.
            It's all madness, I tell you.  Extract %index, %base and
            scale from the SIB byte.  The value denoted is then:
               | %index == %ESP && %base == %EBP
               = d32 following SIB byte
               | %index == %ESP && %base != %EBP
               = %base
               | %index != %ESP && %base == %EBP
               = d32 following SIB byte + (%index << scale)
               | %index != %ESP && %base != %EBP
               = %base + (%index << scale)

            What happens to the souls of CPU architects who dream up such
            horrendous schemes, do you suppose?
         */
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         delta++;

         if (index_r != R_ESP && base_r != R_EBP) {
            DIS(buf, "%s(%s,%s,%d)", sorbTxt(sorb),
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 2;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        getIReg(4,base_r),
                        binop(Iop_Shl32, getIReg(4,index_r),
                              mkU8(scale)))));
         }

         if (index_r != R_ESP && base_r == R_EBP) {
            UInt d = getUDisp32(delta);
            DIS(buf, "%s0x%x(,%s,%d)", sorbTxt(sorb), d,
                      nameIReg(4,index_r), 1<<scale);
            *len = 6;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Shl32, getIReg(4,index_r), mkU8(scale)),
                        mkU32(d))));
         }

         if (index_r == R_ESP && base_r != R_EBP) {
            DIS(buf, "%s(%s,,)", sorbTxt(sorb), nameIReg(4,base_r));
            *len = 2;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb, getIReg(4,base_r)));
         }

         if (index_r == R_ESP && base_r == R_EBP) {
            UInt d = getUDisp32(delta);
            DIS(buf, "%s0x%x(,,)", sorbTxt(sorb), d);
            *len = 6;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb, mkU32(d)));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      /* SIB, with 8-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
            = d8 + %base
            | %index != %ESP
            = d8 + %base + (%index << scale)
      */
      case 0x0C: {
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         UInt  d       = getSDisp8(delta+1);

         if (index_r == R_ESP) {
            DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
                      (Int)d, nameIReg(4,base_r));
            *len = 3;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb,
                      binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
         } else {
            DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 3;
            return
                disAMode_copy2tmp(
                handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Add32,
                              getIReg(4,base_r),
                              binop(Iop_Shl32,
                                    getIReg(4,index_r), mkU8(scale))),
                        mkU32(d))));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      /* SIB, with 32-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
            = d32 + %base
            | %index != %ESP
            = d32 + %base + (%index << scale)
      */
      case 0x14: {
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         UInt  d       = getUDisp32(delta+1);

         if (index_r == R_ESP) {
            DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
                      (Int)d, nameIReg(4,base_r));
            *len = 6;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb,
                      binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
         } else {
            DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 6;
            return
                disAMode_copy2tmp(
                handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Add32,
                              getIReg(4,base_r),
                              binop(Iop_Shl32,
                                    getIReg(4,index_r), mkU8(scale))),
                        mkU32(d))));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      default:
         vpanic("disAMode(x86)");
         return 0; /*notreached*/
   }
}


/* Figure out the number of (insn-stream) bytes constituting the amode
   beginning at delta.  Is useful for getting hold of literals beyond
   the end of the amode before it has been disassembled. */

static UInt lengthAMode ( Int delta )
{
   UChar mod_reg_rm = getIByte(delta); delta++;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;               /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                     /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;               /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* (%eax) .. (%edi), not including (%esp) or (%ebp). */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         return 1;

      /* d8(%eax) ... d8(%edi), not including d8(%esp). */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         return 2;

      /* d32(%eax) ... d32(%edi), not including d32(%esp). */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         return 5;

      /* a register, %eax .. %edi.  (Not an addr, but still handled.) */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         return 1;

      /* a 32-bit literal address. */
      case 0x05: return 5;

      /* SIB, no displacement. */
      case 0x04: {
         UChar sib    = getIByte(delta);
         UChar base_r = toUChar(sib & 7);
         if (base_r == R_EBP) return 6; else return 2;
      }
      /* SIB, with 8-bit displacement. */
      case 0x0C: return 3;

      /* SIB, with 32-bit displacement. */
      case 0x14: return 6;

      default:
         vpanic("lengthAMode");
         return 0; /*notreached*/
   }
}
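
/* Worked example (a sketch; the byte values are hypothetical): the
   sequence 4C 91 28 decodes as modRM 0x4C (mod=01, rm=100, so a SIB
   byte and an 8-bit displacement follow), SIB 0x91 (scale=2,
   index=%edx, base=%ecx) and d8 0x28 -- the amode 0x28(%ecx,%edx,4),
   denoting %ecx + (%edx << 2) + 0x28.  disAMode would come back with
   *len == 3, in agreement with lengthAMode. */
#if 0
static void example_sib_decode ( void )
{
   UChar modrm = 0x4C, sib = 0x91;
   vassert(!epartIsReg(modrm));          /* mod != 11: memory operand  */
   vassert(((sib >> 6) & 3) == 2);       /* scale shift 2 => factor 4  */
   vassert(((sib >> 3) & 7) == R_EDX);   /* index register             */
   vassert((sib & 7) == R_ECX);          /* base register              */
}
#endif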

   If E is reg, -->    GET %G,  tmp
                       OP %E,   tmp
                       PUT tmp, %G

   If E is mem and OP is not reversible,
                -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpa
                       GET %G, tmp2
                       OP tmpa, tmp2
                       PUT tmp2, %G

   If E is mem and OP is reversible
                -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpa
                       OP %G, tmpa
                       PUT tmpa, %G
*/
static
UInt dis_op2_E_G ( UChar       sorb,
                   Bool        addSubCarry,
                   IROp        op8,
                   Bool        keep,
                   Int         size,
                   Int         delta0,
                   const HChar* t_x86opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   UChar   rm   = getIByte(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SBB reg,reg. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && gregOfRM(rm) == eregOfRM(rm)) {
         putIReg(size, gregOfRM(rm), mkU(ty,0));
      }
      assign( dst0, getIReg(size,gregOfRM(rm)) );
      assign( src,  getIReg(size,eregOfRM(rm)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIReg(size, gregOfRM(rm), mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          nameIReg(size,eregOfRM(rm)),
                          nameIReg(size,gregOfRM(rm)));
      return 1+delta0;
   } else {
      /* E refers to memory */
      addr = disAMode ( &len, sorb, delta0, dis_buf);
      assign( dst0, getIReg(size,gregOfRM(rm)) );
      assign( src,  loadLE(szToITy(size), mkexpr(addr)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIReg(size, gregOfRM(rm), mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          dis_buf,nameIReg(size,gregOfRM(rm)));
      return len+delta0;
   }
}



/* Handle binary integer instructions of the form
      op G, E  meaning
      op reg, reg-or-mem
   Is passed a ptr to the modRM byte, the actual operation, and the
   data size.
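   (Example: "addl %eax,(%ebx)", insn bytes 01 03, arrives here with
   op8 == Iop_Add8 and size == 4; the result is written back to
   memory.)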
Returns the address advanced completely over this 1921 instruction. 1922 1923 G(src) is reg. 1924 E(dst) is reg-or-mem 1925 1926 If E is reg, --> GET %E, tmp 1927 OP %G, tmp 1928 PUT tmp, %E 1929 1930 If E is mem, --> (getAddr E) -> tmpa 1931 LD (tmpa), tmpv 1932 OP %G, tmpv 1933 ST tmpv, (tmpa) 1934*/ 1935static 1936UInt dis_op2_G_E ( UChar sorb, 1937 Bool locked, 1938 Bool addSubCarry, 1939 IROp op8, 1940 Bool keep, 1941 Int size, 1942 Int delta0, 1943 const HChar* t_x86opc ) 1944{ 1945 HChar dis_buf[50]; 1946 Int len; 1947 IRType ty = szToITy(size); 1948 IRTemp dst1 = newTemp(ty); 1949 IRTemp src = newTemp(ty); 1950 IRTemp dst0 = newTemp(ty); 1951 UChar rm = getIByte(delta0); 1952 IRTemp addr = IRTemp_INVALID; 1953 1954 /* addSubCarry == True indicates the intended operation is 1955 add-with-carry or subtract-with-borrow. */ 1956 if (addSubCarry) { 1957 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8); 1958 vassert(keep); 1959 } 1960 1961 if (epartIsReg(rm)) { 1962 /* Specially handle XOR reg,reg, because that doesn't really 1963 depend on reg, and doing the obvious thing potentially 1964 generates a spurious value check failure due to the bogus 1965 dependency. Ditto SBB reg,reg.*/ 1966 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) 1967 && gregOfRM(rm) == eregOfRM(rm)) { 1968 putIReg(size, eregOfRM(rm), mkU(ty,0)); 1969 } 1970 assign(dst0, getIReg(size,eregOfRM(rm))); 1971 assign(src, getIReg(size,gregOfRM(rm))); 1972 1973 if (addSubCarry && op8 == Iop_Add8) { 1974 helper_ADC( size, dst1, dst0, src, 1975 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 1976 putIReg(size, eregOfRM(rm), mkexpr(dst1)); 1977 } else 1978 if (addSubCarry && op8 == Iop_Sub8) { 1979 helper_SBB( size, dst1, dst0, src, 1980 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 1981 putIReg(size, eregOfRM(rm), mkexpr(dst1)); 1982 } else { 1983 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 1984 if (isAddSub(op8)) 1985 setFlags_DEP1_DEP2(op8, dst0, src, ty); 1986 else 1987 setFlags_DEP1(op8, dst1, ty); 1988 if (keep) 1989 putIReg(size, eregOfRM(rm), mkexpr(dst1)); 1990 } 1991 1992 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size), 1993 nameIReg(size,gregOfRM(rm)), 1994 nameIReg(size,eregOfRM(rm))); 1995 return 1+delta0; 1996 } 1997 1998 /* E refers to memory */ 1999 { 2000 addr = disAMode ( &len, sorb, delta0, dis_buf); 2001 assign(dst0, loadLE(ty,mkexpr(addr))); 2002 assign(src, getIReg(size,gregOfRM(rm))); 2003 2004 if (addSubCarry && op8 == Iop_Add8) { 2005 if (locked) { 2006 /* cas-style store */ 2007 helper_ADC( size, dst1, dst0, src, 2008 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 2009 } else { 2010 /* normal store */ 2011 helper_ADC( size, dst1, dst0, src, 2012 /*store*/addr, IRTemp_INVALID, 0 ); 2013 } 2014 } else 2015 if (addSubCarry && op8 == Iop_Sub8) { 2016 if (locked) { 2017 /* cas-style store */ 2018 helper_SBB( size, dst1, dst0, src, 2019 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 2020 } else { 2021 /* normal store */ 2022 helper_SBB( size, dst1, dst0, src, 2023 /*store*/addr, IRTemp_INVALID, 0 ); 2024 } 2025 } else { 2026 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2027 if (keep) { 2028 if (locked) { 2029 if (0) vex_printf("locked case\n" ); 2030 casLE( mkexpr(addr), 2031 mkexpr(dst0)/*expval*/, 2032 mkexpr(dst1)/*newval*/, guest_EIP_curr_instr ); 2033 } else { 2034 if (0) vex_printf("nonlocked case\n"); 2035 storeLE(mkexpr(addr), mkexpr(dst1)); 2036 } 2037 } 2038 if (isAddSub(op8)) 2039 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2040 
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          nameIReg(size,gregOfRM(rm)), dis_buf);
      return len+delta0;
   }
}


/* Handle move instructions of the form
      mov E, G  meaning
      mov reg-or-mem, reg
   Is passed a ptr to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %E, tmpv
                       PUT tmpv, %G

   If E is mem  -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpb
                       PUT tmpb, %G
*/
static
UInt dis_mov_E_G ( UChar       sorb,
                   Int         size,
                   Int         delta0 )
{
   Int len;
   UChar rm = getIByte(delta0);
   HChar dis_buf[50];

   if (epartIsReg(rm)) {
      putIReg(size, gregOfRM(rm), getIReg(size, eregOfRM(rm)));
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIReg(size,eregOfRM(rm)),
                           nameIReg(size,gregOfRM(rm)));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      putIReg(size, gregOfRM(rm), loadLE(szToITy(size), mkexpr(addr)));
      DIP("mov%c %s,%s\n", nameISize(size),
                           dis_buf,nameIReg(size,gregOfRM(rm)));
      return delta0+len;
   }
}


/* Handle move instructions of the form
      mov G, E  meaning
      mov reg, reg-or-mem
   Is passed a ptr to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.

   G(src) is reg.
   E(dst) is reg-or-mem

   If E is reg, -->    GET %G, tmp
                       PUT tmp, %E

   If E is mem, -->    (getAddr E) -> tmpa
                       GET %G, tmpv
                       ST tmpv, (tmpa)
*/
static
UInt dis_mov_G_E ( UChar       sorb,
                   Int         size,
                   Int         delta0 )
{
   Int len;
   UChar rm = getIByte(delta0);
   HChar dis_buf[50];

   if (epartIsReg(rm)) {
      putIReg(size, eregOfRM(rm), getIReg(size, gregOfRM(rm)));
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIReg(size,gregOfRM(rm)),
                           nameIReg(size,eregOfRM(rm)));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf);
      storeLE( mkexpr(addr), getIReg(size, gregOfRM(rm)) );
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIReg(size,gregOfRM(rm)), dis_buf);
      return len+delta0;
   }
}


/* op $immediate, AL/AX/EAX.
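   (e.g. opcode 0x04 is "addb $imm8, %al" and 0x05 is
   "addl $imm32, %eax"; the immediate follows the opcode directly,
   little-endian.)
*/

/* A small self-contained sketch, for illustration only: the function
   name is invented and nothing calls it.  It shows how the
   little-endian imm32 of the 0x05 form is laid out in the insn
   stream; getUDisp does the equivalent job for the real decoder. */
static UInt example_imm32_after_opcode ( const UChar* insn )
{
   /* insn[0] is the opcode (0x05); insn[1..4] hold the immediate. */
   return (UInt)insn[1]
          | ((UInt)insn[2] << 8)
          | ((UInt)insn[3] << 16)
          | ((UInt)insn[4] << 24);
}

/* dis_op_imm_A itself follows.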
*/ 2140static 2141UInt dis_op_imm_A ( Int size, 2142 Bool carrying, 2143 IROp op8, 2144 Bool keep, 2145 Int delta, 2146 const HChar* t_x86opc ) 2147{ 2148 IRType ty = szToITy(size); 2149 IRTemp dst0 = newTemp(ty); 2150 IRTemp src = newTemp(ty); 2151 IRTemp dst1 = newTemp(ty); 2152 UInt lit = getUDisp(size,delta); 2153 assign(dst0, getIReg(size,R_EAX)); 2154 assign(src, mkU(ty,lit)); 2155 2156 if (isAddSub(op8) && !carrying) { 2157 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2158 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2159 } 2160 else 2161 if (isLogic(op8)) { 2162 vassert(!carrying); 2163 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2164 setFlags_DEP1(op8, dst1, ty); 2165 } 2166 else 2167 if (op8 == Iop_Add8 && carrying) { 2168 helper_ADC( size, dst1, dst0, src, 2169 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2170 } 2171 else 2172 if (op8 == Iop_Sub8 && carrying) { 2173 helper_SBB( size, dst1, dst0, src, 2174 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2175 } 2176 else 2177 vpanic("dis_op_imm_A(x86,guest)"); 2178 2179 if (keep) 2180 putIReg(size, R_EAX, mkexpr(dst1)); 2181 2182 DIP("%s%c $0x%x, %s\n", t_x86opc, nameISize(size), 2183 lit, nameIReg(size,R_EAX)); 2184 return delta+size; 2185} 2186 2187 2188/* Sign- and Zero-extending moves. */ 2189static 2190UInt dis_movx_E_G ( UChar sorb, 2191 Int delta, Int szs, Int szd, Bool sign_extend ) 2192{ 2193 UChar rm = getIByte(delta); 2194 if (epartIsReg(rm)) { 2195 if (szd == szs) { 2196 // mutant case. See #250799 2197 putIReg(szd, gregOfRM(rm), 2198 getIReg(szs,eregOfRM(rm))); 2199 } else { 2200 // normal case 2201 putIReg(szd, gregOfRM(rm), 2202 unop(mkWidenOp(szs,szd,sign_extend), 2203 getIReg(szs,eregOfRM(rm)))); 2204 } 2205 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 2206 nameISize(szs), nameISize(szd), 2207 nameIReg(szs,eregOfRM(rm)), 2208 nameIReg(szd,gregOfRM(rm))); 2209 return 1+delta; 2210 } 2211 2212 /* E refers to memory */ 2213 { 2214 Int len; 2215 HChar dis_buf[50]; 2216 IRTemp addr = disAMode ( &len, sorb, delta, dis_buf ); 2217 if (szd == szs) { 2218 // mutant case. See #250799 2219 putIReg(szd, gregOfRM(rm), 2220 loadLE(szToITy(szs),mkexpr(addr))); 2221 } else { 2222 // normal case 2223 putIReg(szd, gregOfRM(rm), 2224 unop(mkWidenOp(szs,szd,sign_extend), 2225 loadLE(szToITy(szs),mkexpr(addr)))); 2226 } 2227 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 2228 nameISize(szs), nameISize(szd), 2229 dis_buf, nameIReg(szd,gregOfRM(rm))); 2230 return len+delta; 2231 } 2232} 2233 2234 2235/* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 / 2236 16 / 8 bit quantity in the given IRTemp. */ 2237static 2238void codegen_div ( Int sz, IRTemp t, Bool signed_divide ) 2239{ 2240 IROp op = signed_divide ? Iop_DivModS64to32 : Iop_DivModU64to32; 2241 IRTemp src64 = newTemp(Ity_I64); 2242 IRTemp dst64 = newTemp(Ity_I64); 2243 switch (sz) { 2244 case 4: 2245 assign( src64, binop(Iop_32HLto64, 2246 getIReg(4,R_EDX), getIReg(4,R_EAX)) ); 2247 assign( dst64, binop(op, mkexpr(src64), mkexpr(t)) ); 2248 putIReg( 4, R_EAX, unop(Iop_64to32,mkexpr(dst64)) ); 2249 putIReg( 4, R_EDX, unop(Iop_64HIto32,mkexpr(dst64)) ); 2250 break; 2251 case 2: { 2252 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 2253 IROp widen1632 = signed_divide ? 
Iop_16Sto32 : Iop_16Uto32; 2254 assign( src64, unop(widen3264, 2255 binop(Iop_16HLto32, 2256 getIReg(2,R_EDX), getIReg(2,R_EAX))) ); 2257 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) ); 2258 putIReg( 2, R_EAX, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) ); 2259 putIReg( 2, R_EDX, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) ); 2260 break; 2261 } 2262 case 1: { 2263 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 2264 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 2265 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16; 2266 assign( src64, unop(widen3264, unop(widen1632, getIReg(2,R_EAX))) ); 2267 assign( dst64, 2268 binop(op, mkexpr(src64), 2269 unop(widen1632, unop(widen816, mkexpr(t)))) ); 2270 putIReg( 1, R_AL, unop(Iop_16to8, unop(Iop_32to16, 2271 unop(Iop_64to32,mkexpr(dst64)))) ); 2272 putIReg( 1, R_AH, unop(Iop_16to8, unop(Iop_32to16, 2273 unop(Iop_64HIto32,mkexpr(dst64)))) ); 2274 break; 2275 } 2276 default: vpanic("codegen_div(x86)"); 2277 } 2278} 2279 2280 2281static 2282UInt dis_Grp1 ( UChar sorb, Bool locked, 2283 Int delta, UChar modrm, 2284 Int am_sz, Int d_sz, Int sz, UInt d32 ) 2285{ 2286 Int len; 2287 HChar dis_buf[50]; 2288 IRType ty = szToITy(sz); 2289 IRTemp dst1 = newTemp(ty); 2290 IRTemp src = newTemp(ty); 2291 IRTemp dst0 = newTemp(ty); 2292 IRTemp addr = IRTemp_INVALID; 2293 IROp op8 = Iop_INVALID; 2294 UInt mask = sz==1 ? 0xFF : (sz==2 ? 0xFFFF : 0xFFFFFFFF); 2295 2296 switch (gregOfRM(modrm)) { 2297 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break; 2298 case 2: break; // ADC 2299 case 3: break; // SBB 2300 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break; 2301 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break; 2302 /*NOTREACHED*/ 2303 default: vpanic("dis_Grp1: unhandled case"); 2304 } 2305 2306 if (epartIsReg(modrm)) { 2307 vassert(am_sz == 1); 2308 2309 assign(dst0, getIReg(sz,eregOfRM(modrm))); 2310 assign(src, mkU(ty,d32 & mask)); 2311 2312 if (gregOfRM(modrm) == 2 /* ADC */) { 2313 helper_ADC( sz, dst1, dst0, src, 2314 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2315 } else 2316 if (gregOfRM(modrm) == 3 /* SBB */) { 2317 helper_SBB( sz, dst1, dst0, src, 2318 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2319 } else { 2320 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2321 if (isAddSub(op8)) 2322 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2323 else 2324 setFlags_DEP1(op8, dst1, ty); 2325 } 2326 2327 if (gregOfRM(modrm) < 7) 2328 putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); 2329 2330 delta += (am_sz + d_sz); 2331 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), d32, 2332 nameIReg(sz,eregOfRM(modrm))); 2333 } else { 2334 addr = disAMode ( &len, sorb, delta, dis_buf); 2335 2336 assign(dst0, loadLE(ty,mkexpr(addr))); 2337 assign(src, mkU(ty,d32 & mask)); 2338 2339 if (gregOfRM(modrm) == 2 /* ADC */) { 2340 if (locked) { 2341 /* cas-style store */ 2342 helper_ADC( sz, dst1, dst0, src, 2343 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 2344 } else { 2345 /* normal store */ 2346 helper_ADC( sz, dst1, dst0, src, 2347 /*store*/addr, IRTemp_INVALID, 0 ); 2348 } 2349 } else 2350 if (gregOfRM(modrm) == 3 /* SBB */) { 2351 if (locked) { 2352 /* cas-style store */ 2353 helper_SBB( sz, dst1, dst0, src, 2354 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 2355 } else { 2356 /* normal store */ 2357 helper_SBB( sz, dst1, dst0, src, 2358 /*store*/addr, IRTemp_INVALID, 0 ); 2359 } 2360 } else { 2361 assign(dst1, 
binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2362 if (gregOfRM(modrm) < 7) { 2363 if (locked) { 2364 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/, 2365 mkexpr(dst1)/*newVal*/, 2366 guest_EIP_curr_instr ); 2367 } else { 2368 storeLE(mkexpr(addr), mkexpr(dst1)); 2369 } 2370 } 2371 if (isAddSub(op8)) 2372 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2373 else 2374 setFlags_DEP1(op8, dst1, ty); 2375 } 2376 2377 delta += (len+d_sz); 2378 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), 2379 d32, dis_buf); 2380 } 2381 return delta; 2382} 2383 2384 2385/* Group 2 extended opcodes. shift_expr must be an 8-bit typed 2386 expression. */ 2387 2388static 2389UInt dis_Grp2 ( UChar sorb, 2390 Int delta, UChar modrm, 2391 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr, 2392 const HChar* shift_expr_txt, Bool* decode_OK ) 2393{ 2394 /* delta on entry points at the modrm byte. */ 2395 HChar dis_buf[50]; 2396 Int len; 2397 Bool isShift, isRotate, isRotateC; 2398 IRType ty = szToITy(sz); 2399 IRTemp dst0 = newTemp(ty); 2400 IRTemp dst1 = newTemp(ty); 2401 IRTemp addr = IRTemp_INVALID; 2402 2403 *decode_OK = True; 2404 2405 vassert(sz == 1 || sz == 2 || sz == 4); 2406 2407 /* Put value to shift/rotate in dst0. */ 2408 if (epartIsReg(modrm)) { 2409 assign(dst0, getIReg(sz, eregOfRM(modrm))); 2410 delta += (am_sz + d_sz); 2411 } else { 2412 addr = disAMode ( &len, sorb, delta, dis_buf); 2413 assign(dst0, loadLE(ty,mkexpr(addr))); 2414 delta += len + d_sz; 2415 } 2416 2417 isShift = False; 2418 switch (gregOfRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; } 2419 2420 isRotate = False; 2421 switch (gregOfRM(modrm)) { case 0: case 1: isRotate = True; } 2422 2423 isRotateC = False; 2424 switch (gregOfRM(modrm)) { case 2: case 3: isRotateC = True; } 2425 2426 if (!isShift && !isRotate && !isRotateC) { 2427 /*NOTREACHED*/ 2428 vpanic("dis_Grp2(Reg): unhandled case(x86)"); 2429 } 2430 2431 if (isRotateC) { 2432 /* call a helper; these insns are so ridiculous they do not 2433 deserve better */ 2434 Bool left = toBool(gregOfRM(modrm) == 2); 2435 IRTemp r64 = newTemp(Ity_I64); 2436 IRExpr** args 2437 = mkIRExprVec_4( widenUto32(mkexpr(dst0)), /* thing to rotate */ 2438 widenUto32(shift_expr), /* rotate amount */ 2439 widenUto32(mk_x86g_calculate_eflags_all()), 2440 mkU32(sz) ); 2441 assign( r64, mkIRExprCCall( 2442 Ity_I64, 2443 0/*regparm*/, 2444 left ? "x86g_calculate_RCL" : "x86g_calculate_RCR", 2445 left ? &x86g_calculate_RCL : &x86g_calculate_RCR, 2446 args 2447 ) 2448 ); 2449 /* new eflags in hi half r64; new value in lo half r64 */ 2450 assign( dst1, narrowTo(ty, unop(Iop_64to32, mkexpr(r64))) ); 2451 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 2452 stmt( IRStmt_Put( OFFB_CC_DEP1, unop(Iop_64HIto32, mkexpr(r64)) )); 2453 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 2454 /* Set NDEP even though it isn't used. This makes redundant-PUT 2455 elimination of previous stores to this field work better. 
*/ 2456 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 2457 } 2458 2459 if (isShift) { 2460 2461 IRTemp pre32 = newTemp(Ity_I32); 2462 IRTemp res32 = newTemp(Ity_I32); 2463 IRTemp res32ss = newTemp(Ity_I32); 2464 IRTemp shift_amt = newTemp(Ity_I8); 2465 IROp op32; 2466 2467 switch (gregOfRM(modrm)) { 2468 case 4: op32 = Iop_Shl32; break; 2469 case 5: op32 = Iop_Shr32; break; 2470 case 6: op32 = Iop_Shl32; break; 2471 case 7: op32 = Iop_Sar32; break; 2472 /*NOTREACHED*/ 2473 default: vpanic("dis_Grp2:shift"); break; 2474 } 2475 2476 /* Widen the value to be shifted to 32 bits, do the shift, and 2477 narrow back down. This seems surprisingly long-winded, but 2478 unfortunately the Intel semantics requires that 8/16-bit 2479 shifts give defined results for shift values all the way up 2480 to 31, and this seems the simplest way to do it. It has the 2481 advantage that the only IR level shifts generated are of 32 2482 bit values, and the shift amount is guaranteed to be in the 2483 range 0 .. 31, thereby observing the IR semantics requiring 2484 all shift values to be in the range 0 .. 2^word_size-1. */ 2485 2486 /* shift_amt = shift_expr & 31, regardless of operation size */ 2487 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(31)) ); 2488 2489 /* suitably widen the value to be shifted to 32 bits. */ 2490 assign( pre32, op32==Iop_Sar32 ? widenSto32(mkexpr(dst0)) 2491 : widenUto32(mkexpr(dst0)) ); 2492 2493 /* res32 = pre32 `shift` shift_amt */ 2494 assign( res32, binop(op32, mkexpr(pre32), mkexpr(shift_amt)) ); 2495 2496 /* res32ss = pre32 `shift` ((shift_amt - 1) & 31) */ 2497 assign( res32ss, 2498 binop(op32, 2499 mkexpr(pre32), 2500 binop(Iop_And8, 2501 binop(Iop_Sub8, 2502 mkexpr(shift_amt), mkU8(1)), 2503 mkU8(31))) ); 2504 2505 /* Build the flags thunk. */ 2506 setFlags_DEP1_DEP2_shift(op32, res32, res32ss, ty, shift_amt); 2507 2508 /* Narrow the result back down. */ 2509 assign( dst1, narrowTo(ty, mkexpr(res32)) ); 2510 2511 } /* if (isShift) */ 2512 2513 else 2514 if (isRotate) { 2515 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2); 2516 Bool left = toBool(gregOfRM(modrm) == 0); 2517 IRTemp rot_amt = newTemp(Ity_I8); 2518 IRTemp rot_amt32 = newTemp(Ity_I8); 2519 IRTemp oldFlags = newTemp(Ity_I32); 2520 2521 /* rot_amt = shift_expr & mask */ 2522 /* By masking the rotate amount thusly, the IR-level Shl/Shr 2523 expressions never shift beyond the word size and thus remain 2524 well defined. */ 2525 assign(rot_amt32, binop(Iop_And8, shift_expr, mkU8(31))); 2526 2527 if (ty == Ity_I32) 2528 assign(rot_amt, mkexpr(rot_amt32)); 2529 else 2530 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt32), mkU8(8*sz-1))); 2531 2532 if (left) { 2533 2534 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */ 2535 assign(dst1, 2536 binop( mkSizedOp(ty,Iop_Or8), 2537 binop( mkSizedOp(ty,Iop_Shl8), 2538 mkexpr(dst0), 2539 mkexpr(rot_amt) 2540 ), 2541 binop( mkSizedOp(ty,Iop_Shr8), 2542 mkexpr(dst0), 2543 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 2544 ) 2545 ) 2546 ); 2547 ccOp += X86G_CC_OP_ROLB; 2548 2549 } else { /* right */ 2550 2551 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */ 2552 assign(dst1, 2553 binop( mkSizedOp(ty,Iop_Or8), 2554 binop( mkSizedOp(ty,Iop_Shr8), 2555 mkexpr(dst0), 2556 mkexpr(rot_amt) 2557 ), 2558 binop( mkSizedOp(ty,Iop_Shl8), 2559 mkexpr(dst0), 2560 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 2561 ) 2562 ) 2563 ); 2564 ccOp += X86G_CC_OP_RORB; 2565 2566 } 2567 2568 /* dst1 now holds the rotated value. Build flag thunk. 
We 2569 need the resulting value for this, and the previous flags. 2570 Except don't set it if the rotate count is zero. */ 2571 2572 assign(oldFlags, mk_x86g_calculate_eflags_all()); 2573 2574 /* rot_amt32 :: Ity_I8. We need to convert it to I1. */ 2575 IRTemp rot_amt32b = newTemp(Ity_I1); 2576 assign(rot_amt32b, binop(Iop_CmpNE8, mkexpr(rot_amt32), mkU8(0)) ); 2577 2578 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */ 2579 stmt( IRStmt_Put( OFFB_CC_OP, 2580 IRExpr_ITE( mkexpr(rot_amt32b), 2581 mkU32(ccOp), 2582 IRExpr_Get(OFFB_CC_OP,Ity_I32) ) )); 2583 stmt( IRStmt_Put( OFFB_CC_DEP1, 2584 IRExpr_ITE( mkexpr(rot_amt32b), 2585 widenUto32(mkexpr(dst1)), 2586 IRExpr_Get(OFFB_CC_DEP1,Ity_I32) ) )); 2587 stmt( IRStmt_Put( OFFB_CC_DEP2, 2588 IRExpr_ITE( mkexpr(rot_amt32b), 2589 mkU32(0), 2590 IRExpr_Get(OFFB_CC_DEP2,Ity_I32) ) )); 2591 stmt( IRStmt_Put( OFFB_CC_NDEP, 2592 IRExpr_ITE( mkexpr(rot_amt32b), 2593 mkexpr(oldFlags), 2594 IRExpr_Get(OFFB_CC_NDEP,Ity_I32) ) )); 2595 } /* if (isRotate) */ 2596 2597 /* Save result, and finish up. */ 2598 if (epartIsReg(modrm)) { 2599 putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); 2600 if (vex_traceflags & VEX_TRACE_FE) { 2601 vex_printf("%s%c ", 2602 nameGrp2(gregOfRM(modrm)), nameISize(sz) ); 2603 if (shift_expr_txt) 2604 vex_printf("%s", shift_expr_txt); 2605 else 2606 ppIRExpr(shift_expr); 2607 vex_printf(", %s\n", nameIReg(sz,eregOfRM(modrm))); 2608 } 2609 } else { 2610 storeLE(mkexpr(addr), mkexpr(dst1)); 2611 if (vex_traceflags & VEX_TRACE_FE) { 2612 vex_printf("%s%c ", 2613 nameGrp2(gregOfRM(modrm)), nameISize(sz) ); 2614 if (shift_expr_txt) 2615 vex_printf("%s", shift_expr_txt); 2616 else 2617 ppIRExpr(shift_expr); 2618 vex_printf(", %s\n", dis_buf); 2619 } 2620 } 2621 return delta; 2622} 2623 2624 2625/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */ 2626static 2627UInt dis_Grp8_Imm ( UChar sorb, 2628 Bool locked, 2629 Int delta, UChar modrm, 2630 Int am_sz, Int sz, UInt src_val, 2631 Bool* decode_OK ) 2632{ 2633 /* src_val denotes a d8. 2634 And delta on entry points at the modrm byte. */ 2635 2636 IRType ty = szToITy(sz); 2637 IRTemp t2 = newTemp(Ity_I32); 2638 IRTemp t2m = newTemp(Ity_I32); 2639 IRTemp t_addr = IRTemp_INVALID; 2640 HChar dis_buf[50]; 2641 UInt mask; 2642 2643 /* we're optimists :-) */ 2644 *decode_OK = True; 2645 2646 /* Limit src_val -- the bit offset -- to something within a word. 2647 The Intel docs say that literal offsets larger than a word are 2648 masked in this way. */ 2649 switch (sz) { 2650 case 2: src_val &= 15; break; 2651 case 4: src_val &= 31; break; 2652 default: *decode_OK = False; return delta; 2653 } 2654 2655 /* Invent a mask suitable for the operation. */ 2656 switch (gregOfRM(modrm)) { 2657 case 4: /* BT */ mask = 0; break; 2658 case 5: /* BTS */ mask = 1 << src_val; break; 2659 case 6: /* BTR */ mask = ~(1 << src_val); break; 2660 case 7: /* BTC */ mask = 1 << src_val; break; 2661 /* If this needs to be extended, probably simplest to make a 2662 new function to handle the other cases (0 .. 3). The 2663 Intel docs do however not indicate any use for 0 .. 3, so 2664 we don't expect this to happen. */ 2665 default: *decode_OK = False; return delta; 2666 } 2667 2668 /* Fetch the value to be tested and modified into t2, which is 2669 32-bits wide regardless of sz. 
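      (Example: for "btsw $3, %ax", sz == 2: AX is zero-widened to 32
      bits here, bit 3 is then set in the widened copy, and only the
      low 16 bits are written back.)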
*/ 2670 if (epartIsReg(modrm)) { 2671 vassert(am_sz == 1); 2672 assign( t2, widenUto32(getIReg(sz, eregOfRM(modrm))) ); 2673 delta += (am_sz + 1); 2674 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz), 2675 src_val, nameIReg(sz,eregOfRM(modrm))); 2676 } else { 2677 Int len; 2678 t_addr = disAMode ( &len, sorb, delta, dis_buf); 2679 delta += (len+1); 2680 assign( t2, widenUto32(loadLE(ty, mkexpr(t_addr))) ); 2681 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz), 2682 src_val, dis_buf); 2683 } 2684 2685 /* Compute the new value into t2m, if non-BT. */ 2686 switch (gregOfRM(modrm)) { 2687 case 4: /* BT */ 2688 break; 2689 case 5: /* BTS */ 2690 assign( t2m, binop(Iop_Or32, mkU32(mask), mkexpr(t2)) ); 2691 break; 2692 case 6: /* BTR */ 2693 assign( t2m, binop(Iop_And32, mkU32(mask), mkexpr(t2)) ); 2694 break; 2695 case 7: /* BTC */ 2696 assign( t2m, binop(Iop_Xor32, mkU32(mask), mkexpr(t2)) ); 2697 break; 2698 default: 2699 /*NOTREACHED*/ /*the previous switch guards this*/ 2700 vassert(0); 2701 } 2702 2703 /* Write the result back, if non-BT. If the CAS fails then we 2704 side-exit from the trace at this point, and so the flag state is 2705 not affected. This is of course as required. */ 2706 if (gregOfRM(modrm) != 4 /* BT */) { 2707 if (epartIsReg(modrm)) { 2708 putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m))); 2709 } else { 2710 if (locked) { 2711 casLE( mkexpr(t_addr), 2712 narrowTo(ty, mkexpr(t2))/*expd*/, 2713 narrowTo(ty, mkexpr(t2m))/*new*/, 2714 guest_EIP_curr_instr ); 2715 } else { 2716 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m))); 2717 } 2718 } 2719 } 2720 2721 /* Copy relevant bit from t2 into the carry flag. */ 2722 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 2723 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 2724 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 2725 stmt( IRStmt_Put( 2726 OFFB_CC_DEP1, 2727 binop(Iop_And32, 2728 binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)), 2729 mkU32(1)) 2730 )); 2731 /* Set NDEP even though it isn't used. This makes redundant-PUT 2732 elimination of previous stores to this field work better. */ 2733 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 2734 2735 return delta; 2736} 2737 2738 2739/* Signed/unsigned widening multiply. Generate IR to multiply the 2740 value in EAX/AX/AL by the given IRTemp, and park the result in 2741 EDX:EAX/DX:AX/AX. 2742*/ 2743static void codegen_mulL_A_D ( Int sz, Bool syned, 2744 IRTemp tmp, const HChar* tmp_txt ) 2745{ 2746 IRType ty = szToITy(sz); 2747 IRTemp t1 = newTemp(ty); 2748 2749 assign( t1, getIReg(sz, R_EAX) ); 2750 2751 switch (ty) { 2752 case Ity_I32: { 2753 IRTemp res64 = newTemp(Ity_I64); 2754 IRTemp resHi = newTemp(Ity_I32); 2755 IRTemp resLo = newTemp(Ity_I32); 2756 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32; 2757 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB; 2758 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp ); 2759 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 2760 assign( resHi, unop(Iop_64HIto32,mkexpr(res64))); 2761 assign( resLo, unop(Iop_64to32,mkexpr(res64))); 2762 putIReg(4, R_EDX, mkexpr(resHi)); 2763 putIReg(4, R_EAX, mkexpr(resLo)); 2764 break; 2765 } 2766 case Ity_I16: { 2767 IRTemp res32 = newTemp(Ity_I32); 2768 IRTemp resHi = newTemp(Ity_I16); 2769 IRTemp resLo = newTemp(Ity_I16); 2770 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16; 2771 UInt tBaseOp = syned ? 
X86G_CC_OP_SMULB : X86G_CC_OP_UMULB; 2772 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp ); 2773 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 2774 assign( resHi, unop(Iop_32HIto16,mkexpr(res32))); 2775 assign( resLo, unop(Iop_32to16,mkexpr(res32))); 2776 putIReg(2, R_EDX, mkexpr(resHi)); 2777 putIReg(2, R_EAX, mkexpr(resLo)); 2778 break; 2779 } 2780 case Ity_I8: { 2781 IRTemp res16 = newTemp(Ity_I16); 2782 IRTemp resHi = newTemp(Ity_I8); 2783 IRTemp resLo = newTemp(Ity_I8); 2784 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8; 2785 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB; 2786 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp ); 2787 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 2788 assign( resHi, unop(Iop_16HIto8,mkexpr(res16))); 2789 assign( resLo, unop(Iop_16to8,mkexpr(res16))); 2790 putIReg(2, R_EAX, mkexpr(res16)); 2791 break; 2792 } 2793 default: 2794 vpanic("codegen_mulL_A_D(x86)"); 2795 } 2796 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt); 2797} 2798 2799 2800/* Group 3 extended opcodes. */ 2801static 2802UInt dis_Grp3 ( UChar sorb, Bool locked, Int sz, Int delta, Bool* decode_OK ) 2803{ 2804 UInt d32; 2805 UChar modrm; 2806 HChar dis_buf[50]; 2807 Int len; 2808 IRTemp addr; 2809 IRType ty = szToITy(sz); 2810 IRTemp t1 = newTemp(ty); 2811 IRTemp dst1, src, dst0; 2812 2813 *decode_OK = True; /* may change this later */ 2814 2815 modrm = getIByte(delta); 2816 2817 if (locked && (gregOfRM(modrm) != 2 && gregOfRM(modrm) != 3)) { 2818 /* LOCK prefix only allowed with not and neg subopcodes */ 2819 *decode_OK = False; 2820 return delta; 2821 } 2822 2823 if (epartIsReg(modrm)) { 2824 switch (gregOfRM(modrm)) { 2825 case 0: { /* TEST */ 2826 delta++; d32 = getUDisp(sz, delta); delta += sz; 2827 dst1 = newTemp(ty); 2828 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 2829 getIReg(sz,eregOfRM(modrm)), 2830 mkU(ty,d32))); 2831 setFlags_DEP1( Iop_And8, dst1, ty ); 2832 DIP("test%c $0x%x, %s\n", nameISize(sz), d32, 2833 nameIReg(sz, eregOfRM(modrm))); 2834 break; 2835 } 2836 case 1: /* UNDEFINED */ 2837 /* The Intel docs imply this insn is undefined and binutils 2838 agrees. Unfortunately Core 2 will run it (with who 2839 knows what result?) sandpile.org reckons it's an alias 2840 for case 0. We play safe. 
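            Refusing the decode sends the insn down the normal
            undecodable-instruction path rather than guessing at
            semantics.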
*/ 2841 *decode_OK = False; 2842 break; 2843 case 2: /* NOT */ 2844 delta++; 2845 putIReg(sz, eregOfRM(modrm), 2846 unop(mkSizedOp(ty,Iop_Not8), 2847 getIReg(sz, eregOfRM(modrm)))); 2848 DIP("not%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2849 break; 2850 case 3: /* NEG */ 2851 delta++; 2852 dst0 = newTemp(ty); 2853 src = newTemp(ty); 2854 dst1 = newTemp(ty); 2855 assign(dst0, mkU(ty,0)); 2856 assign(src, getIReg(sz,eregOfRM(modrm))); 2857 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src))); 2858 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 2859 putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); 2860 DIP("neg%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2861 break; 2862 case 4: /* MUL (unsigned widening) */ 2863 delta++; 2864 src = newTemp(ty); 2865 assign(src, getIReg(sz,eregOfRM(modrm))); 2866 codegen_mulL_A_D ( sz, False, src, nameIReg(sz,eregOfRM(modrm)) ); 2867 break; 2868 case 5: /* IMUL (signed widening) */ 2869 delta++; 2870 src = newTemp(ty); 2871 assign(src, getIReg(sz,eregOfRM(modrm))); 2872 codegen_mulL_A_D ( sz, True, src, nameIReg(sz,eregOfRM(modrm)) ); 2873 break; 2874 case 6: /* DIV */ 2875 delta++; 2876 assign( t1, getIReg(sz, eregOfRM(modrm)) ); 2877 codegen_div ( sz, t1, False ); 2878 DIP("div%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2879 break; 2880 case 7: /* IDIV */ 2881 delta++; 2882 assign( t1, getIReg(sz, eregOfRM(modrm)) ); 2883 codegen_div ( sz, t1, True ); 2884 DIP("idiv%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2885 break; 2886 default: 2887 /* This can't happen - gregOfRM should return 0 .. 7 only */ 2888 vpanic("Grp3(x86)"); 2889 } 2890 } else { 2891 addr = disAMode ( &len, sorb, delta, dis_buf ); 2892 t1 = newTemp(ty); 2893 delta += len; 2894 assign(t1, loadLE(ty,mkexpr(addr))); 2895 switch (gregOfRM(modrm)) { 2896 case 0: { /* TEST */ 2897 d32 = getUDisp(sz, delta); delta += sz; 2898 dst1 = newTemp(ty); 2899 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 2900 mkexpr(t1), mkU(ty,d32))); 2901 setFlags_DEP1( Iop_And8, dst1, ty ); 2902 DIP("test%c $0x%x, %s\n", nameISize(sz), d32, dis_buf); 2903 break; 2904 } 2905 case 1: /* UNDEFINED */ 2906 /* See comment above on R case */ 2907 *decode_OK = False; 2908 break; 2909 case 2: /* NOT */ 2910 dst1 = newTemp(ty); 2911 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1))); 2912 if (locked) { 2913 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 2914 guest_EIP_curr_instr ); 2915 } else { 2916 storeLE( mkexpr(addr), mkexpr(dst1) ); 2917 } 2918 DIP("not%c %s\n", nameISize(sz), dis_buf); 2919 break; 2920 case 3: /* NEG */ 2921 dst0 = newTemp(ty); 2922 src = newTemp(ty); 2923 dst1 = newTemp(ty); 2924 assign(dst0, mkU(ty,0)); 2925 assign(src, mkexpr(t1)); 2926 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), 2927 mkexpr(dst0), mkexpr(src))); 2928 if (locked) { 2929 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 2930 guest_EIP_curr_instr ); 2931 } else { 2932 storeLE( mkexpr(addr), mkexpr(dst1) ); 2933 } 2934 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 2935 DIP("neg%c %s\n", nameISize(sz), dis_buf); 2936 break; 2937 case 4: /* MUL */ 2938 codegen_mulL_A_D ( sz, False, t1, dis_buf ); 2939 break; 2940 case 5: /* IMUL */ 2941 codegen_mulL_A_D ( sz, True, t1, dis_buf ); 2942 break; 2943 case 6: /* DIV */ 2944 codegen_div ( sz, t1, False ); 2945 DIP("div%c %s\n", nameISize(sz), dis_buf); 2946 break; 2947 case 7: /* IDIV */ 2948 codegen_div ( sz, t1, True ); 2949 DIP("idiv%c %s\n", nameISize(sz), dis_buf); 2950 break; 2951 default: 2952 /* This 
can't happen - gregOfRM should return 0 .. 7 only */ 2953 vpanic("Grp3(x86)"); 2954 } 2955 } 2956 return delta; 2957} 2958 2959 2960/* Group 4 extended opcodes. */ 2961static 2962UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK ) 2963{ 2964 Int alen; 2965 UChar modrm; 2966 HChar dis_buf[50]; 2967 IRType ty = Ity_I8; 2968 IRTemp t1 = newTemp(ty); 2969 IRTemp t2 = newTemp(ty); 2970 2971 *decode_OK = True; 2972 2973 modrm = getIByte(delta); 2974 2975 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) { 2976 /* LOCK prefix only allowed with inc and dec subopcodes */ 2977 *decode_OK = False; 2978 return delta; 2979 } 2980 2981 if (epartIsReg(modrm)) { 2982 assign(t1, getIReg(1, eregOfRM(modrm))); 2983 switch (gregOfRM(modrm)) { 2984 case 0: /* INC */ 2985 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 2986 putIReg(1, eregOfRM(modrm), mkexpr(t2)); 2987 setFlags_INC_DEC( True, t2, ty ); 2988 break; 2989 case 1: /* DEC */ 2990 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 2991 putIReg(1, eregOfRM(modrm), mkexpr(t2)); 2992 setFlags_INC_DEC( False, t2, ty ); 2993 break; 2994 default: 2995 *decode_OK = False; 2996 return delta; 2997 } 2998 delta++; 2999 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), 3000 nameIReg(1, eregOfRM(modrm))); 3001 } else { 3002 IRTemp addr = disAMode ( &alen, sorb, delta, dis_buf ); 3003 assign( t1, loadLE(ty, mkexpr(addr)) ); 3004 switch (gregOfRM(modrm)) { 3005 case 0: /* INC */ 3006 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 3007 if (locked) { 3008 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 3009 guest_EIP_curr_instr ); 3010 } else { 3011 storeLE( mkexpr(addr), mkexpr(t2) ); 3012 } 3013 setFlags_INC_DEC( True, t2, ty ); 3014 break; 3015 case 1: /* DEC */ 3016 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 3017 if (locked) { 3018 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 3019 guest_EIP_curr_instr ); 3020 } else { 3021 storeLE( mkexpr(addr), mkexpr(t2) ); 3022 } 3023 setFlags_INC_DEC( False, t2, ty ); 3024 break; 3025 default: 3026 *decode_OK = False; 3027 return delta; 3028 } 3029 delta += alen; 3030 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), dis_buf); 3031 } 3032 return delta; 3033} 3034 3035 3036/* Group 5 extended opcodes. 
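   (Opcode 0xFF; the modrm reg field, bits 5..3, selects the
   operation: /0 INC, /1 DEC, /2 CALL, /4 JMP, /6 PUSH.  The far
   CALL/JMP forms /3 and /5 are not handled and fall to the default
   cases below.)
*/

/* Trivial standalone illustration -- the name is invented and
   nothing calls it -- of extracting the group digit from a modrm
   byte.  This is, in effect, the computation gregOfRM performs. */
static UInt example_group_digit ( UChar modrm )
{
   return (modrm >> 3) & 7;   /* bits 5..3 select the sub-opcode */
}

/* dis_Grp5 itself follows.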
*/ 3037static 3038UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta, 3039 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK ) 3040{ 3041 Int len; 3042 UChar modrm; 3043 HChar dis_buf[50]; 3044 IRTemp addr = IRTemp_INVALID; 3045 IRType ty = szToITy(sz); 3046 IRTemp t1 = newTemp(ty); 3047 IRTemp t2 = IRTemp_INVALID; 3048 3049 *decode_OK = True; 3050 3051 modrm = getIByte(delta); 3052 3053 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) { 3054 /* LOCK prefix only allowed with inc and dec subopcodes */ 3055 *decode_OK = False; 3056 return delta; 3057 } 3058 3059 if (epartIsReg(modrm)) { 3060 assign(t1, getIReg(sz,eregOfRM(modrm))); 3061 switch (gregOfRM(modrm)) { 3062 case 0: /* INC */ 3063 vassert(sz == 2 || sz == 4); 3064 t2 = newTemp(ty); 3065 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 3066 mkexpr(t1), mkU(ty,1))); 3067 setFlags_INC_DEC( True, t2, ty ); 3068 putIReg(sz,eregOfRM(modrm),mkexpr(t2)); 3069 break; 3070 case 1: /* DEC */ 3071 vassert(sz == 2 || sz == 4); 3072 t2 = newTemp(ty); 3073 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 3074 mkexpr(t1), mkU(ty,1))); 3075 setFlags_INC_DEC( False, t2, ty ); 3076 putIReg(sz,eregOfRM(modrm),mkexpr(t2)); 3077 break; 3078 case 2: /* call Ev */ 3079 vassert(sz == 4); 3080 t2 = newTemp(Ity_I32); 3081 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); 3082 putIReg(4, R_ESP, mkexpr(t2)); 3083 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1)); 3084 jmp_treg(dres, Ijk_Call, t1); 3085 vassert(dres->whatNext == Dis_StopHere); 3086 break; 3087 case 4: /* jmp Ev */ 3088 vassert(sz == 4); 3089 jmp_treg(dres, Ijk_Boring, t1); 3090 vassert(dres->whatNext == Dis_StopHere); 3091 break; 3092 case 6: /* PUSH Ev */ 3093 vassert(sz == 4 || sz == 2); 3094 t2 = newTemp(Ity_I32); 3095 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); 3096 putIReg(4, R_ESP, mkexpr(t2) ); 3097 storeLE( mkexpr(t2), mkexpr(t1) ); 3098 break; 3099 default: 3100 *decode_OK = False; 3101 return delta; 3102 } 3103 delta++; 3104 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)), 3105 nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 3106 } else { 3107 addr = disAMode ( &len, sorb, delta, dis_buf ); 3108 assign(t1, loadLE(ty,mkexpr(addr))); 3109 switch (gregOfRM(modrm)) { 3110 case 0: /* INC */ 3111 t2 = newTemp(ty); 3112 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 3113 mkexpr(t1), mkU(ty,1))); 3114 if (locked) { 3115 casLE( mkexpr(addr), 3116 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr ); 3117 } else { 3118 storeLE(mkexpr(addr),mkexpr(t2)); 3119 } 3120 setFlags_INC_DEC( True, t2, ty ); 3121 break; 3122 case 1: /* DEC */ 3123 t2 = newTemp(ty); 3124 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 3125 mkexpr(t1), mkU(ty,1))); 3126 if (locked) { 3127 casLE( mkexpr(addr), 3128 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr ); 3129 } else { 3130 storeLE(mkexpr(addr),mkexpr(t2)); 3131 } 3132 setFlags_INC_DEC( False, t2, ty ); 3133 break; 3134 case 2: /* call Ev */ 3135 vassert(sz == 4); 3136 t2 = newTemp(Ity_I32); 3137 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); 3138 putIReg(4, R_ESP, mkexpr(t2)); 3139 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len)); 3140 jmp_treg(dres, Ijk_Call, t1); 3141 vassert(dres->whatNext == Dis_StopHere); 3142 break; 3143 case 4: /* JMP Ev */ 3144 vassert(sz == 4); 3145 jmp_treg(dres, Ijk_Boring, t1); 3146 vassert(dres->whatNext == Dis_StopHere); 3147 break; 3148 case 6: /* PUSH Ev */ 3149 vassert(sz == 4 || sz == 2); 3150 t2 = newTemp(Ity_I32); 3151 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); 3152 putIReg(4, R_ESP, 
                              mkexpr(t2) );
            storeLE( mkexpr(t2), mkexpr(t1) );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta += len;
      DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
                       nameISize(sz), dis_buf);
   }
   return delta;
}


/*------------------------------------------------------------*/
/*--- Disassembling string ops (including REP prefixes)    ---*/
/*------------------------------------------------------------*/

/* Code shared by all the string ops */
static
void dis_string_op_increment(Int sz, IRTemp t_inc)
{
   if (sz == 4 || sz == 2) {
      assign( t_inc,
              binop(Iop_Shl32, IRExpr_Get( OFFB_DFLAG, Ity_I32 ),
                               mkU8(sz/2) ) );
   } else {
      assign( t_inc,
              IRExpr_Get( OFFB_DFLAG, Ity_I32 ) );
   }
}

static
void dis_string_op( void (*dis_OP)( Int, IRTemp ),
                    Int sz, const HChar* name, UChar sorb )
{
   IRTemp t_inc = newTemp(Ity_I32);
   vassert(sorb == 0); /* hmm.  so what was the point of passing it in? */
   dis_string_op_increment(sz, t_inc);
   dis_OP( sz, t_inc );
   DIP("%s%c\n", name, nameISize(sz));
}

static
void dis_MOVS ( Int sz, IRTemp t_inc )
{
   IRType ty = szToITy(sz);
   IRTemp td = newTemp(Ity_I32);   /* EDI */
   IRTemp ts = newTemp(Ity_I32);   /* ESI */

   assign( td, getIReg(4, R_EDI) );
   assign( ts, getIReg(4, R_ESI) );

   storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );

   putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
   putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
}

static
void dis_LODS ( Int sz, IRTemp t_inc )
{
   IRType ty = szToITy(sz);
   IRTemp ts = newTemp(Ity_I32);   /* ESI */

   assign( ts, getIReg(4, R_ESI) );

   putIReg( sz, R_EAX, loadLE(ty, mkexpr(ts)) );

   putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
}

static
void dis_STOS ( Int sz, IRTemp t_inc )
{
   IRType ty = szToITy(sz);
   IRTemp ta = newTemp(ty);        /* EAX */
   IRTemp td = newTemp(Ity_I32);   /* EDI */

   assign( ta, getIReg(sz, R_EAX) );
   assign( td, getIReg(4, R_EDI) );

   storeLE( mkexpr(td), mkexpr(ta) );

   putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
}

static
void dis_CMPS ( Int sz, IRTemp t_inc )
{
   IRType ty  = szToITy(sz);
   IRTemp tdv = newTemp(ty);        /* (EDI) */
   IRTemp tsv = newTemp(ty);        /* (ESI) */
   IRTemp td  = newTemp(Ity_I32);   /* EDI */
   IRTemp ts  = newTemp(Ity_I32);   /* ESI */

   assign( td, getIReg(4, R_EDI) );
   assign( ts, getIReg(4, R_ESI) );

   assign( tdv, loadLE(ty,mkexpr(td)) );
   assign( tsv, loadLE(ty,mkexpr(ts)) );

   setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );

   putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
   putIReg(4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
}

static
void dis_SCAS ( Int sz, IRTemp t_inc )
{
   IRType ty  = szToITy(sz);
   IRTemp ta  = newTemp(ty);        /* EAX */
   IRTemp td  = newTemp(Ity_I32);   /* EDI */
   IRTemp tdv = newTemp(ty);        /* (EDI) */

   assign( ta, getIReg(sz, R_EAX) );
   assign( td, getIReg(4, R_EDI) );

   assign( tdv, loadLE(ty,mkexpr(td)) );
   setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );

   putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
}


/* Wrap the appropriate string op inside a REP/REPE/REPNE.
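   The generated IR mirrors the hardware loop: side-exit to the next
   insn if ECX is zero, decrement ECX, do one iteration, and then
   jump back to the start of the insn -- unconditionally for plain
   REP, but only while the supplied condition holds for REPE/REPNE.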
3280 We assume the insn is the last one in the basic block, and so emit a jump 3281 to the next insn, rather than just falling through. */ 3282static 3283void dis_REP_op ( /*MOD*/DisResult* dres, 3284 X86Condcode cond, 3285 void (*dis_OP)(Int, IRTemp), 3286 Int sz, Addr32 eip, Addr32 eip_next, const HChar* name ) 3287{ 3288 IRTemp t_inc = newTemp(Ity_I32); 3289 IRTemp tc = newTemp(Ity_I32); /* ECX */ 3290 3291 assign( tc, getIReg(4,R_ECX) ); 3292 3293 stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)), 3294 Ijk_Boring, 3295 IRConst_U32(eip_next), OFFB_EIP ) ); 3296 3297 putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) ); 3298 3299 dis_string_op_increment(sz, t_inc); 3300 dis_OP (sz, t_inc); 3301 3302 if (cond == X86CondAlways) { 3303 jmp_lit(dres, Ijk_Boring, eip); 3304 vassert(dres->whatNext == Dis_StopHere); 3305 } else { 3306 stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond), 3307 Ijk_Boring, 3308 IRConst_U32(eip), OFFB_EIP ) ); 3309 jmp_lit(dres, Ijk_Boring, eip_next); 3310 vassert(dres->whatNext == Dis_StopHere); 3311 } 3312 DIP("%s%c\n", name, nameISize(sz)); 3313} 3314 3315 3316/*------------------------------------------------------------*/ 3317/*--- Arithmetic, etc. ---*/ 3318/*------------------------------------------------------------*/ 3319 3320/* IMUL E, G. Supplied eip points to the modR/M byte. */ 3321static 3322UInt dis_mul_E_G ( UChar sorb, 3323 Int size, 3324 Int delta0 ) 3325{ 3326 Int alen; 3327 HChar dis_buf[50]; 3328 UChar rm = getIByte(delta0); 3329 IRType ty = szToITy(size); 3330 IRTemp te = newTemp(ty); 3331 IRTemp tg = newTemp(ty); 3332 IRTemp resLo = newTemp(ty); 3333 3334 assign( tg, getIReg(size, gregOfRM(rm)) ); 3335 if (epartIsReg(rm)) { 3336 assign( te, getIReg(size, eregOfRM(rm)) ); 3337 } else { 3338 IRTemp addr = disAMode( &alen, sorb, delta0, dis_buf ); 3339 assign( te, loadLE(ty,mkexpr(addr)) ); 3340 } 3341 3342 setFlags_MUL ( ty, te, tg, X86G_CC_OP_SMULB ); 3343 3344 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) ); 3345 3346 putIReg(size, gregOfRM(rm), mkexpr(resLo) ); 3347 3348 if (epartIsReg(rm)) { 3349 DIP("imul%c %s, %s\n", nameISize(size), 3350 nameIReg(size,eregOfRM(rm)), 3351 nameIReg(size,gregOfRM(rm))); 3352 return 1+delta0; 3353 } else { 3354 DIP("imul%c %s, %s\n", nameISize(size), 3355 dis_buf, nameIReg(size,gregOfRM(rm))); 3356 return alen+delta0; 3357 } 3358} 3359 3360 3361/* IMUL I * E -> G. Supplied eip points to the modR/M byte. */ 3362static 3363UInt dis_imul_I_E_G ( UChar sorb, 3364 Int size, 3365 Int delta, 3366 Int litsize ) 3367{ 3368 Int d32, alen; 3369 HChar dis_buf[50]; 3370 UChar rm = getIByte(delta); 3371 IRType ty = szToITy(size); 3372 IRTemp te = newTemp(ty); 3373 IRTemp tl = newTemp(ty); 3374 IRTemp resLo = newTemp(ty); 3375 3376 vassert(size == 1 || size == 2 || size == 4); 3377 3378 if (epartIsReg(rm)) { 3379 assign(te, getIReg(size, eregOfRM(rm))); 3380 delta++; 3381 } else { 3382 IRTemp addr = disAMode( &alen, sorb, delta, dis_buf ); 3383 assign(te, loadLE(ty, mkexpr(addr))); 3384 delta += alen; 3385 } 3386 d32 = getSDisp(litsize,delta); 3387 delta += litsize; 3388 3389 if (size == 1) d32 &= 0xFF; 3390 if (size == 2) d32 &= 0xFFFF; 3391 3392 assign(tl, mkU(ty,d32)); 3393 3394 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) )); 3395 3396 setFlags_MUL ( ty, te, tl, X86G_CC_OP_SMULB ); 3397 3398 putIReg(size, gregOfRM(rm), mkexpr(resLo)); 3399 3400 DIP("imul %d, %s, %s\n", d32, 3401 ( epartIsReg(rm) ? 
nameIReg(size,eregOfRM(rm)) : dis_buf ), 3402 nameIReg(size,gregOfRM(rm)) ); 3403 return delta; 3404} 3405 3406 3407/* Generate an IR sequence to do a count-leading-zeroes operation on 3408 the supplied IRTemp, and return a new IRTemp holding the result. 3409 'ty' may be Ity_I16 or Ity_I32 only. In the case where the 3410 argument is zero, return the number of bits in the word (the 3411 natural semantics). */ 3412static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) 3413{ 3414 vassert(ty == Ity_I32 || ty == Ity_I16); 3415 3416 IRTemp src32 = newTemp(Ity_I32); 3417 assign(src32, widenUto32( mkexpr(src) )); 3418 3419 IRTemp src32x = newTemp(Ity_I32); 3420 assign(src32x, 3421 binop(Iop_Shl32, mkexpr(src32), 3422 mkU8(32 - 8 * sizeofIRType(ty)))); 3423 3424 // Clz32 has undefined semantics when its input is zero, so 3425 // special-case around that. 3426 IRTemp res32 = newTemp(Ity_I32); 3427 assign(res32, 3428 IRExpr_ITE( 3429 binop(Iop_CmpEQ32, mkexpr(src32x), mkU32(0)), 3430 mkU32(8 * sizeofIRType(ty)), 3431 unop(Iop_Clz32, mkexpr(src32x)) 3432 )); 3433 3434 IRTemp res = newTemp(ty); 3435 assign(res, narrowTo(ty, mkexpr(res32))); 3436 return res; 3437} 3438 3439 3440/*------------------------------------------------------------*/ 3441/*--- ---*/ 3442/*--- x87 FLOATING POINT INSTRUCTIONS ---*/ 3443/*--- ---*/ 3444/*------------------------------------------------------------*/ 3445 3446/* --- Helper functions for dealing with the register stack. --- */ 3447 3448/* --- Set the emulation-warning pseudo-register. --- */ 3449 3450static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ ) 3451{ 3452 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 3453 stmt( IRStmt_Put( OFFB_EMNOTE, e ) ); 3454} 3455 3456/* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */ 3457 3458static IRExpr* mkQNaN64 ( void ) 3459{ 3460 /* QNaN is 0 2047 1 0(51times) 3461 == 0b 11111111111b 1 0(51times) 3462 == 0x7FF8 0000 0000 0000 3463 */ 3464 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL)); 3465} 3466 3467/* --------- Get/put the top-of-stack pointer. --------- */ 3468 3469static IRExpr* get_ftop ( void ) 3470{ 3471 return IRExpr_Get( OFFB_FTOP, Ity_I32 ); 3472} 3473 3474static void put_ftop ( IRExpr* e ) 3475{ 3476 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 3477 stmt( IRStmt_Put( OFFB_FTOP, e ) ); 3478} 3479 3480/* --------- Get/put the C3210 bits. --------- */ 3481 3482static IRExpr* get_C3210 ( void ) 3483{ 3484 return IRExpr_Get( OFFB_FC3210, Ity_I32 ); 3485} 3486 3487static void put_C3210 ( IRExpr* e ) 3488{ 3489 stmt( IRStmt_Put( OFFB_FC3210, e ) ); 3490} 3491 3492/* --------- Get/put the FPU rounding mode. --------- */ 3493static IRExpr* /* :: Ity_I32 */ get_fpround ( void ) 3494{ 3495 return IRExpr_Get( OFFB_FPROUND, Ity_I32 ); 3496} 3497 3498static void put_fpround ( IRExpr* /* :: Ity_I32 */ e ) 3499{ 3500 stmt( IRStmt_Put( OFFB_FPROUND, e ) ); 3501} 3502 3503 3504/* --------- Synthesise a 2-bit FPU rounding mode. --------- */ 3505/* Produces a value in 0 .. 3, which is encoded as per the type 3506 IRRoundingMode. Since the guest_FPROUND value is also encoded as 3507 per IRRoundingMode, we merely need to get it and mask it for 3508 safety. 3509*/ 3510static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void ) 3511{ 3512 return binop( Iop_And32, get_fpround(), mkU32(3) ); 3513} 3514 3515static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) 3516{ 3517 return mkU32(Irrm_NEAREST); 3518} 3519 3520 3521/* --------- Get/set FP register tag bytes. 
--------- */ 3522 3523/* Given i, and some expression e, generate 'ST_TAG(i) = e'. */ 3524 3525static void put_ST_TAG ( Int i, IRExpr* value ) 3526{ 3527 IRRegArray* descr; 3528 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8); 3529 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 3530 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) ); 3531} 3532 3533/* Given i, generate an expression yielding 'ST_TAG(i)'. This will be 3534 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */ 3535 3536static IRExpr* get_ST_TAG ( Int i ) 3537{ 3538 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 3539 return IRExpr_GetI( descr, get_ftop(), i ); 3540} 3541 3542 3543/* --------- Get/set FP registers. --------- */ 3544 3545/* Given i, and some expression e, emit 'ST(i) = e' and set the 3546 register's tag to indicate the register is full. The previous 3547 state of the register is not checked. */ 3548 3549static void put_ST_UNCHECKED ( Int i, IRExpr* value ) 3550{ 3551 IRRegArray* descr; 3552 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64); 3553 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 3554 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) ); 3555 /* Mark the register as in-use. */ 3556 put_ST_TAG(i, mkU8(1)); 3557} 3558 3559/* Given i, and some expression e, emit 3560 ST(i) = is_full(i) ? NaN : e 3561 and set the tag accordingly. 3562*/ 3563 3564static void put_ST ( Int i, IRExpr* value ) 3565{ 3566 put_ST_UNCHECKED( 3567 i, 3568 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)), 3569 /* non-0 means full */ 3570 mkQNaN64(), 3571 /* 0 means empty */ 3572 value 3573 ) 3574 ); 3575} 3576 3577 3578/* Given i, generate an expression yielding 'ST(i)'. */ 3579 3580static IRExpr* get_ST_UNCHECKED ( Int i ) 3581{ 3582 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 3583 return IRExpr_GetI( descr, get_ftop(), i ); 3584} 3585 3586 3587/* Given i, generate an expression yielding 3588 is_full(i) ? ST(i) : NaN 3589*/ 3590 3591static IRExpr* get_ST ( Int i ) 3592{ 3593 return 3594 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)), 3595 /* non-0 means full */ 3596 get_ST_UNCHECKED(i), 3597 /* 0 means empty */ 3598 mkQNaN64()); 3599} 3600 3601 3602/* Given i, and some expression e, and a condition cond, generate IR 3603 which has the same effect as put_ST(i,e) when cond is true and has 3604 no effect when cond is false. Given the lack of proper 3605 if-then-else in the IR, this is pretty tricky. 3606*/ 3607 3608static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value ) 3609{ 3610 // new_tag = if cond then FULL else old_tag 3611 // new_val = if cond then (if old_tag==FULL then NaN else val) 3612 // else old_val 3613 3614 IRTemp old_tag = newTemp(Ity_I8); 3615 assign(old_tag, get_ST_TAG(i)); 3616 IRTemp new_tag = newTemp(Ity_I8); 3617 assign(new_tag, 3618 IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag))); 3619 3620 IRTemp old_val = newTemp(Ity_F64); 3621 assign(old_val, get_ST_UNCHECKED(i)); 3622 IRTemp new_val = newTemp(Ity_F64); 3623 assign(new_val, 3624 IRExpr_ITE(mkexpr(cond), 3625 IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)), 3626 /* non-0 means full */ 3627 mkQNaN64(), 3628 /* 0 means empty */ 3629 value), 3630 mkexpr(old_val))); 3631 3632 put_ST_UNCHECKED(i, mkexpr(new_val)); 3633 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So 3634 // now set it to new_tag instead. 3635 put_ST_TAG(i, mkexpr(new_tag)); 3636} 3637 3638/* Adjust FTOP downwards by one register. 
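   The modulo-8 wraparound of the stack pointer is supplied by the
   GetI/PutI indexing of the 8-entry FPREGS/FPTAGS arrays, so a plain
   32-bit subtract suffices here.
*/

/* Standalone illustration -- the name is invented and nothing calls
   it -- of the net effect of a push on the 3-bit top-of-stack
   index. */
static UInt example_ftop_after_push ( UInt ftop )
{
   return (ftop - 1) & 7;   /* move down one slot, wrapping mod 8 */
}

/* fp_push itself follows.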
*/ 3639 3640static void fp_push ( void ) 3641{ 3642 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); 3643} 3644 3645/* Adjust FTOP downwards by one register when COND is 1:I1. Else 3646 don't change it. */ 3647 3648static void maybe_fp_push ( IRTemp cond ) 3649{ 3650 put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) ); 3651} 3652 3653/* Adjust FTOP upwards by one register, and mark the vacated register 3654 as empty. */ 3655 3656static void fp_pop ( void ) 3657{ 3658 put_ST_TAG(0, mkU8(0)); 3659 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 3660} 3661 3662/* Set the C2 bit of the FPU status register to e[0]. Assumes that 3663 e[31:1] == 0. 3664*/ 3665static void set_C2 ( IRExpr* e ) 3666{ 3667 IRExpr* cleared = binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)); 3668 put_C3210( binop(Iop_Or32, 3669 cleared, 3670 binop(Iop_Shl32, e, mkU8(X86G_FC_SHIFT_C2))) ); 3671} 3672 3673/* Generate code to check that abs(d64) < 2^63 and is finite. This is 3674 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The 3675 test is simple, but the derivation of it is not so simple. 3676 3677 The exponent field for an IEEE754 double is 11 bits. That means it 3678 can take values 0 through 0x7FF. If the exponent has value 0x7FF, 3679 the number is either a NaN or an Infinity and so is not finite. 3680 Furthermore, a finite value of exactly 2^63 is the smallest value 3681 that has exponent value 0x43E. Hence, what we need to do is 3682 extract the exponent, ignoring the sign bit and mantissa, and check 3683 it is < 0x43E, or <= 0x43D. 3684 3685 To make this easily applicable to 32- and 64-bit targets, a 3686 roundabout approach is used. First the number is converted to I64, 3687 then the top 32 bits are taken. Shifting them right by 20 bits 3688 places the sign bit and exponent in the bottom 12 bits. Anding 3689 with 0x7FF gets rid of the sign bit, leaving just the exponent 3690 available for comparison. 3691*/ 3692static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 ) 3693{ 3694 IRTemp i64 = newTemp(Ity_I64); 3695 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) ); 3696 IRTemp exponent = newTemp(Ity_I32); 3697 assign(exponent, 3698 binop(Iop_And32, 3699 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)), 3700 mkU32(0x7FF))); 3701 IRTemp in_range_and_finite = newTemp(Ity_I1); 3702 assign(in_range_and_finite, 3703 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D))); 3704 return in_range_and_finite; 3705} 3706 3707/* Invent a plausible-looking FPU status word value: 3708 ((ftop & 7) << 11) | (c3210 & 0x4700) 3709 */ 3710static IRExpr* get_FPU_sw ( void ) 3711{ 3712 return 3713 unop(Iop_32to16, 3714 binop(Iop_Or32, 3715 binop(Iop_Shl32, 3716 binop(Iop_And32, get_ftop(), mkU32(7)), 3717 mkU8(11)), 3718 binop(Iop_And32, get_C3210(), mkU32(0x4700)) 3719 )); 3720} 3721 3722 3723/* ------------------------------------------------------- */ 3724/* Given all that stack-mangling junk, we can now go ahead 3725 and describe FP instructions. 3726*/ 3727 3728/* ST(0) = ST(0) `op` mem64/32(addr) 3729 Need to check ST(0)'s tag on read, but not on write. 
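   (Example: "fadds (%eax)" loads an F32 at the given address, widens
   it to F64 and adds it to ST(0) at 64-bit precision; the "faddl"
   form loads an F64 directly and skips the widening.)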
3730*/ 3731static 3732void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf, 3733 IROp op, Bool dbl ) 3734{ 3735 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 3736 if (dbl) { 3737 put_ST_UNCHECKED(0, 3738 triop( op, 3739 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3740 get_ST(0), 3741 loadLE(Ity_F64,mkexpr(addr)) 3742 )); 3743 } else { 3744 put_ST_UNCHECKED(0, 3745 triop( op, 3746 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3747 get_ST(0), 3748 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))) 3749 )); 3750 } 3751} 3752 3753 3754/* ST(0) = mem64/32(addr) `op` ST(0) 3755 Need to check ST(0)'s tag on read, but not on write. 3756*/ 3757static 3758void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf, 3759 IROp op, Bool dbl ) 3760{ 3761 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 3762 if (dbl) { 3763 put_ST_UNCHECKED(0, 3764 triop( op, 3765 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3766 loadLE(Ity_F64,mkexpr(addr)), 3767 get_ST(0) 3768 )); 3769 } else { 3770 put_ST_UNCHECKED(0, 3771 triop( op, 3772 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3773 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))), 3774 get_ST(0) 3775 )); 3776 } 3777} 3778 3779 3780/* ST(dst) = ST(dst) `op` ST(src). 3781 Check dst and src tags when reading but not on write. 3782*/ 3783static 3784void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 3785 Bool pop_after ) 3786{ 3787 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", 3788 st_src, st_dst); 3789 put_ST_UNCHECKED( 3790 st_dst, 3791 triop( op, 3792 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3793 get_ST(st_dst), 3794 get_ST(st_src) ) 3795 ); 3796 if (pop_after) 3797 fp_pop(); 3798} 3799 3800/* ST(dst) = ST(src) `op` ST(dst). 3801 Check dst and src tags when reading but not on write. 3802*/ 3803static 3804void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, 3805 UInt st_dst, Bool pop_after ) 3806{ 3807 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", 3808 st_src, st_dst); 3809 put_ST_UNCHECKED( 3810 st_dst, 3811 triop( op, 3812 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3813 get_ST(st_src), 3814 get_ST(st_dst) ) 3815 ); 3816 if (pop_after) 3817 fp_pop(); 3818} 3819 3820/* %eflags(Z,P,C) = UCOMI( st(0), st(i) ) */ 3821static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after ) 3822{ 3823 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i); 3824 /* This is a bit of a hack (and isn't really right). It sets 3825 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel 3826 documentation implies A and S are unchanged. 3827 */ 3828 /* It's also fishy in that it is used both for COMIP and 3829 UCOMIP, and they aren't the same (although similar). */ 3830 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 3831 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 3832 stmt( IRStmt_Put( OFFB_CC_DEP1, 3833 binop( Iop_And32, 3834 binop(Iop_CmpF64, get_ST(0), get_ST(i)), 3835 mkU32(0x45) 3836 ))); 3837 /* Set NDEP even though it isn't used. This makes redundant-PUT 3838 elimination of previous stores to this field work better. 
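      Also note the 0x45 mask above: Iop_CmpF64 yields 0x00/0x01/0x40/0x45
      for GT/LT/EQ/UNORD, and bits 0, 2 and 6 of that value coincide with
      CF, PF and ZF in %eflags, which is how Z,P,C come to be set.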
*/ 3839 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 3840 if (pop_after) 3841 fp_pop(); 3842} 3843 3844 3845static 3846UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta ) 3847{ 3848 Int len; 3849 UInt r_src, r_dst; 3850 HChar dis_buf[50]; 3851 IRTemp t1, t2; 3852 3853 /* On entry, delta points at the second byte of the insn (the modrm 3854 byte).*/ 3855 UChar first_opcode = getIByte(delta-1); 3856 UChar modrm = getIByte(delta+0); 3857 3858 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */ 3859 3860 if (first_opcode == 0xD8) { 3861 if (modrm < 0xC0) { 3862 3863 /* bits 5,4,3 are an opcode extension, and the modRM also 3864 specifies an address. */ 3865 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 3866 delta += len; 3867 3868 switch (gregOfRM(modrm)) { 3869 3870 case 0: /* FADD single-real */ 3871 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False ); 3872 break; 3873 3874 case 1: /* FMUL single-real */ 3875 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False ); 3876 break; 3877 3878 case 2: /* FCOM single-real */ 3879 DIP("fcoms %s\n", dis_buf); 3880 /* This forces C1 to zero, which isn't right. */ 3881 put_C3210( 3882 binop( Iop_And32, 3883 binop(Iop_Shl32, 3884 binop(Iop_CmpF64, 3885 get_ST(0), 3886 unop(Iop_F32toF64, 3887 loadLE(Ity_F32,mkexpr(addr)))), 3888 mkU8(8)), 3889 mkU32(0x4500) 3890 )); 3891 break; 3892 3893 case 3: /* FCOMP single-real */ 3894 DIP("fcomps %s\n", dis_buf); 3895 /* This forces C1 to zero, which isn't right. */ 3896 put_C3210( 3897 binop( Iop_And32, 3898 binop(Iop_Shl32, 3899 binop(Iop_CmpF64, 3900 get_ST(0), 3901 unop(Iop_F32toF64, 3902 loadLE(Ity_F32,mkexpr(addr)))), 3903 mkU8(8)), 3904 mkU32(0x4500) 3905 )); 3906 fp_pop(); 3907 break; 3908 3909 case 4: /* FSUB single-real */ 3910 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False ); 3911 break; 3912 3913 case 5: /* FSUBR single-real */ 3914 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False ); 3915 break; 3916 3917 case 6: /* FDIV single-real */ 3918 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False ); 3919 break; 3920 3921 case 7: /* FDIVR single-real */ 3922 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False ); 3923 break; 3924 3925 default: 3926 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm)); 3927 vex_printf("first_opcode == 0xD8\n"); 3928 goto decode_fail; 3929 } 3930 } else { 3931 delta++; 3932 switch (modrm) { 3933 3934 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */ 3935 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False ); 3936 break; 3937 3938 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */ 3939 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False ); 3940 break; 3941 3942 /* Dunno if this is right */ 3943 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */ 3944 r_dst = (UInt)modrm - 0xD0; 3945 DIP("fcom %%st(0),%%st(%u)\n", r_dst); 3946 /* This forces C1 to zero, which isn't right. */ 3947 put_C3210( 3948 binop( Iop_And32, 3949 binop(Iop_Shl32, 3950 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 3951 mkU8(8)), 3952 mkU32(0x4500) 3953 )); 3954 break; 3955 3956 /* Dunno if this is right */ 3957 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */ 3958 r_dst = (UInt)modrm - 0xD8; 3959 DIP("fcomp %%st(0),%%st(%u)\n", r_dst); 3960 /* This forces C1 to zero, which isn't right. */ 3961 put_C3210( 3962 binop( Iop_And32, 3963 binop(Iop_Shl32, 3964 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 3965 mkU8(8)), 3966 mkU32(0x4500) 3967 )); 3968 fp_pop(); 3969 break; 3970 3971 case 0xE0 ... 
0xE7: /* FSUB %st(?),%st(0) */ 3972 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False ); 3973 break; 3974 3975 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */ 3976 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False ); 3977 break; 3978 3979 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */ 3980 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False ); 3981 break; 3982 3983 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */ 3984 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False ); 3985 break; 3986 3987 default: 3988 goto decode_fail; 3989 } 3990 } 3991 } 3992 3993 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */ 3994 else 3995 if (first_opcode == 0xD9) { 3996 if (modrm < 0xC0) { 3997 3998 /* bits 5,4,3 are an opcode extension, and the modRM also 3999 specifies an address. */ 4000 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 4001 delta += len; 4002 4003 switch (gregOfRM(modrm)) { 4004 4005 case 0: /* FLD single-real */ 4006 DIP("flds %s\n", dis_buf); 4007 fp_push(); 4008 put_ST(0, unop(Iop_F32toF64, 4009 loadLE(Ity_F32, mkexpr(addr)))); 4010 break; 4011 4012 case 2: /* FST single-real */ 4013 DIP("fsts %s\n", dis_buf); 4014 storeLE(mkexpr(addr), 4015 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 4016 break; 4017 4018 case 3: /* FSTP single-real */ 4019 DIP("fstps %s\n", dis_buf); 4020 storeLE(mkexpr(addr), 4021 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 4022 fp_pop(); 4023 break; 4024 4025 case 4: { /* FLDENV m28 */ 4026 /* Uses dirty helper: 4027 VexEmNote x86g_do_FLDENV ( VexGuestX86State*, HWord ) */ 4028 IRTemp ew = newTemp(Ity_I32); 4029 IRDirty* d = unsafeIRDirty_0_N ( 4030 0/*regparms*/, 4031 "x86g_dirtyhelper_FLDENV", 4032 &x86g_dirtyhelper_FLDENV, 4033 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 4034 ); 4035 d->tmp = ew; 4036 /* declare we're reading memory */ 4037 d->mFx = Ifx_Read; 4038 d->mAddr = mkexpr(addr); 4039 d->mSize = 28; 4040 4041 /* declare we're writing guest state */ 4042 d->nFxState = 4; 4043 vex_bzero(&d->fxState, sizeof(d->fxState)); 4044 4045 d->fxState[0].fx = Ifx_Write; 4046 d->fxState[0].offset = OFFB_FTOP; 4047 d->fxState[0].size = sizeof(UInt); 4048 4049 d->fxState[1].fx = Ifx_Write; 4050 d->fxState[1].offset = OFFB_FPTAGS; 4051 d->fxState[1].size = 8 * sizeof(UChar); 4052 4053 d->fxState[2].fx = Ifx_Write; 4054 d->fxState[2].offset = OFFB_FPROUND; 4055 d->fxState[2].size = sizeof(UInt); 4056 4057 d->fxState[3].fx = Ifx_Write; 4058 d->fxState[3].offset = OFFB_FC3210; 4059 d->fxState[3].size = sizeof(UInt); 4060 4061 stmt( IRStmt_Dirty(d) ); 4062 4063 /* ew contains any emulation warning we may need to 4064 issue. If needed, side-exit to the next insn, 4065 reporting the warning, so that Valgrind's dispatcher 4066 sees the warning. */ 4067 put_emwarn( mkexpr(ew) ); 4068 stmt( 4069 IRStmt_Exit( 4070 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 4071 Ijk_EmWarn, 4072 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta), 4073 OFFB_EIP 4074 ) 4075 ); 4076 4077 DIP("fldenv %s\n", dis_buf); 4078 break; 4079 } 4080 4081 case 5: {/* FLDCW */ 4082 /* The only thing we observe in the control word is the 4083 rounding mode. Therefore, pass the 16-bit value 4084 (x87 native-format control word) to a clean helper, 4085 getting back a 64-bit value, the lower half of which 4086 is the FPROUND value to store, and the upper half of 4087 which is the emulation-warning token which may be 4088 generated. 
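                  So, for example, the power-up control word 0x037F (all
                  exceptions masked, 64-bit precision, round-to-nearest)
                  should come back as FPROUND == Irrm_NEAREST with a zero
                  warning token.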
               */
               /* ULong x86g_check_fldcw ( UInt ); */
               IRTemp t64 = newTemp(Ity_I64);
               IRTemp ew = newTemp(Ity_I32);
               DIP("fldcw %s\n", dis_buf);
               assign( t64, mkIRExprCCall(
                               Ity_I64, 0/*regparms*/,
                               "x86g_check_fldcw",
                               &x86g_check_fldcw,
                               mkIRExprVec_1(
                                  unop( Iop_16Uto32,
                                        loadLE(Ity_I16, mkexpr(addr)))
                               )
                            )
                     );

               put_fpround( unop(Iop_64to32, mkexpr(t64)) );
               assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
               put_emwarn( mkexpr(ew) );
               /* Finally, if an emulation warning was reported,
                  side-exit to the next insn, reporting the warning,
                  so that Valgrind's dispatcher sees the warning. */
               stmt(
                  IRStmt_Exit(
                     binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
                     Ijk_EmWarn,
                     IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
                     OFFB_EIP
                  )
               );
               break;
            }

            case 6: { /* FNSTENV m28 */
               /* Uses dirty helper:
                     void x86g_dirtyhelper_FSTENV ( VexGuestX86State*, HWord ) */
               IRDirty* d = unsafeIRDirty_0_N (
                               0/*regparms*/,
                               "x86g_dirtyhelper_FSTENV",
                               &x86g_dirtyhelper_FSTENV,
                               mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
                            );
               /* declare we're writing memory */
               d->mFx   = Ifx_Write;
               d->mAddr = mkexpr(addr);
               d->mSize = 28;

               /* declare we're reading guest state */
               d->nFxState = 4;
               vex_bzero(&d->fxState, sizeof(d->fxState));

               d->fxState[0].fx     = Ifx_Read;
               d->fxState[0].offset = OFFB_FTOP;
               d->fxState[0].size   = sizeof(UInt);

               d->fxState[1].fx     = Ifx_Read;
               d->fxState[1].offset = OFFB_FPTAGS;
               d->fxState[1].size   = 8 * sizeof(UChar);

               d->fxState[2].fx     = Ifx_Read;
               d->fxState[2].offset = OFFB_FPROUND;
               d->fxState[2].size   = sizeof(UInt);

               d->fxState[3].fx     = Ifx_Read;
               d->fxState[3].offset = OFFB_FC3210;
               d->fxState[3].size   = sizeof(UInt);

               stmt( IRStmt_Dirty(d) );

               DIP("fnstenv %s\n", dis_buf);
               break;
            }

            case 7: /* FNSTCW */
               /* Fake up a native x87 FPU control word.  The only
                  thing it depends on is FPROUND[1:0], so call a clean
                  helper to cook it up. */
               /* UInt x86g_create_fpucw ( UInt fpround ) */
               DIP("fnstcw %s\n", dis_buf);
               storeLE(
                  mkexpr(addr),
                  unop( Iop_32to16,
                        mkIRExprCCall(
                           Ity_I32, 0/*regp*/,
                           "x86g_create_fpucw", &x86g_create_fpucw,
                           mkIRExprVec_1( get_fpround() )
                        )
                  )
               );
               break;

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
               vex_printf("first_opcode == 0xD9\n");
               goto decode_fail;
         }

      } else {
         delta++;
         switch (modrm) {

            case 0xC0 ... 0xC7: /* FLD %st(?) */
               r_src = (UInt)modrm - 0xC0;
               DIP("fld %%st(%u)\n", r_src);
               t1 = newTemp(Ity_F64);
               assign(t1, get_ST(r_src));
               fp_push();
               put_ST(0, mkexpr(t1));
               break;

            case 0xC8 ... 0xCF: /* FXCH %st(?)
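               -- exchange ST(0) and ST(i).  Both writes below are
               unchecked, so both registers end up tagged as full; a
               simplification of the real tag behaviour.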
*/ 4200 r_src = (UInt)modrm - 0xC8; 4201 DIP("fxch %%st(%u)\n", r_src); 4202 t1 = newTemp(Ity_F64); 4203 t2 = newTemp(Ity_F64); 4204 assign(t1, get_ST(0)); 4205 assign(t2, get_ST(r_src)); 4206 put_ST_UNCHECKED(0, mkexpr(t2)); 4207 put_ST_UNCHECKED(r_src, mkexpr(t1)); 4208 break; 4209 4210 case 0xE0: /* FCHS */ 4211 DIP("fchs\n"); 4212 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0))); 4213 break; 4214 4215 case 0xE1: /* FABS */ 4216 DIP("fabs\n"); 4217 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0))); 4218 break; 4219 4220 case 0xE4: /* FTST */ 4221 DIP("ftst\n"); 4222 /* This forces C1 to zero, which isn't right. */ 4223 /* Well, in fact the Intel docs say (bizarrely): "C1 is 4224 set to 0 if stack underflow occurred; otherwise, set 4225 to 0" which is pretty nonsensical. I guess it's a 4226 typo. */ 4227 put_C3210( 4228 binop( Iop_And32, 4229 binop(Iop_Shl32, 4230 binop(Iop_CmpF64, 4231 get_ST(0), 4232 IRExpr_Const(IRConst_F64i(0x0ULL))), 4233 mkU8(8)), 4234 mkU32(0x4500) 4235 )); 4236 break; 4237 4238 case 0xE5: { /* FXAM */ 4239 /* This is an interesting one. It examines %st(0), 4240 regardless of whether the tag says it's empty or not. 4241 Here, just pass both the tag (in our format) and the 4242 value (as a double, actually a ULong) to a helper 4243 function. */ 4244 IRExpr** args 4245 = mkIRExprVec_2( unop(Iop_8Uto32, get_ST_TAG(0)), 4246 unop(Iop_ReinterpF64asI64, 4247 get_ST_UNCHECKED(0)) ); 4248 put_C3210(mkIRExprCCall( 4249 Ity_I32, 4250 0/*regparm*/, 4251 "x86g_calculate_FXAM", &x86g_calculate_FXAM, 4252 args 4253 )); 4254 DIP("fxam\n"); 4255 break; 4256 } 4257 4258 case 0xE8: /* FLD1 */ 4259 DIP("fld1\n"); 4260 fp_push(); 4261 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */ 4262 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL))); 4263 break; 4264 4265 case 0xE9: /* FLDL2T */ 4266 DIP("fldl2t\n"); 4267 fp_push(); 4268 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */ 4269 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL))); 4270 break; 4271 4272 case 0xEA: /* FLDL2E */ 4273 DIP("fldl2e\n"); 4274 fp_push(); 4275 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */ 4276 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL))); 4277 break; 4278 4279 case 0xEB: /* FLDPI */ 4280 DIP("fldpi\n"); 4281 fp_push(); 4282 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */ 4283 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL))); 4284 break; 4285 4286 case 0xEC: /* FLDLG2 */ 4287 DIP("fldlg2\n"); 4288 fp_push(); 4289 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */ 4290 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL))); 4291 break; 4292 4293 case 0xED: /* FLDLN2 */ 4294 DIP("fldln2\n"); 4295 fp_push(); 4296 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */ 4297 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL))); 4298 break; 4299 4300 case 0xEE: /* FLDZ */ 4301 DIP("fldz\n"); 4302 fp_push(); 4303 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */ 4304 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL))); 4305 break; 4306 4307 case 0xF0: /* F2XM1 */ 4308 DIP("f2xm1\n"); 4309 put_ST_UNCHECKED(0, 4310 binop(Iop_2xm1F64, 4311 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4312 get_ST(0))); 4313 break; 4314 4315 case 0xF1: /* FYL2X */ 4316 DIP("fyl2x\n"); 4317 put_ST_UNCHECKED(1, 4318 triop(Iop_Yl2xF64, 4319 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4320 get_ST(1), 4321 get_ST(0))); 4322 fp_pop(); 4323 break; 4324 4325 case 0xF2: { /* FPTAN */ 
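               /* FPTAN: compute tan(ST(0)), then push 1.0.  If the
                  argument is out of range (non-finite, or >= 2^63 in
                  magnitude), ST(0) is left unchanged, nothing is
                  pushed, and C2 is set; in-range arguments clear C2. */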
4326 DIP("fptan\n"); 4327 IRTemp argD = newTemp(Ity_F64); 4328 assign(argD, get_ST(0)); 4329 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 4330 IRTemp resD = newTemp(Ity_F64); 4331 assign(resD, 4332 IRExpr_ITE( 4333 mkexpr(argOK), 4334 binop(Iop_TanF64, 4335 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4336 mkexpr(argD)), 4337 mkexpr(argD)) 4338 ); 4339 put_ST_UNCHECKED(0, mkexpr(resD)); 4340 /* Conditionally push 1.0 on the stack, if the arg is 4341 in range */ 4342 maybe_fp_push(argOK); 4343 maybe_put_ST(argOK, 0, 4344 IRExpr_Const(IRConst_F64(1.0))); 4345 set_C2( binop(Iop_Xor32, 4346 unop(Iop_1Uto32, mkexpr(argOK)), 4347 mkU32(1)) ); 4348 break; 4349 } 4350 4351 case 0xF3: /* FPATAN */ 4352 DIP("fpatan\n"); 4353 put_ST_UNCHECKED(1, 4354 triop(Iop_AtanF64, 4355 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4356 get_ST(1), 4357 get_ST(0))); 4358 fp_pop(); 4359 break; 4360 4361 case 0xF4: { /* FXTRACT */ 4362 IRTemp argF = newTemp(Ity_F64); 4363 IRTemp sigF = newTemp(Ity_F64); 4364 IRTemp expF = newTemp(Ity_F64); 4365 IRTemp argI = newTemp(Ity_I64); 4366 IRTemp sigI = newTemp(Ity_I64); 4367 IRTemp expI = newTemp(Ity_I64); 4368 DIP("fxtract\n"); 4369 assign( argF, get_ST(0) ); 4370 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF))); 4371 assign( sigI, 4372 mkIRExprCCall( 4373 Ity_I64, 0/*regparms*/, 4374 "x86amd64g_calculate_FXTRACT", 4375 &x86amd64g_calculate_FXTRACT, 4376 mkIRExprVec_2( mkexpr(argI), 4377 mkIRExpr_HWord(0)/*sig*/ )) 4378 ); 4379 assign( expI, 4380 mkIRExprCCall( 4381 Ity_I64, 0/*regparms*/, 4382 "x86amd64g_calculate_FXTRACT", 4383 &x86amd64g_calculate_FXTRACT, 4384 mkIRExprVec_2( mkexpr(argI), 4385 mkIRExpr_HWord(1)/*exp*/ )) 4386 ); 4387 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) ); 4388 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) ); 4389 /* exponent */ 4390 put_ST_UNCHECKED(0, mkexpr(expF) ); 4391 fp_push(); 4392 /* significand */ 4393 put_ST(0, mkexpr(sigF) ); 4394 break; 4395 } 4396 4397 case 0xF5: { /* FPREM1 -- IEEE compliant */ 4398 IRTemp a1 = newTemp(Ity_F64); 4399 IRTemp a2 = newTemp(Ity_F64); 4400 DIP("fprem1\n"); 4401 /* Do FPREM1 twice, once to get the remainder, and once 4402 to get the C3210 flag values. */ 4403 assign( a1, get_ST(0) ); 4404 assign( a2, get_ST(1) ); 4405 put_ST_UNCHECKED(0, 4406 triop(Iop_PRem1F64, 4407 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4408 mkexpr(a1), 4409 mkexpr(a2))); 4410 put_C3210( 4411 triop(Iop_PRem1C3210F64, 4412 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4413 mkexpr(a1), 4414 mkexpr(a2)) ); 4415 break; 4416 } 4417 4418 case 0xF7: /* FINCSTP */ 4419 DIP("fprem\n"); 4420 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 4421 break; 4422 4423 case 0xF8: { /* FPREM -- not IEEE compliant */ 4424 IRTemp a1 = newTemp(Ity_F64); 4425 IRTemp a2 = newTemp(Ity_F64); 4426 DIP("fprem\n"); 4427 /* Do FPREM twice, once to get the remainder, and once 4428 to get the C3210 flag values. 
*/ 4429 assign( a1, get_ST(0) ); 4430 assign( a2, get_ST(1) ); 4431 put_ST_UNCHECKED(0, 4432 triop(Iop_PRemF64, 4433 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4434 mkexpr(a1), 4435 mkexpr(a2))); 4436 put_C3210( 4437 triop(Iop_PRemC3210F64, 4438 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4439 mkexpr(a1), 4440 mkexpr(a2)) ); 4441 break; 4442 } 4443 4444 case 0xF9: /* FYL2XP1 */ 4445 DIP("fyl2xp1\n"); 4446 put_ST_UNCHECKED(1, 4447 triop(Iop_Yl2xp1F64, 4448 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4449 get_ST(1), 4450 get_ST(0))); 4451 fp_pop(); 4452 break; 4453 4454 case 0xFA: /* FSQRT */ 4455 DIP("fsqrt\n"); 4456 put_ST_UNCHECKED(0, 4457 binop(Iop_SqrtF64, 4458 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4459 get_ST(0))); 4460 break; 4461 4462 case 0xFB: { /* FSINCOS */ 4463 DIP("fsincos\n"); 4464 IRTemp argD = newTemp(Ity_F64); 4465 assign(argD, get_ST(0)); 4466 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 4467 IRTemp resD = newTemp(Ity_F64); 4468 assign(resD, 4469 IRExpr_ITE( 4470 mkexpr(argOK), 4471 binop(Iop_SinF64, 4472 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4473 mkexpr(argD)), 4474 mkexpr(argD)) 4475 ); 4476 put_ST_UNCHECKED(0, mkexpr(resD)); 4477 /* Conditionally push the cos value on the stack, if 4478 the arg is in range */ 4479 maybe_fp_push(argOK); 4480 maybe_put_ST(argOK, 0, 4481 binop(Iop_CosF64, 4482 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4483 mkexpr(argD))); 4484 set_C2( binop(Iop_Xor32, 4485 unop(Iop_1Uto32, mkexpr(argOK)), 4486 mkU32(1)) ); 4487 break; 4488 } 4489 4490 case 0xFC: /* FRNDINT */ 4491 DIP("frndint\n"); 4492 put_ST_UNCHECKED(0, 4493 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) ); 4494 break; 4495 4496 case 0xFD: /* FSCALE */ 4497 DIP("fscale\n"); 4498 put_ST_UNCHECKED(0, 4499 triop(Iop_ScaleF64, 4500 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4501 get_ST(0), 4502 get_ST(1))); 4503 break; 4504 4505 case 0xFE: /* FSIN */ 4506 case 0xFF: { /* FCOS */ 4507 Bool isSIN = modrm == 0xFE; 4508 DIP("%s\n", isSIN ? "fsin" : "fcos"); 4509 IRTemp argD = newTemp(Ity_F64); 4510 assign(argD, get_ST(0)); 4511 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 4512 IRTemp resD = newTemp(Ity_F64); 4513 assign(resD, 4514 IRExpr_ITE( 4515 mkexpr(argOK), 4516 binop(isSIN ? Iop_SinF64 : Iop_CosF64, 4517 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4518 mkexpr(argD)), 4519 mkexpr(argD)) 4520 ); 4521 put_ST_UNCHECKED(0, mkexpr(resD)); 4522 set_C2( binop(Iop_Xor32, 4523 unop(Iop_1Uto32, mkexpr(argOK)), 4524 mkU32(1)) ); 4525 break; 4526 } 4527 4528 default: 4529 goto decode_fail; 4530 } 4531 } 4532 } 4533 4534 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */ 4535 else 4536 if (first_opcode == 0xDA) { 4537 4538 if (modrm < 0xC0) { 4539 4540 /* bits 5,4,3 are an opcode extension, and the modRM also 4541 specifies an address. */ 4542 IROp fop; 4543 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 4544 delta += len; 4545 switch (gregOfRM(modrm)) { 4546 4547 case 0: /* FIADD m32int */ /* ST(0) += m32int */ 4548 DIP("fiaddl %s\n", dis_buf); 4549 fop = Iop_AddF64; 4550 goto do_fop_m32; 4551 4552 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */ 4553 DIP("fimull %s\n", dis_buf); 4554 fop = Iop_MulF64; 4555 goto do_fop_m32; 4556 4557 case 2: /* FICOM m32int */ 4558 DIP("ficoml %s\n", dis_buf); 4559 /* This forces C1 to zero, which isn't right. 
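               The 0x4500 idiom, here and in all the FCOM-style cases:
               Iop_CmpF64 yields 0x00/0x01/0x40/0x45 for GT/LT/EQ/UNORD,
               so shifting left by 8 lands the result exactly on C0 (bit
               8), C2 (bit 10) and C3 (bit 14) of the status word, and
               the mask then clears everything else, including C1 (bit 9).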
*/ 4560 put_C3210( 4561 binop( Iop_And32, 4562 binop(Iop_Shl32, 4563 binop(Iop_CmpF64, 4564 get_ST(0), 4565 unop(Iop_I32StoF64, 4566 loadLE(Ity_I32,mkexpr(addr)))), 4567 mkU8(8)), 4568 mkU32(0x4500) 4569 )); 4570 break; 4571 4572 case 3: /* FICOMP m32int */ 4573 DIP("ficompl %s\n", dis_buf); 4574 /* This forces C1 to zero, which isn't right. */ 4575 put_C3210( 4576 binop( Iop_And32, 4577 binop(Iop_Shl32, 4578 binop(Iop_CmpF64, 4579 get_ST(0), 4580 unop(Iop_I32StoF64, 4581 loadLE(Ity_I32,mkexpr(addr)))), 4582 mkU8(8)), 4583 mkU32(0x4500) 4584 )); 4585 fp_pop(); 4586 break; 4587 4588 case 4: /* FISUB m32int */ /* ST(0) -= m32int */ 4589 DIP("fisubl %s\n", dis_buf); 4590 fop = Iop_SubF64; 4591 goto do_fop_m32; 4592 4593 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */ 4594 DIP("fisubrl %s\n", dis_buf); 4595 fop = Iop_SubF64; 4596 goto do_foprev_m32; 4597 4598 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */ 4599 DIP("fidivl %s\n", dis_buf); 4600 fop = Iop_DivF64; 4601 goto do_fop_m32; 4602 4603 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */ 4604 DIP("fidivrl %s\n", dis_buf); 4605 fop = Iop_DivF64; 4606 goto do_foprev_m32; 4607 4608 do_fop_m32: 4609 put_ST_UNCHECKED(0, 4610 triop(fop, 4611 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4612 get_ST(0), 4613 unop(Iop_I32StoF64, 4614 loadLE(Ity_I32, mkexpr(addr))))); 4615 break; 4616 4617 do_foprev_m32: 4618 put_ST_UNCHECKED(0, 4619 triop(fop, 4620 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4621 unop(Iop_I32StoF64, 4622 loadLE(Ity_I32, mkexpr(addr))), 4623 get_ST(0))); 4624 break; 4625 4626 default: 4627 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm)); 4628 vex_printf("first_opcode == 0xDA\n"); 4629 goto decode_fail; 4630 } 4631 4632 } else { 4633 4634 delta++; 4635 switch (modrm) { 4636 4637 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */ 4638 r_src = (UInt)modrm - 0xC0; 4639 DIP("fcmovb %%st(%u), %%st(0)\n", r_src); 4640 put_ST_UNCHECKED(0, 4641 IRExpr_ITE( 4642 mk_x86g_calculate_condition(X86CondB), 4643 get_ST(r_src), get_ST(0)) ); 4644 break; 4645 4646 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */ 4647 r_src = (UInt)modrm - 0xC8; 4648 DIP("fcmovz %%st(%u), %%st(0)\n", r_src); 4649 put_ST_UNCHECKED(0, 4650 IRExpr_ITE( 4651 mk_x86g_calculate_condition(X86CondZ), 4652 get_ST(r_src), get_ST(0)) ); 4653 break; 4654 4655 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */ 4656 r_src = (UInt)modrm - 0xD0; 4657 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src); 4658 put_ST_UNCHECKED(0, 4659 IRExpr_ITE( 4660 mk_x86g_calculate_condition(X86CondBE), 4661 get_ST(r_src), get_ST(0)) ); 4662 break; 4663 4664 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */ 4665 r_src = (UInt)modrm - 0xD8; 4666 DIP("fcmovu %%st(%u), %%st(0)\n", r_src); 4667 put_ST_UNCHECKED(0, 4668 IRExpr_ITE( 4669 mk_x86g_calculate_condition(X86CondP), 4670 get_ST(r_src), get_ST(0)) ); 4671 break; 4672 4673 case 0xE9: /* FUCOMPP %st(0),%st(1) */ 4674 DIP("fucompp %%st(0),%%st(1)\n"); 4675 /* This forces C1 to zero, which isn't right. */ 4676 put_C3210( 4677 binop( Iop_And32, 4678 binop(Iop_Shl32, 4679 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 4680 mkU8(8)), 4681 mkU32(0x4500) 4682 )); 4683 fp_pop(); 4684 fp_pop(); 4685 break; 4686 4687 default: 4688 goto decode_fail; 4689 } 4690 4691 } 4692 } 4693 4694 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */ 4695 else 4696 if (first_opcode == 0xDB) { 4697 if (modrm < 0xC0) { 4698 4699 /* bits 5,4,3 are an opcode extension, and the modRM also 4700 specifies an address. 
         */
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
         delta += len;

         switch (gregOfRM(modrm)) {

            case 0: /* FILD m32int */
               DIP("fildl %s\n", dis_buf);
               fp_push();
               put_ST(0, unop(Iop_I32StoF64,
                              loadLE(Ity_I32, mkexpr(addr))));
               break;

            case 1: /* FISTTPL m32 (SSE3) */
               DIP("fisttpl %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
               fp_pop();
               break;

            case 2: /* FIST m32 */
               DIP("fistl %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
               break;

            case 3: /* FISTP m32 */
               DIP("fistpl %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
               fp_pop();
               break;

            case 5: { /* FLD extended-real */
               /* Uses dirty helper:
                     ULong x86g_dirtyhelper_loadF80le ( UInt )
                  addr holds the address.  First, do a dirty call to
                  get hold of the data. */
               IRTemp   val  = newTemp(Ity_I64);
               IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );

               IRDirty* d = unsafeIRDirty_1_N (
                               val,
                               0/*regparms*/,
                               "x86g_dirtyhelper_loadF80le",
                               &x86g_dirtyhelper_loadF80le,
                               args
                            );
               /* declare that we're reading memory */
               d->mFx   = Ifx_Read;
               d->mAddr = mkexpr(addr);
               d->mSize = 10;

               /* execute the dirty call, dumping the result in val. */
               stmt( IRStmt_Dirty(d) );
               fp_push();
               put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));

               DIP("fldt %s\n", dis_buf);
               break;
            }

            case 7: { /* FSTP extended-real */
               /* Uses dirty helper:
                     void x86g_dirtyhelper_storeF80le ( UInt, ULong ) */
               IRExpr** args
                  = mkIRExprVec_2( mkexpr(addr),
                                   unop(Iop_ReinterpF64asI64, get_ST(0)) );

               IRDirty* d = unsafeIRDirty_0_N (
                               0/*regparms*/,
                               "x86g_dirtyhelper_storeF80le",
                               &x86g_dirtyhelper_storeF80le,
                               args
                            );
               /* declare we're writing memory */
               d->mFx   = Ifx_Write;
               d->mAddr = mkexpr(addr);
               d->mSize = 10;

               /* execute the dirty call. */
               stmt( IRStmt_Dirty(d) );
               fp_pop();

               DIP("fstpt %s\n", dis_buf);
               break;
            }

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
               vex_printf("first_opcode == 0xDB\n");
               goto decode_fail;
         }

      } else {

         delta++;
         switch (modrm) {

            case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
               r_src = (UInt)modrm - 0xC0;
               DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_ITE(
                                   mk_x86g_calculate_condition(X86CondNB),
                                   get_ST(r_src), get_ST(0)) );
               break;

            case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
               r_src = (UInt)modrm - 0xC8;
               DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_ITE(
                                   mk_x86g_calculate_condition(X86CondNZ),
                                   get_ST(r_src), get_ST(0)) );
               break;

            case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
               r_src = (UInt)modrm - 0xD0;
               DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_ITE(
                                   mk_x86g_calculate_condition(X86CondNBE),
                                   get_ST(r_src), get_ST(0)) );
               break;

            case 0xD8 ...
0xDF: /* FCMOVNU ST(i), ST(0) */ 4826 r_src = (UInt)modrm - 0xD8; 4827 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src); 4828 put_ST_UNCHECKED(0, 4829 IRExpr_ITE( 4830 mk_x86g_calculate_condition(X86CondNP), 4831 get_ST(r_src), get_ST(0)) ); 4832 break; 4833 4834 case 0xE2: 4835 DIP("fnclex\n"); 4836 break; 4837 4838 case 0xE3: { 4839 /* Uses dirty helper: 4840 void x86g_do_FINIT ( VexGuestX86State* ) */ 4841 IRDirty* d = unsafeIRDirty_0_N ( 4842 0/*regparms*/, 4843 "x86g_dirtyhelper_FINIT", 4844 &x86g_dirtyhelper_FINIT, 4845 mkIRExprVec_1(IRExpr_BBPTR()) 4846 ); 4847 4848 /* declare we're writing guest state */ 4849 d->nFxState = 5; 4850 vex_bzero(&d->fxState, sizeof(d->fxState)); 4851 4852 d->fxState[0].fx = Ifx_Write; 4853 d->fxState[0].offset = OFFB_FTOP; 4854 d->fxState[0].size = sizeof(UInt); 4855 4856 d->fxState[1].fx = Ifx_Write; 4857 d->fxState[1].offset = OFFB_FPREGS; 4858 d->fxState[1].size = 8 * sizeof(ULong); 4859 4860 d->fxState[2].fx = Ifx_Write; 4861 d->fxState[2].offset = OFFB_FPTAGS; 4862 d->fxState[2].size = 8 * sizeof(UChar); 4863 4864 d->fxState[3].fx = Ifx_Write; 4865 d->fxState[3].offset = OFFB_FPROUND; 4866 d->fxState[3].size = sizeof(UInt); 4867 4868 d->fxState[4].fx = Ifx_Write; 4869 d->fxState[4].offset = OFFB_FC3210; 4870 d->fxState[4].size = sizeof(UInt); 4871 4872 stmt( IRStmt_Dirty(d) ); 4873 4874 DIP("fninit\n"); 4875 break; 4876 } 4877 4878 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */ 4879 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False ); 4880 break; 4881 4882 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */ 4883 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False ); 4884 break; 4885 4886 default: 4887 goto decode_fail; 4888 } 4889 } 4890 } 4891 4892 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */ 4893 else 4894 if (first_opcode == 0xDC) { 4895 if (modrm < 0xC0) { 4896 4897 /* bits 5,4,3 are an opcode extension, and the modRM also 4898 specifies an address. */ 4899 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 4900 delta += len; 4901 4902 switch (gregOfRM(modrm)) { 4903 4904 case 0: /* FADD double-real */ 4905 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True ); 4906 break; 4907 4908 case 1: /* FMUL double-real */ 4909 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True ); 4910 break; 4911 4912 case 2: /* FCOM double-real */ 4913 DIP("fcoml %s\n", dis_buf); 4914 /* This forces C1 to zero, which isn't right. */ 4915 put_C3210( 4916 binop( Iop_And32, 4917 binop(Iop_Shl32, 4918 binop(Iop_CmpF64, 4919 get_ST(0), 4920 loadLE(Ity_F64,mkexpr(addr))), 4921 mkU8(8)), 4922 mkU32(0x4500) 4923 )); 4924 break; 4925 4926 case 3: /* FCOMP double-real */ 4927 DIP("fcompl %s\n", dis_buf); 4928 /* This forces C1 to zero, which isn't right. 
*/ 4929 put_C3210( 4930 binop( Iop_And32, 4931 binop(Iop_Shl32, 4932 binop(Iop_CmpF64, 4933 get_ST(0), 4934 loadLE(Ity_F64,mkexpr(addr))), 4935 mkU8(8)), 4936 mkU32(0x4500) 4937 )); 4938 fp_pop(); 4939 break; 4940 4941 case 4: /* FSUB double-real */ 4942 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True ); 4943 break; 4944 4945 case 5: /* FSUBR double-real */ 4946 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True ); 4947 break; 4948 4949 case 6: /* FDIV double-real */ 4950 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True ); 4951 break; 4952 4953 case 7: /* FDIVR double-real */ 4954 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True ); 4955 break; 4956 4957 default: 4958 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm)); 4959 vex_printf("first_opcode == 0xDC\n"); 4960 goto decode_fail; 4961 } 4962 4963 } else { 4964 4965 delta++; 4966 switch (modrm) { 4967 4968 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */ 4969 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False ); 4970 break; 4971 4972 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */ 4973 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False ); 4974 break; 4975 4976 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */ 4977 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False ); 4978 break; 4979 4980 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */ 4981 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False ); 4982 break; 4983 4984 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */ 4985 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False ); 4986 break; 4987 4988 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */ 4989 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False ); 4990 break; 4991 4992 default: 4993 goto decode_fail; 4994 } 4995 4996 } 4997 } 4998 4999 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */ 5000 else 5001 if (first_opcode == 0xDD) { 5002 5003 if (modrm < 0xC0) { 5004 5005 /* bits 5,4,3 are an opcode extension, and the modRM also 5006 specifies an address. 
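            For 0xDD these are 64-bit data moves (FLD/FST/FSTP m64, and
            SSE3's FISTTPQ), the bulk state transfers FRSTOR and FNSAVE
            (m108), which go through dirty helpers, and FNSTSW m16, which
            stores the status word synthesised by get_FPU_sw().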
         */
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
         delta += len;

         switch (gregOfRM(modrm)) {

            case 0: /* FLD double-real */
               DIP("fldl %s\n", dis_buf);
               fp_push();
               put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
               break;

            case 1: /* FISTTPQ m64 (SSE3) */
               DIP("fisttpll %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
               fp_pop();
               break;

            case 2: /* FST double-real */
               DIP("fstl %s\n", dis_buf);
               storeLE(mkexpr(addr), get_ST(0));
               break;

            case 3: /* FSTP double-real */
               DIP("fstpl %s\n", dis_buf);
               storeLE(mkexpr(addr), get_ST(0));
               fp_pop();
               break;

            case 4: { /* FRSTOR m108 */
               /* Uses dirty helper:
                     VexEmNote x86g_dirtyhelper_FRSTOR
                                  ( VexGuestX86State*, Addr32 ) */
               IRTemp   ew = newTemp(Ity_I32);
               IRDirty* d  = unsafeIRDirty_0_N (
                                0/*regparms*/,
                                "x86g_dirtyhelper_FRSTOR",
                                &x86g_dirtyhelper_FRSTOR,
                                mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
                             );
               d->tmp   = ew;
               /* declare we're reading memory */
               d->mFx   = Ifx_Read;
               d->mAddr = mkexpr(addr);
               d->mSize = 108;

               /* declare we're writing guest state */
               d->nFxState = 5;
               vex_bzero(&d->fxState, sizeof(d->fxState));

               d->fxState[0].fx     = Ifx_Write;
               d->fxState[0].offset = OFFB_FTOP;
               d->fxState[0].size   = sizeof(UInt);

               d->fxState[1].fx     = Ifx_Write;
               d->fxState[1].offset = OFFB_FPREGS;
               d->fxState[1].size   = 8 * sizeof(ULong);

               d->fxState[2].fx     = Ifx_Write;
               d->fxState[2].offset = OFFB_FPTAGS;
               d->fxState[2].size   = 8 * sizeof(UChar);

               d->fxState[3].fx     = Ifx_Write;
               d->fxState[3].offset = OFFB_FPROUND;
               d->fxState[3].size   = sizeof(UInt);

               d->fxState[4].fx     = Ifx_Write;
               d->fxState[4].offset = OFFB_FC3210;
               d->fxState[4].size   = sizeof(UInt);

               stmt( IRStmt_Dirty(d) );

               /* ew contains any emulation warning we may need to
                  issue.  If needed, side-exit to the next insn,
                  reporting the warning, so that Valgrind's dispatcher
                  sees the warning. */
               put_emwarn( mkexpr(ew) );
               stmt(
                  IRStmt_Exit(
                     binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
                     Ijk_EmWarn,
                     IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
                     OFFB_EIP
                  )
               );

               DIP("frstor %s\n", dis_buf);
               break;
            }

            case 6: { /* FNSAVE m108 */
               /* Uses dirty helper:
                     void x86g_dirtyhelper_FSAVE ( VexGuestX86State*, UInt ) */
               IRDirty* d = unsafeIRDirty_0_N (
                               0/*regparms*/,
                               "x86g_dirtyhelper_FSAVE",
                               &x86g_dirtyhelper_FSAVE,
                               mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
                            );
               /* declare we're writing memory */
               d->mFx   = Ifx_Write;
               d->mAddr = mkexpr(addr);
               d->mSize = 108;

               /* declare we're reading guest state */
               d->nFxState = 5;
               vex_bzero(&d->fxState, sizeof(d->fxState));

               d->fxState[0].fx     = Ifx_Read;
               d->fxState[0].offset = OFFB_FTOP;
               d->fxState[0].size   = sizeof(UInt);

               d->fxState[1].fx     = Ifx_Read;
               d->fxState[1].offset = OFFB_FPREGS;
               d->fxState[1].size   = 8 * sizeof(ULong);

               d->fxState[2].fx     = Ifx_Read;
               d->fxState[2].offset = OFFB_FPTAGS;
               d->fxState[2].size   = 8 * sizeof(UChar);

               d->fxState[3].fx     = Ifx_Read;
               d->fxState[3].offset = OFFB_FPROUND;
               d->fxState[3].size   = sizeof(UInt);

               d->fxState[4].fx     = Ifx_Read;
               d->fxState[4].offset = OFFB_FC3210;
               d->fxState[4].size   = sizeof(UInt);

               stmt( IRStmt_Dirty(d) );

               DIP("fnsave %s\n", dis_buf);
               break;
            }

            case 7: { /* FNSTSW m16 */
               IRExpr* sw = get_FPU_sw();
               vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
               storeLE( mkexpr(addr), sw );
               DIP("fnstsw %s\n", dis_buf);
               break;
            }

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
               vex_printf("first_opcode == 0xDD\n");
               goto decode_fail;
         }
      } else {
         delta++;
         switch (modrm) {

            case 0xC0 ... 0xC7: /* FFREE %st(?) */
               r_dst = (UInt)modrm - 0xC0;
               DIP("ffree %%st(%u)\n", r_dst);
               put_ST_TAG ( r_dst, mkU8(0) );
               break;

            case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xD0;
               DIP("fst %%st(0),%%st(%u)\n", r_dst);
               /* P4 manual says: "If the destination operand is a
                  non-empty register, the invalid-operation exception
                  is not generated."  Hence put_ST_UNCHECKED. */
               put_ST_UNCHECKED(r_dst, get_ST(0));
               break;

            case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xD8;
               DIP("fstp %%st(0),%%st(%u)\n", r_dst);
               /* P4 manual says: "If the destination operand is a
                  non-empty register, the invalid-operation exception
                  is not generated."  Hence put_ST_UNCHECKED. */
               put_ST_UNCHECKED(r_dst, get_ST(0));
               fp_pop();
               break;

            case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xE0;
               DIP("fucom %%st(0),%%st(%u)\n", r_dst);
               /* This forces C1 to zero, which isn't right. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               break;

            case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xE8;
               DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
               /* This forces C1 to zero, which isn't right. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               fp_pop();
               break;

            default:
               goto decode_fail;
         }
      }
   }

   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xDE) {

      if (modrm < 0xC0) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IROp   fop;
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
         delta += len;

         switch (gregOfRM(modrm)) {

            case 0: /* FIADD m16int */ /* ST(0) += m16int */
               DIP("fiaddw %s\n", dis_buf);
               fop = Iop_AddF64;
               goto do_fop_m16;

            case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
               DIP("fimulw %s\n", dis_buf);
               fop = Iop_MulF64;
               goto do_fop_m16;

            case 2: /* FICOM m16int */
               DIP("ficomw %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64,
                                      get_ST(0),
                                      unop(Iop_I32StoF64,
                                         unop(Iop_16Sto32,
                                           loadLE(Ity_I16,mkexpr(addr))))),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               break;

            case 3: /* FICOMP m16int */
               DIP("ficompw %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64,
                                      get_ST(0),
                                      unop(Iop_I32StoF64,
                                         unop(Iop_16Sto32,
                                           loadLE(Ity_I16,mkexpr(addr))))),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               fp_pop();
               break;

            case 4: /* FISUB m16int */ /* ST(0) -= m16int */
               DIP("fisubw %s\n", dis_buf);
               fop = Iop_SubF64;
               goto do_fop_m16;

            case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
               DIP("fisubrw %s\n", dis_buf);
               fop = Iop_SubF64;
               goto do_foprev_m16;

            case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
               DIP("fidivw %s\n", dis_buf);
               fop = Iop_DivF64;
               goto do_fop_m16;

            case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
               DIP("fidivrw %s\n", dis_buf);
               fop = Iop_DivF64;
               goto do_foprev_m16;

            do_fop_m16:
               put_ST_UNCHECKED(0,
                  triop(fop,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(0),
                        unop(Iop_I32StoF64,
                             unop(Iop_16Sto32,
                                  loadLE(Ity_I16, mkexpr(addr))))));
               break;

            do_foprev_m16:
               put_ST_UNCHECKED(0,
                  triop(fop,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        unop(Iop_I32StoF64,
                             unop(Iop_16Sto32,
                                  loadLE(Ity_I16, mkexpr(addr)))),
                        get_ST(0)));
               break;

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
               vex_printf("first_opcode == 0xDE\n");
               goto decode_fail;
         }

      } else {

         delta++;
         switch (modrm) {

            case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
               fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
               break;

            case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
               fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
               break;

            case 0xD9: /* FCOMPP %st(0),%st(1) */
               DIP("fcompp %%st(0),%%st(1)\n");
               /* This forces C1 to zero, which isn't right. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64, get_ST(0), get_ST(1)),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               fp_pop();
               fp_pop();
               break;
            case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
               fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
               break;

            case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
               fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
               break;

            case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
               fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
               break;

            case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
               fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
               break;

            default:
               goto decode_fail;
         }

      }
   }

   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xDF) {

      if (modrm < 0xC0) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
         delta += len;

         switch (gregOfRM(modrm)) {

            case 0: /* FILD m16int */
               DIP("fildw %s\n", dis_buf);
               fp_push();
               put_ST(0, unop(Iop_I32StoF64,
                              unop(Iop_16Sto32,
                                   loadLE(Ity_I16, mkexpr(addr)))));
               break;

            case 1: /* FISTTPS m16 (SSE3) */
               DIP("fisttps %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI16S, mkU32(Irrm_ZERO), get_ST(0)) );
               fp_pop();
               break;

            case 2: /* FIST m16 */
               DIP("fists %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
               break;

            case 3: /* FISTP m16 */
               DIP("fistps %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
               fp_pop();
               break;

            case 5: /* FILD m64 */
               DIP("fildll %s\n", dis_buf);
               fp_push();
               put_ST(0, binop(Iop_I64StoF64,
                               get_roundingmode(),
                               loadLE(Ity_I64, mkexpr(addr))));
               break;

            case 7: /* FISTP m64 */
               DIP("fistpll %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
               fp_pop();
               break;

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
               vex_printf("first_opcode == 0xDF\n");
               goto decode_fail;
         }

      } else {

         delta++;
         switch (modrm) {

            case 0xC0: /* FFREEP %st(0) */
               DIP("ffreep %%st(%d)\n", 0);
               put_ST_TAG ( 0, mkU8(0) );
               fp_pop();
               break;

            case 0xE0: /* FNSTSW %ax */
               DIP("fnstsw %%ax\n");
               /* Get the FPU status word value and dump it in %AX. */
               if (0) {
                  /* The obvious thing to do is simply dump the 16-bit
                     status word value in %AX.  However, due to a
                     limitation in Memcheck's origin tracking
                     machinery, this causes Memcheck not to track the
                     origin of any undefinedness into %AH (only into
                     %AL/%AX/%EAX), which means origins are lost in
                     the sequence "fnstsw %ax; test $M,%ah; jcond .." */
                  putIReg(2, R_EAX, get_FPU_sw());
               } else {
                  /* So a somewhat lame kludge is to make it very
                     clear to Memcheck that the value is written to
                     both %AH and %AL.  This generates marginally
                     worse code, but I don't think it matters much. */
                  IRTemp t16 = newTemp(Ity_I16);
                  assign(t16, get_FPU_sw());
                  putIReg( 1, R_AL, unop(Iop_16to8,   mkexpr(t16)) );
                  putIReg( 1, R_AH, unop(Iop_16HIto8, mkexpr(t16)) );
               }
               break;

            case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
               fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
               break;

            case 0xF0 ...
0xF7: /* FCOMIP %st(0),%st(?) */ 5470 /* not really right since COMIP != UCOMIP */ 5471 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True ); 5472 break; 5473 5474 default: 5475 goto decode_fail; 5476 } 5477 } 5478 5479 } 5480 5481 else 5482 vpanic("dis_FPU(x86): invalid primary opcode"); 5483 5484 *decode_ok = True; 5485 return delta; 5486 5487 decode_fail: 5488 *decode_ok = False; 5489 return delta; 5490} 5491 5492 5493/*------------------------------------------------------------*/ 5494/*--- ---*/ 5495/*--- MMX INSTRUCTIONS ---*/ 5496/*--- ---*/ 5497/*------------------------------------------------------------*/ 5498 5499/* Effect of MMX insns on x87 FPU state (table 11-2 of 5500 IA32 arch manual, volume 3): 5501 5502 Read from, or write to MMX register (viz, any insn except EMMS): 5503 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero 5504 * FP stack pointer set to zero 5505 5506 EMMS: 5507 * All tags set to Invalid (empty) -- FPTAGS[i] := zero 5508 * FP stack pointer set to zero 5509*/ 5510 5511static void do_MMX_preamble ( void ) 5512{ 5513 Int i; 5514 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 5515 IRExpr* zero = mkU32(0); 5516 IRExpr* tag1 = mkU8(1); 5517 put_ftop(zero); 5518 for (i = 0; i < 8; i++) 5519 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) ); 5520} 5521 5522static void do_EMMS_preamble ( void ) 5523{ 5524 Int i; 5525 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 5526 IRExpr* zero = mkU32(0); 5527 IRExpr* tag0 = mkU8(0); 5528 put_ftop(zero); 5529 for (i = 0; i < 8; i++) 5530 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) ); 5531} 5532 5533 5534static IRExpr* getMMXReg ( UInt archreg ) 5535{ 5536 vassert(archreg < 8); 5537 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 ); 5538} 5539 5540 5541static void putMMXReg ( UInt archreg, IRExpr* e ) 5542{ 5543 vassert(archreg < 8); 5544 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 5545 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) ); 5546} 5547 5548 5549/* Helper for non-shift MMX insns. Note this is incomplete in the 5550 sense that it does not first call do_MMX_preamble() -- that is the 5551 responsibility of its caller. 
*/ 5552 5553static 5554UInt dis_MMXop_regmem_to_reg ( UChar sorb, 5555 Int delta, 5556 UChar opc, 5557 const HChar* name, 5558 Bool show_granularity ) 5559{ 5560 HChar dis_buf[50]; 5561 UChar modrm = getIByte(delta); 5562 Bool isReg = epartIsReg(modrm); 5563 IRExpr* argL = NULL; 5564 IRExpr* argR = NULL; 5565 IRExpr* argG = NULL; 5566 IRExpr* argE = NULL; 5567 IRTemp res = newTemp(Ity_I64); 5568 5569 Bool invG = False; 5570 IROp op = Iop_INVALID; 5571 void* hAddr = NULL; 5572 Bool eLeft = False; 5573 const HChar* hName = NULL; 5574 5575# define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0) 5576 5577 switch (opc) { 5578 /* Original MMX ones */ 5579 case 0xFC: op = Iop_Add8x8; break; 5580 case 0xFD: op = Iop_Add16x4; break; 5581 case 0xFE: op = Iop_Add32x2; break; 5582 5583 case 0xEC: op = Iop_QAdd8Sx8; break; 5584 case 0xED: op = Iop_QAdd16Sx4; break; 5585 5586 case 0xDC: op = Iop_QAdd8Ux8; break; 5587 case 0xDD: op = Iop_QAdd16Ux4; break; 5588 5589 case 0xF8: op = Iop_Sub8x8; break; 5590 case 0xF9: op = Iop_Sub16x4; break; 5591 case 0xFA: op = Iop_Sub32x2; break; 5592 5593 case 0xE8: op = Iop_QSub8Sx8; break; 5594 case 0xE9: op = Iop_QSub16Sx4; break; 5595 5596 case 0xD8: op = Iop_QSub8Ux8; break; 5597 case 0xD9: op = Iop_QSub16Ux4; break; 5598 5599 case 0xE5: op = Iop_MulHi16Sx4; break; 5600 case 0xD5: op = Iop_Mul16x4; break; 5601 case 0xF5: XXX(x86g_calculate_mmx_pmaddwd); break; 5602 5603 case 0x74: op = Iop_CmpEQ8x8; break; 5604 case 0x75: op = Iop_CmpEQ16x4; break; 5605 case 0x76: op = Iop_CmpEQ32x2; break; 5606 5607 case 0x64: op = Iop_CmpGT8Sx8; break; 5608 case 0x65: op = Iop_CmpGT16Sx4; break; 5609 case 0x66: op = Iop_CmpGT32Sx2; break; 5610 5611 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break; 5612 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break; 5613 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break; 5614 5615 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break; 5616 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break; 5617 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break; 5618 5619 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break; 5620 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break; 5621 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break; 5622 5623 case 0xDB: op = Iop_And64; break; 5624 case 0xDF: op = Iop_And64; invG = True; break; 5625 case 0xEB: op = Iop_Or64; break; 5626 case 0xEF: /* Possibly do better here if argL and argR are the 5627 same reg */ 5628 op = Iop_Xor64; break; 5629 5630 /* Introduced in SSE1 */ 5631 case 0xE0: op = Iop_Avg8Ux8; break; 5632 case 0xE3: op = Iop_Avg16Ux4; break; 5633 case 0xEE: op = Iop_Max16Sx4; break; 5634 case 0xDE: op = Iop_Max8Ux8; break; 5635 case 0xEA: op = Iop_Min16Sx4; break; 5636 case 0xDA: op = Iop_Min8Ux8; break; 5637 case 0xE4: op = Iop_MulHi16Ux4; break; 5638 case 0xF6: XXX(x86g_calculate_mmx_psadbw); break; 5639 5640 /* Introduced in SSE2 */ 5641 case 0xD4: op = Iop_Add64; break; 5642 case 0xFB: op = Iop_Sub64; break; 5643 5644 default: 5645 vex_printf("\n0x%x\n", opc); 5646 vpanic("dis_MMXop_regmem_to_reg"); 5647 } 5648 5649# undef XXX 5650 5651 argG = getMMXReg(gregOfRM(modrm)); 5652 if (invG) 5653 argG = unop(Iop_Not64, argG); 5654 5655 if (isReg) { 5656 delta++; 5657 argE = getMMXReg(eregOfRM(modrm)); 5658 } else { 5659 Int len; 5660 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5661 delta += len; 5662 argE = loadLE(Ity_I64, mkexpr(addr)); 5663 } 5664 5665 if (eLeft) { 5666 argL = argE; 5667 argR = argG; 5668 } else { 
      argL = argG;
      argR = argE;
   }

   if (op != Iop_INVALID) {
      vassert(hName == NULL);
      vassert(hAddr == NULL);
      assign(res, binop(op, argL, argR));
   } else {
      vassert(hName != NULL);
      vassert(hAddr != NULL);
      assign( res,
              mkIRExprCCall(
                 Ity_I64,
                 0/*regparms*/, hName, hAddr,
                 mkIRExprVec_2( argL, argR )
              )
            );
   }

   putMMXReg( gregOfRM(modrm), mkexpr(res) );

   DIP("%s%s %s, %s\n",
       name, show_granularity ? nameMMXGran(opc & 3) : "",
       ( isReg ? nameMMXReg(eregOfRM(modrm)) : dis_buf ),
       nameMMXReg(gregOfRM(modrm)) );

   return delta;
}


/* Vector by scalar shift of G by the amount specified at the bottom
   of E.  This is a straight copy of dis_SSE_shiftG_byE. */

static UInt dis_MMX_shiftG_byE ( UChar sorb, Int delta,
                                 const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen, size;
   IRTemp  addr;
   Bool    shl, shr, sar;
   UChar   rm   = getIByte(delta);
   IRTemp  g0   = newTemp(Ity_I64);
   IRTemp  g1   = newTemp(Ity_I64);
   IRTemp  amt  = newTemp(Ity_I32);
   IRTemp  amt8 = newTemp(Ity_I8);

   if (epartIsReg(rm)) {
      assign( amt, unop(Iop_64to32, getMMXReg(eregOfRM(rm))) );
      DIP("%s %s,%s\n", opname,
                        nameMMXReg(eregOfRM(rm)),
                        nameMMXReg(gregOfRM(rm)) );
      delta++;
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameMMXReg(gregOfRM(rm)) );
      delta += alen;
   }
   assign( g0,   getMMXReg(gregOfRM(rm)) );
   assign( amt8, unop(Iop_32to8, mkexpr(amt)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x4: shl = True; size = 16; break;
      case Iop_ShlN32x2: shl = True; size = 32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      default: vassert(0);
   }

   if (shl || shr) {
     assign(
        g1,
        IRExpr_ITE(
           binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
           binop(op, mkexpr(g0), mkexpr(amt8)),
           mkU64(0)
        )
     );
   } else
   if (sar) {
     assign(
        g1,
        IRExpr_ITE(
           binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
           binop(op, mkexpr(g0), mkexpr(amt8)),
           binop(op, mkexpr(g0), mkU8(size-1))
        )
     );
   } else {
      /*NOTREACHED*/
      vassert(0);
   }

   putMMXReg( gregOfRM(rm), mkexpr(g1) );
   return delta;
}


/* Vector by scalar shift of E by an immediate byte.  This is a
   straight copy of dis_SSE_shiftE_imm.
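   Because the amount is an immediate, the range check is resolved at
   translation time (unlike dis_MMX_shiftG_byE above): a count >= the
   lane width gives an all-zeroes result for logical shifts, and a
   shift by width-1 (a sign-fill) for arithmetic shifts, as on real
   hardware.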
*/ 5778 5779static 5780UInt dis_MMX_shiftE_imm ( Int delta, const HChar* opname, IROp op ) 5781{ 5782 Bool shl, shr, sar; 5783 UChar rm = getIByte(delta); 5784 IRTemp e0 = newTemp(Ity_I64); 5785 IRTemp e1 = newTemp(Ity_I64); 5786 UChar amt, size; 5787 vassert(epartIsReg(rm)); 5788 vassert(gregOfRM(rm) == 2 5789 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6); 5790 amt = getIByte(delta+1); 5791 delta += 2; 5792 DIP("%s $%d,%s\n", opname, 5793 (Int)amt, 5794 nameMMXReg(eregOfRM(rm)) ); 5795 5796 assign( e0, getMMXReg(eregOfRM(rm)) ); 5797 5798 shl = shr = sar = False; 5799 size = 0; 5800 switch (op) { 5801 case Iop_ShlN16x4: shl = True; size = 16; break; 5802 case Iop_ShlN32x2: shl = True; size = 32; break; 5803 case Iop_Shl64: shl = True; size = 64; break; 5804 case Iop_SarN16x4: sar = True; size = 16; break; 5805 case Iop_SarN32x2: sar = True; size = 32; break; 5806 case Iop_ShrN16x4: shr = True; size = 16; break; 5807 case Iop_ShrN32x2: shr = True; size = 32; break; 5808 case Iop_Shr64: shr = True; size = 64; break; 5809 default: vassert(0); 5810 } 5811 5812 if (shl || shr) { 5813 assign( e1, amt >= size 5814 ? mkU64(0) 5815 : binop(op, mkexpr(e0), mkU8(amt)) 5816 ); 5817 } else 5818 if (sar) { 5819 assign( e1, amt >= size 5820 ? binop(op, mkexpr(e0), mkU8(size-1)) 5821 : binop(op, mkexpr(e0), mkU8(amt)) 5822 ); 5823 } else { 5824 /*NOTREACHED*/ 5825 vassert(0); 5826 } 5827 5828 putMMXReg( eregOfRM(rm), mkexpr(e1) ); 5829 return delta; 5830} 5831 5832 5833/* Completely handle all MMX instructions except emms. */ 5834 5835static 5836UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, Int delta ) 5837{ 5838 Int len; 5839 UChar modrm; 5840 HChar dis_buf[50]; 5841 UChar opc = getIByte(delta); 5842 delta++; 5843 5844 /* dis_MMX handles all insns except emms. 
*/ 5845 do_MMX_preamble(); 5846 5847 switch (opc) { 5848 5849 case 0x6E: 5850 /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/ 5851 if (sz != 4) 5852 goto mmx_decode_failure; 5853 modrm = getIByte(delta); 5854 if (epartIsReg(modrm)) { 5855 delta++; 5856 putMMXReg( 5857 gregOfRM(modrm), 5858 binop( Iop_32HLto64, 5859 mkU32(0), 5860 getIReg(4, eregOfRM(modrm)) ) ); 5861 DIP("movd %s, %s\n", 5862 nameIReg(4,eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm))); 5863 } else { 5864 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5865 delta += len; 5866 putMMXReg( 5867 gregOfRM(modrm), 5868 binop( Iop_32HLto64, 5869 mkU32(0), 5870 loadLE(Ity_I32, mkexpr(addr)) ) ); 5871 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregOfRM(modrm))); 5872 } 5873 break; 5874 5875 case 0x7E: /* MOVD (src)mmxreg (G), (dst)ireg-or-mem (E) */ 5876 if (sz != 4) 5877 goto mmx_decode_failure; 5878 modrm = getIByte(delta); 5879 if (epartIsReg(modrm)) { 5880 delta++; 5881 putIReg( 4, eregOfRM(modrm), 5882 unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) ); 5883 DIP("movd %s, %s\n", 5884 nameMMXReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm))); 5885 } else { 5886 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5887 delta += len; 5888 storeLE( mkexpr(addr), 5889 unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) ); 5890 DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm)), dis_buf); 5891 } 5892 break; 5893 5894 case 0x6F: 5895 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 5896 if (sz != 4) 5897 goto mmx_decode_failure; 5898 modrm = getIByte(delta); 5899 if (epartIsReg(modrm)) { 5900 delta++; 5901 putMMXReg( gregOfRM(modrm), getMMXReg(eregOfRM(modrm)) ); 5902 DIP("movq %s, %s\n", 5903 nameMMXReg(eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm))); 5904 } else { 5905 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5906 delta += len; 5907 putMMXReg( gregOfRM(modrm), loadLE(Ity_I64, mkexpr(addr)) ); 5908 DIP("movq %s, %s\n", 5909 dis_buf, nameMMXReg(gregOfRM(modrm))); 5910 } 5911 break; 5912 5913 case 0x7F: 5914 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 5915 if (sz != 4) 5916 goto mmx_decode_failure; 5917 modrm = getIByte(delta); 5918 if (epartIsReg(modrm)) { 5919 delta++; 5920 putMMXReg( eregOfRM(modrm), getMMXReg(gregOfRM(modrm)) ); 5921 DIP("movq %s, %s\n", 5922 nameMMXReg(gregOfRM(modrm)), nameMMXReg(eregOfRM(modrm))); 5923 } else { 5924 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5925 delta += len; 5926 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) ); 5927 DIP("mov(nt)q %s, %s\n", 5928 nameMMXReg(gregOfRM(modrm)), dis_buf); 5929 } 5930 break; 5931 5932 case 0xFC: 5933 case 0xFD: 5934 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 5935 if (sz != 4) 5936 goto mmx_decode_failure; 5937 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padd", True ); 5938 break; 5939 5940 case 0xEC: 5941 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5942 if (sz != 4) 5943 goto mmx_decode_failure; 5944 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padds", True ); 5945 break; 5946 5947 case 0xDC: 5948 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5949 if (sz != 4) 5950 goto mmx_decode_failure; 5951 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "paddus", True ); 5952 break; 5953 5954 case 0xF8: 5955 case 0xF9: 5956 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 5957 if (sz != 4) 5958 goto mmx_decode_failure; 5959 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psub", True ); 5960 break; 5961 5962 case 0xE8: 5963 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5964 if (sz != 4) 
5965 goto mmx_decode_failure; 5966 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubs", True ); 5967 break; 5968 5969 case 0xD8: 5970 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5971 if (sz != 4) 5972 goto mmx_decode_failure; 5973 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubus", True ); 5974 break; 5975 5976 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 5977 if (sz != 4) 5978 goto mmx_decode_failure; 5979 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmulhw", False ); 5980 break; 5981 5982 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 5983 if (sz != 4) 5984 goto mmx_decode_failure; 5985 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmullw", False ); 5986 break; 5987 5988 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 5989 if (sz != 4) goto mmx_decode_failure; 5990 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmaddwd", False ); 5991 break; 5992 5993 case 0x74: 5994 case 0x75: 5995 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 5996 if (sz != 4) 5997 goto mmx_decode_failure; 5998 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpeq", True ); 5999 break; 6000 6001 case 0x64: 6002 case 0x65: 6003 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 6004 if (sz != 4) 6005 goto mmx_decode_failure; 6006 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpgt", True ); 6007 break; 6008 6009 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 6010 if (sz != 4) 6011 goto mmx_decode_failure; 6012 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packssdw", False ); 6013 break; 6014 6015 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 6016 if (sz != 4) 6017 goto mmx_decode_failure; 6018 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packsswb", False ); 6019 break; 6020 6021 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 6022 if (sz != 4) 6023 goto mmx_decode_failure; 6024 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packuswb", False ); 6025 break; 6026 6027 case 0x68: 6028 case 0x69: 6029 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 6030 if (sz != 4) 6031 goto mmx_decode_failure; 6032 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckh", True ); 6033 break; 6034 6035 case 0x60: 6036 case 0x61: 6037 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 6038 if (sz != 4) 6039 goto mmx_decode_failure; 6040 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckl", True ); 6041 break; 6042 6043 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 6044 if (sz != 4) 6045 goto mmx_decode_failure; 6046 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pand", False ); 6047 break; 6048 6049 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 6050 if (sz != 4) 6051 goto mmx_decode_failure; 6052 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pandn", False ); 6053 break; 6054 6055 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 6056 if (sz != 4) 6057 goto mmx_decode_failure; 6058 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "por", False ); 6059 break; 6060 6061 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 6062 if (sz != 4) 6063 goto mmx_decode_failure; 6064 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pxor", False ); 6065 break; 6066 6067# define SHIFT_BY_REG(_name,_op) \ 6068 delta = dis_MMX_shiftG_byE(sorb, delta, _name, _op); \ 6069 break; 6070 6071 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 6072 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4); 6073 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2); 6074
case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64); 6075 6076 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 6077 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4); 6078 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2); 6079 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64); 6080 6081 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 6082 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4); 6083 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2); 6084 6085# undef SHIFT_BY_REG 6086 6087 case 0x71: 6088 case 0x72: 6089 case 0x73: { 6090 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 6091 UChar byte2, subopc; 6092 if (sz != 4) 6093 goto mmx_decode_failure; 6094 byte2 = getIByte(delta); /* amode / sub-opcode */ 6095 subopc = toUChar( (byte2 >> 3) & 7 ); 6096 6097# define SHIFT_BY_IMM(_name,_op) \ 6098 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \ 6099 } while (0) 6100 6101 if (subopc == 2 /*SRL*/ && opc == 0x71) 6102 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4); 6103 else if (subopc == 2 /*SRL*/ && opc == 0x72) 6104 SHIFT_BY_IMM("psrld", Iop_ShrN32x2); 6105 else if (subopc == 2 /*SRL*/ && opc == 0x73) 6106 SHIFT_BY_IMM("psrlq", Iop_Shr64); 6107 6108 else if (subopc == 4 /*SAR*/ && opc == 0x71) 6109 SHIFT_BY_IMM("psraw", Iop_SarN16x4); 6110 else if (subopc == 4 /*SAR*/ && opc == 0x72) 6111 SHIFT_BY_IMM("psrad", Iop_SarN32x2); 6112 6113 else if (subopc == 6 /*SHL*/ && opc == 0x71) 6114 SHIFT_BY_IMM("psllw", Iop_ShlN16x4); 6115 else if (subopc == 6 /*SHL*/ && opc == 0x72) 6116 SHIFT_BY_IMM("pslld", Iop_ShlN32x2); 6117 else if (subopc == 6 /*SHL*/ && opc == 0x73) 6118 SHIFT_BY_IMM("psllq", Iop_Shl64); 6119 6120 else goto mmx_decode_failure; 6121 6122# undef SHIFT_BY_IMM 6123 break; 6124 } 6125 6126 case 0xF7: { 6127 IRTemp addr = newTemp(Ity_I32); 6128 IRTemp regD = newTemp(Ity_I64); 6129 IRTemp regM = newTemp(Ity_I64); 6130 IRTemp mask = newTemp(Ity_I64); 6131 IRTemp olddata = newTemp(Ity_I64); 6132 IRTemp newdata = newTemp(Ity_I64); 6133 6134 modrm = getIByte(delta); 6135 if (sz != 4 || (!epartIsReg(modrm))) 6136 goto mmx_decode_failure; 6137 delta++; 6138 6139 assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) )); 6140 assign( regM, getMMXReg( eregOfRM(modrm) )); 6141 assign( regD, getMMXReg( gregOfRM(modrm) )); 6142 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) ); 6143 assign( olddata, loadLE( Ity_I64, mkexpr(addr) )); 6144 assign( newdata, 6145 binop(Iop_Or64, 6146 binop(Iop_And64, 6147 mkexpr(regD), 6148 mkexpr(mask) ), 6149 binop(Iop_And64, 6150 mkexpr(olddata), 6151 unop(Iop_Not64, mkexpr(mask)))) ); 6152 storeLE( mkexpr(addr), mkexpr(newdata) ); 6153 DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ), 6154 nameMMXReg( gregOfRM(modrm) ) ); 6155 break; 6156 } 6157 6158 /* --- MMX decode failure --- */ 6159 default: 6160 mmx_decode_failure: 6161 *decode_ok = False; 6162 return delta; /* ignored */ 6163 6164 } 6165 6166 *decode_ok = True; 6167 return delta; 6168} 6169 6170 6171/*------------------------------------------------------------*/ 6172/*--- More misc arithmetic and other obscure insns. ---*/ 6173/*------------------------------------------------------------*/ 6174 6175/* Double length left and right shifts. Apparently only required in 6176 v-size (no b- variant). */ 6177static 6178UInt dis_SHLRD_Gv_Ev ( UChar sorb, 6179 Int delta, UChar modrm, 6180 Int sz, 6181 IRExpr* shift_amt, 6182 Bool amt_is_literal, 6183 const HChar* shift_amt_txt, 6184 Bool left_shift ) 6185{ 6186 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used 6187 for printing it. 
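For example, shldl $4, %ebx, %eax computes %eax = (%eax << 4) | (%ebx >>u 28); the construction described below builds exactly that double-length value and keeps the relevant half.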
And eip on entry points at the modrm byte. */ 6188 Int len; 6189 HChar dis_buf[50]; 6190 6191 IRType ty = szToITy(sz); 6192 IRTemp gsrc = newTemp(ty); 6193 IRTemp esrc = newTemp(ty); 6194 IRTemp addr = IRTemp_INVALID; 6195 IRTemp tmpSH = newTemp(Ity_I8); 6196 IRTemp tmpL = IRTemp_INVALID; 6197 IRTemp tmpRes = IRTemp_INVALID; 6198 IRTemp tmpSubSh = IRTemp_INVALID; 6199 IROp mkpair; 6200 IROp getres; 6201 IROp shift; 6202 IRExpr* mask = NULL; 6203 6204 vassert(sz == 2 || sz == 4); 6205 6206 /* The E-part is the destination; this is shifted. The G-part 6207 supplies bits to be shifted into the E-part, but is not 6208 changed. 6209 6210 If shifting left, form a double-length word with E at the top 6211 and G at the bottom, and shift this left. The result is then in 6212 the high part. 6213 6214 If shifting right, form a double-length word with G at the top 6215 and E at the bottom, and shift this right. The result is then 6216 at the bottom. */ 6217 6218 /* Fetch the operands. */ 6219 6220 assign( gsrc, getIReg(sz, gregOfRM(modrm)) ); 6221 6222 if (epartIsReg(modrm)) { 6223 delta++; 6224 assign( esrc, getIReg(sz, eregOfRM(modrm)) ); 6225 DIP("sh%cd%c %s, %s, %s\n", 6226 ( left_shift ? 'l' : 'r' ), nameISize(sz), 6227 shift_amt_txt, 6228 nameIReg(sz, gregOfRM(modrm)), nameIReg(sz, eregOfRM(modrm))); 6229 } else { 6230 addr = disAMode ( &len, sorb, delta, dis_buf ); 6231 delta += len; 6232 assign( esrc, loadLE(ty, mkexpr(addr)) ); 6233 DIP("sh%cd%c %s, %s, %s\n", 6234 ( left_shift ? 'l' : 'r' ), nameISize(sz), 6235 shift_amt_txt, 6236 nameIReg(sz, gregOfRM(modrm)), dis_buf); 6237 } 6238 6239 /* Round up the relevant primops. */ 6240 6241 if (sz == 4) { 6242 tmpL = newTemp(Ity_I64); 6243 tmpRes = newTemp(Ity_I32); 6244 tmpSubSh = newTemp(Ity_I32); 6245 mkpair = Iop_32HLto64; 6246 getres = left_shift ? Iop_64HIto32 : Iop_64to32; 6247 shift = left_shift ? Iop_Shl64 : Iop_Shr64; 6248 mask = mkU8(31); 6249 } else { 6250 /* sz == 2 */ 6251 tmpL = newTemp(Ity_I32); 6252 tmpRes = newTemp(Ity_I16); 6253 tmpSubSh = newTemp(Ity_I16); 6254 mkpair = Iop_16HLto32; 6255 getres = left_shift ? Iop_32HIto16 : Iop_32to16; 6256 shift = left_shift ? Iop_Shl32 : Iop_Shr32; 6257 mask = mkU8(15); 6258 } 6259 6260 /* Do the shift, calculate the subshift value, and set 6261 the flag thunk. */ 6262 6263 assign( tmpSH, binop(Iop_And8, shift_amt, mask) ); 6264 6265 if (left_shift) 6266 assign( tmpL, binop(mkpair, mkexpr(esrc), mkexpr(gsrc)) ); 6267 else 6268 assign( tmpL, binop(mkpair, mkexpr(gsrc), mkexpr(esrc)) ); 6269 6270 assign( tmpRes, unop(getres, binop(shift, mkexpr(tmpL), mkexpr(tmpSH)) ) ); 6271 assign( tmpSubSh, 6272 unop(getres, 6273 binop(shift, 6274 mkexpr(tmpL), 6275 binop(Iop_And8, 6276 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ), 6277 mask))) ); 6278 6279 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl32 : Iop_Sar32, 6280 tmpRes, tmpSubSh, ty, tmpSH ); 6281 6282 /* Put result back. */ 6283 6284 if (epartIsReg(modrm)) { 6285 putIReg(sz, eregOfRM(modrm), mkexpr(tmpRes)); 6286 } else { 6287 storeLE( mkexpr(addr), mkexpr(tmpRes) ); 6288 } 6289 6290 if (amt_is_literal) delta++; 6291 return delta; 6292} 6293 6294 6295/* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not 6296 required. 
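Note that for a memory E the bit offset taken from G is interpreted as signed (widenSto32 below), so it can address bits below as well as above the base address; hence also the arithmetic (Sar32) shift when the byte address is formed.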
*/ 6297 6298typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp; 6299 6300static const HChar* nameBtOp ( BtOp op ) 6301{ 6302 switch (op) { 6303 case BtOpNone: return ""; 6304 case BtOpSet: return "s"; 6305 case BtOpReset: return "r"; 6306 case BtOpComp: return "c"; 6307 default: vpanic("nameBtOp(x86)"); 6308 } 6309} 6310 6311 6312static 6313UInt dis_bt_G_E ( const VexAbiInfo* vbi, 6314 UChar sorb, Bool locked, Int sz, Int delta, BtOp op ) 6315{ 6316 HChar dis_buf[50]; 6317 UChar modrm; 6318 Int len; 6319 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0, 6320 t_addr1, t_esp, t_mask, t_new; 6321 6322 vassert(sz == 2 || sz == 4); 6323 6324 t_fetched = t_bitno0 = t_bitno1 = t_bitno2 6325 = t_addr0 = t_addr1 = t_esp 6326 = t_mask = t_new = IRTemp_INVALID; 6327 6328 t_fetched = newTemp(Ity_I8); 6329 t_new = newTemp(Ity_I8); 6330 t_bitno0 = newTemp(Ity_I32); 6331 t_bitno1 = newTemp(Ity_I32); 6332 t_bitno2 = newTemp(Ity_I8); 6333 t_addr1 = newTemp(Ity_I32); 6334 modrm = getIByte(delta); 6335 6336 assign( t_bitno0, widenSto32(getIReg(sz, gregOfRM(modrm))) ); 6337 6338 if (epartIsReg(modrm)) { 6339 delta++; 6340 /* Get it onto the client's stack. */ 6341 t_esp = newTemp(Ity_I32); 6342 t_addr0 = newTemp(Ity_I32); 6343 6344 /* For the choice of the value 128, see comment in dis_bt_G_E in 6345 guest_amd64_toIR.c. We point out here only that 128 is 6346 fast-cased in Memcheck and is > 0, so seems like a good 6347 choice. */ 6348 vassert(vbi->guest_stack_redzone_size == 0); 6349 assign( t_esp, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(128)) ); 6350 putIReg(4, R_ESP, mkexpr(t_esp)); 6351 6352 storeLE( mkexpr(t_esp), getIReg(sz, eregOfRM(modrm)) ); 6353 6354 /* Make t_addr0 point at it. */ 6355 assign( t_addr0, mkexpr(t_esp) ); 6356 6357 /* Mask out upper bits of the shift amount, since we're doing a 6358 reg. */ 6359 assign( t_bitno1, binop(Iop_And32, 6360 mkexpr(t_bitno0), 6361 mkU32(sz == 4 ? 31 : 15)) ); 6362 6363 } else { 6364 t_addr0 = disAMode ( &len, sorb, delta, dis_buf ); 6365 delta += len; 6366 assign( t_bitno1, mkexpr(t_bitno0) ); 6367 } 6368 6369 /* At this point: t_addr0 is the address being operated on. If it 6370 was a reg, we will have pushed it onto the client's stack. 6371 t_bitno1 is the bit number, suitably masked in the case of a 6372 reg. */ 6373 6374 /* Now the main sequence. 
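For example, btl %ecx, (%eax) with %ecx holding 35 yields t_addr1 = (%eax) + (35 >>s 3) = (%eax) + 4 and t_bitno2 = 35 & 7 = 3, i.e. bit 3 of the byte at offset 4.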
*/ 6375 assign( t_addr1, 6376 binop(Iop_Add32, 6377 mkexpr(t_addr0), 6378 binop(Iop_Sar32, mkexpr(t_bitno1), mkU8(3))) ); 6379 6380 /* t_addr1 now holds effective address */ 6381 6382 assign( t_bitno2, 6383 unop(Iop_32to8, 6384 binop(Iop_And32, mkexpr(t_bitno1), mkU32(7))) ); 6385 6386 /* t_bitno2 contains offset of bit within byte */ 6387 6388 if (op != BtOpNone) { 6389 t_mask = newTemp(Ity_I8); 6390 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) ); 6391 } 6392 6393 /* t_mask is now a suitable byte mask */ 6394 6395 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) ); 6396 6397 if (op != BtOpNone) { 6398 switch (op) { 6399 case BtOpSet: 6400 assign( t_new, 6401 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) ); 6402 break; 6403 case BtOpComp: 6404 assign( t_new, 6405 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) ); 6406 break; 6407 case BtOpReset: 6408 assign( t_new, 6409 binop(Iop_And8, mkexpr(t_fetched), 6410 unop(Iop_Not8, mkexpr(t_mask))) ); 6411 break; 6412 default: 6413 vpanic("dis_bt_G_E(x86)"); 6414 } 6415 if (locked && !epartIsReg(modrm)) { 6416 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/, 6417 mkexpr(t_new)/*new*/, 6418 guest_EIP_curr_instr ); 6419 } else { 6420 storeLE( mkexpr(t_addr1), mkexpr(t_new) ); 6421 } 6422 } 6423 6424 /* Side effect done; now get selected bit into Carry flag */ 6425 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 6426 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 6427 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 6428 stmt( IRStmt_Put( 6429 OFFB_CC_DEP1, 6430 binop(Iop_And32, 6431 binop(Iop_Shr32, 6432 unop(Iop_8Uto32, mkexpr(t_fetched)), 6433 mkexpr(t_bitno2)), 6434 mkU32(1))) 6435 ); 6436 /* Set NDEP even though it isn't used. This makes redundant-PUT 6437 elimination of previous stores to this field work better. */ 6438 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 6439 6440 /* Move reg operand from stack back to reg */ 6441 if (epartIsReg(modrm)) { 6442 /* t_esp still points at it. */ 6443 putIReg(sz, eregOfRM(modrm), loadLE(szToITy(sz), mkexpr(t_esp)) ); 6444 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t_esp), mkU32(128)) ); 6445 } 6446 6447 DIP("bt%s%c %s, %s\n", 6448 nameBtOp(op), nameISize(sz), nameIReg(sz, gregOfRM(modrm)), 6449 ( epartIsReg(modrm) ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ) ); 6450 6451 return delta; 6452} 6453 6454 6455 6456/* Handle BSF/BSR. Only v-size seems necessary. */ 6457static 6458UInt dis_bs_E_G ( UChar sorb, Int sz, Int delta, Bool fwds ) 6459{ 6460 Bool isReg; 6461 UChar modrm; 6462 HChar dis_buf[50]; 6463 6464 IRType ty = szToITy(sz); 6465 IRTemp src = newTemp(ty); 6466 IRTemp dst = newTemp(ty); 6467 6468 IRTemp src32 = newTemp(Ity_I32); 6469 IRTemp dst32 = newTemp(Ity_I32); 6470 IRTemp srcB = newTemp(Ity_I1); 6471 6472 vassert(sz == 4 || sz == 2); 6473 6474 modrm = getIByte(delta); 6475 6476 isReg = epartIsReg(modrm); 6477 if (isReg) { 6478 delta++; 6479 assign( src, getIReg(sz, eregOfRM(modrm)) ); 6480 } else { 6481 Int len; 6482 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 6483 delta += len; 6484 assign( src, loadLE(ty, mkexpr(addr)) ); 6485 } 6486 6487 DIP("bs%c%c %s, %s\n", 6488 fwds ? 'f' : 'r', nameISize(sz), 6489 ( isReg ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ), 6490 nameIReg(sz, gregOfRM(modrm))); 6491 6492 /* Generate a bool expression which is zero iff the original is 6493 zero, and nonzero otherwise. 
Ask for a CmpNE version which, if 6494 instrumented by Memcheck, is instrumented expensively, since 6495 this may be used on the output of a preceding movmskb insn, 6496 which has been known to be partially defined, and in need of 6497 careful handling. */ 6498 assign( srcB, binop(mkSizedOp(ty,Iop_ExpCmpNE8), 6499 mkexpr(src), mkU(ty,0)) ); 6500 6501 /* Flags: Z is 1 iff source value is zero. All others 6502 are undefined -- we force them to zero. */ 6503 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 6504 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 6505 stmt( IRStmt_Put( 6506 OFFB_CC_DEP1, 6507 IRExpr_ITE( mkexpr(srcB), 6508 /* src!=0 */ 6509 mkU32(0), 6510 /* src==0 */ 6511 mkU32(X86G_CC_MASK_Z) 6512 ) 6513 )); 6514 /* Set NDEP even though it isn't used. This makes redundant-PUT 6515 elimination of previous stores to this field work better. */ 6516 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 6517 6518 /* Result: iff source value is zero, we can't use 6519 Iop_Clz32/Iop_Ctz32 as they have no defined result in that case. 6520 But anyway, Intel x86 semantics say the result is undefined in 6521 such situations. Hence handle the zero case specially. */ 6522 6523 /* Bleh. What we compute: 6524 6525 bsf32: if src == 0 then 0 else Ctz32(src) 6526 bsr32: if src == 0 then 0 else 31 - Clz32(src) 6527 6528 bsf16: if src == 0 then 0 else Ctz32(16Uto32(src)) 6529 bsr16: if src == 0 then 0 else 31 - Clz32(16Uto32(src)) 6530 6531 First, widen src to 32 bits if it is not already. 6532 6533 Postscript 15 Oct 04: it seems that at least VIA Nehemiah leaves the 6534 dst register unchanged when src == 0. Hence change accordingly. 6535 */ 6536 if (sz == 2) 6537 assign( src32, unop(Iop_16Uto32, mkexpr(src)) ); 6538 else 6539 assign( src32, mkexpr(src) ); 6540 6541 /* The main computation, guarding against zero. */ 6542 assign( dst32, 6543 IRExpr_ITE( 6544 mkexpr(srcB), 6545 /* src != 0 */ 6546 fwds ? 
unop(Iop_Ctz32, mkexpr(src32)) 6547 : binop(Iop_Sub32, 6548 mkU32(31), 6549 unop(Iop_Clz32, mkexpr(src32))), 6550 /* src == 0 -- leave dst unchanged */ 6551 widenUto32( getIReg( sz, gregOfRM(modrm) ) ) 6552 ) 6553 ); 6554 6555 if (sz == 2) 6556 assign( dst, unop(Iop_32to16, mkexpr(dst32)) ); 6557 else 6558 assign( dst, mkexpr(dst32) ); 6559 6560 /* dump result back */ 6561 putIReg( sz, gregOfRM(modrm), mkexpr(dst) ); 6562 6563 return delta; 6564} 6565 6566 6567static 6568void codegen_xchg_eAX_Reg ( Int sz, Int reg ) 6569{ 6570 IRType ty = szToITy(sz); 6571 IRTemp t1 = newTemp(ty); 6572 IRTemp t2 = newTemp(ty); 6573 vassert(sz == 2 || sz == 4); 6574 assign( t1, getIReg(sz, R_EAX) ); 6575 assign( t2, getIReg(sz, reg) ); 6576 putIReg( sz, R_EAX, mkexpr(t2) ); 6577 putIReg( sz, reg, mkexpr(t1) ); 6578 DIP("xchg%c %s, %s\n", 6579 nameISize(sz), nameIReg(sz, R_EAX), nameIReg(sz, reg)); 6580} 6581 6582 6583static 6584void codegen_SAHF ( void ) 6585{ 6586 /* Set the flags to: 6587 (x86g_calculate_flags_all() & X86G_CC_MASK_O) -- retain the old O flag 6588 | (%AH & (X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A 6589 |X86G_CC_MASK_P|X86G_CC_MASK_C)) 6590 */ 6591 UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A 6592 |X86G_CC_MASK_C|X86G_CC_MASK_P; 6593 IRTemp oldflags = newTemp(Ity_I32); 6594 assign( oldflags, mk_x86g_calculate_eflags_all() ); 6595 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 6596 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 6597 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 6598 stmt( IRStmt_Put( OFFB_CC_DEP1, 6599 binop(Iop_Or32, 6600 binop(Iop_And32, mkexpr(oldflags), mkU32(X86G_CC_MASK_O)), 6601 binop(Iop_And32, 6602 binop(Iop_Shr32, getIReg(4, R_EAX), mkU8(8)), 6603 mkU32(mask_SZACP)) 6604 ) 6605 )); 6606 /* Set NDEP even though it isn't used. This makes redundant-PUT 6607 elimination of previous stores to this field work better. 
*/ 6608 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 6609} 6610 6611 6612static 6613void codegen_LAHF ( void ) 6614{ 6615 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */ 6616 IRExpr* eax_with_hole; 6617 IRExpr* new_byte; 6618 IRExpr* new_eax; 6619 UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A 6620 |X86G_CC_MASK_C|X86G_CC_MASK_P; 6621 6622 IRTemp flags = newTemp(Ity_I32); 6623 assign( flags, mk_x86g_calculate_eflags_all() ); 6624 6625 eax_with_hole 6626 = binop(Iop_And32, getIReg(4, R_EAX), mkU32(0xFFFF00FF)); 6627 new_byte 6628 = binop(Iop_Or32, binop(Iop_And32, mkexpr(flags), mkU32(mask_SZACP)), 6629 mkU32(1<<1)); 6630 new_eax 6631 = binop(Iop_Or32, eax_with_hole, 6632 binop(Iop_Shl32, new_byte, mkU8(8))); 6633 putIReg(4, R_EAX, new_eax); 6634} 6635 6636 6637static 6638UInt dis_cmpxchg_G_E ( UChar sorb, 6639 Bool locked, 6640 Int size, 6641 Int delta0 ) 6642{ 6643 HChar dis_buf[50]; 6644 Int len; 6645 6646 IRType ty = szToITy(size); 6647 IRTemp acc = newTemp(ty); 6648 IRTemp src = newTemp(ty); 6649 IRTemp dest = newTemp(ty); 6650 IRTemp dest2 = newTemp(ty); 6651 IRTemp acc2 = newTemp(ty); 6652 IRTemp cond = newTemp(Ity_I1); 6653 IRTemp addr = IRTemp_INVALID; 6654 UChar rm = getUChar(delta0); 6655 6656 /* There are 3 cases to consider: 6657 6658 reg-reg: ignore any lock prefix, generate sequence based 6659 on ITE 6660 6661 reg-mem, not locked: ignore any lock prefix, generate sequence 6662 based on ITE 6663 6664 reg-mem, locked: use IRCAS 6665 */ 6666 if (epartIsReg(rm)) { 6667 /* case 1 */ 6668 assign( dest, getIReg(size, eregOfRM(rm)) ); 6669 delta0++; 6670 assign( src, getIReg(size, gregOfRM(rm)) ); 6671 assign( acc, getIReg(size, R_EAX) ); 6672 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 6673 assign( cond, mk_x86g_calculate_condition(X86CondZ) ); 6674 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) ); 6675 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) ); 6676 putIReg(size, R_EAX, mkexpr(acc2)); 6677 putIReg(size, eregOfRM(rm), mkexpr(dest2)); 6678 DIP("cmpxchg%c %s,%s\n", nameISize(size), 6679 nameIReg(size,gregOfRM(rm)), 6680 nameIReg(size,eregOfRM(rm)) ); 6681 } 6682 else if (!epartIsReg(rm) && !locked) { 6683 /* case 2 */ 6684 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6685 assign( dest, loadLE(ty, mkexpr(addr)) ); 6686 delta0 += len; 6687 assign( src, getIReg(size, gregOfRM(rm)) ); 6688 assign( acc, getIReg(size, R_EAX) ); 6689 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 6690 assign( cond, mk_x86g_calculate_condition(X86CondZ) ); 6691 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) ); 6692 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) ); 6693 putIReg(size, R_EAX, mkexpr(acc2)); 6694 storeLE( mkexpr(addr), mkexpr(dest2) ); 6695 DIP("cmpxchg%c %s,%s\n", nameISize(size), 6696 nameIReg(size,gregOfRM(rm)), dis_buf); 6697 } 6698 else if (!epartIsReg(rm) && locked) { 6699 /* case 3 */ 6700 /* src is new value. acc is expected value. dest is old value. 6701 Compute success from the output of the IRCAS, and steer the 6702 new value for EAX accordingly: in case of success, EAX is 6703 unchanged. 
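(Recall the cmpxchg semantics: if mem == %EAX then mem := src and ZF=1, else %EAX := mem and ZF=0. The IRCAS yields the old memory value in 'dest'; comparing it with 'acc' via the flag thunk recovers ZF, and the ITE below steers either the unchanged accumulator or the observed old value back into %EAX.)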
*/ 6704 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6705 delta0 += len; 6706 assign( src, getIReg(size, gregOfRM(rm)) ); 6707 assign( acc, getIReg(size, R_EAX) ); 6708 stmt( IRStmt_CAS( 6709 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr), 6710 NULL, mkexpr(acc), NULL, mkexpr(src) ) 6711 )); 6712 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 6713 assign( cond, mk_x86g_calculate_condition(X86CondZ) ); 6714 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) ); 6715 putIReg(size, R_EAX, mkexpr(acc2)); 6716 DIP("cmpxchg%c %s,%s\n", nameISize(size), 6717 nameIReg(size,gregOfRM(rm)), dis_buf); 6718 } 6719 else vassert(0); 6720 6721 return delta0; 6722} 6723 6724 6725/* Handle conditional move instructions of the form 6726 cmovcc E(reg-or-mem), G(reg) 6727 6728 E(src) is reg-or-mem 6729 G(dst) is reg. 6730 6731 If E is reg, --> GET %E, tmps 6732 GET %G, tmpd 6733 CMOVcc tmps, tmpd 6734 PUT tmpd, %G 6735 6736 If E is mem --> (getAddr E) -> tmpa 6737 LD (tmpa), tmps 6738 GET %G, tmpd 6739 CMOVcc tmps, tmpd 6740 PUT tmpd, %G 6741*/ 6742static 6743UInt dis_cmov_E_G ( UChar sorb, 6744 Int sz, 6745 X86Condcode cond, 6746 Int delta0 ) 6747{ 6748 UChar rm = getIByte(delta0); 6749 HChar dis_buf[50]; 6750 Int len; 6751 6752 IRType ty = szToITy(sz); 6753 IRTemp tmps = newTemp(ty); 6754 IRTemp tmpd = newTemp(ty); 6755 6756 if (epartIsReg(rm)) { 6757 assign( tmps, getIReg(sz, eregOfRM(rm)) ); 6758 assign( tmpd, getIReg(sz, gregOfRM(rm)) ); 6759 6760 putIReg(sz, gregOfRM(rm), 6761 IRExpr_ITE( mk_x86g_calculate_condition(cond), 6762 mkexpr(tmps), 6763 mkexpr(tmpd) ) 6764 ); 6765 DIP("cmov%c%s %s,%s\n", nameISize(sz), 6766 name_X86Condcode(cond), 6767 nameIReg(sz,eregOfRM(rm)), 6768 nameIReg(sz,gregOfRM(rm))); 6769 return 1+delta0; 6770 } 6771 6772 /* E refers to memory */ 6773 { 6774 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); 6775 assign( tmps, loadLE(ty, mkexpr(addr)) ); 6776 assign( tmpd, getIReg(sz, gregOfRM(rm)) ); 6777 6778 putIReg(sz, gregOfRM(rm), 6779 IRExpr_ITE( mk_x86g_calculate_condition(cond), 6780 mkexpr(tmps), 6781 mkexpr(tmpd) ) 6782 ); 6783 6784 DIP("cmov%c%s %s,%s\n", nameISize(sz), 6785 name_X86Condcode(cond), 6786 dis_buf, 6787 nameIReg(sz,gregOfRM(rm))); 6788 return len+delta0; 6789 } 6790} 6791 6792 6793static 6794UInt dis_xadd_G_E ( UChar sorb, Bool locked, Int sz, Int delta0, 6795 Bool* decodeOK ) 6796{ 6797 Int len; 6798 UChar rm = getIByte(delta0); 6799 HChar dis_buf[50]; 6800 6801 IRType ty = szToITy(sz); 6802 IRTemp tmpd = newTemp(ty); 6803 IRTemp tmpt0 = newTemp(ty); 6804 IRTemp tmpt1 = newTemp(ty); 6805 6806 /* There are 3 cases to consider: 6807 6808 reg-reg: ignore any lock prefix, 6809 generate 'naive' (non-atomic) sequence 6810 6811 reg-mem, not locked: ignore any lock prefix, generate 'naive' 6812 (non-atomic) sequence 6813 6814 reg-mem, locked: use IRCAS 6815 */ 6816 6817 if (epartIsReg(rm)) { 6818 /* case 1 */ 6819 assign( tmpd, getIReg(sz, eregOfRM(rm))); 6820 assign( tmpt0, getIReg(sz, gregOfRM(rm)) ); 6821 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 6822 mkexpr(tmpd), mkexpr(tmpt0)) ); 6823 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 6824 putIReg(sz, eregOfRM(rm), mkexpr(tmpt1)); 6825 putIReg(sz, gregOfRM(rm), mkexpr(tmpd)); 6826 DIP("xadd%c %s, %s\n", 6827 nameISize(sz), nameIReg(sz,gregOfRM(rm)), 6828 nameIReg(sz,eregOfRM(rm))); 6829 *decodeOK = True; 6830 return 1+delta0; 6831 } 6832 else if (!epartIsReg(rm) && !locked) { 6833 /* case 2 */ 6834 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); 6835 assign( tmpd, 
loadLE(ty, mkexpr(addr)) ); 6836 assign( tmpt0, getIReg(sz, gregOfRM(rm)) ); 6837 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 6838 mkexpr(tmpd), mkexpr(tmpt0)) ); 6839 storeLE( mkexpr(addr), mkexpr(tmpt1) ); 6840 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 6841 putIReg(sz, gregOfRM(rm), mkexpr(tmpd)); 6842 DIP("xadd%c %s, %s\n", 6843 nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf); 6844 *decodeOK = True; 6845 return len+delta0; 6846 } 6847 else if (!epartIsReg(rm) && locked) { 6848 /* case 3 */ 6849 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); 6850 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 6851 assign( tmpt0, getIReg(sz, gregOfRM(rm)) ); 6852 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 6853 mkexpr(tmpd), mkexpr(tmpt0)) ); 6854 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/, 6855 mkexpr(tmpt1)/*newVal*/, guest_EIP_curr_instr ); 6856 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 6857 putIReg(sz, gregOfRM(rm), mkexpr(tmpd)); 6858 DIP("xadd%c %s, %s\n", 6859 nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf); 6860 *decodeOK = True; 6861 return len+delta0; 6862 } 6863 /*UNREACHED*/ 6864 vassert(0); 6865} 6866 6867/* Move 16 bits from Ew (ireg or mem) to G (a segment register). */ 6868 6869static 6870UInt dis_mov_Ew_Sw ( UChar sorb, Int delta0 ) 6871{ 6872 Int len; 6873 IRTemp addr; 6874 UChar rm = getIByte(delta0); 6875 HChar dis_buf[50]; 6876 6877 if (epartIsReg(rm)) { 6878 putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) ); 6879 DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm))); 6880 return 1+delta0; 6881 } else { 6882 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6883 putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) ); 6884 DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm))); 6885 return len+delta0; 6886 } 6887} 6888 6889/* Move 16 bits from G (a segment register) to Ew (ireg or mem). If 6890 dst is ireg and sz==4, zero out top half of it. */ 6891 6892static 6893UInt dis_mov_Sw_Ew ( UChar sorb, 6894 Int sz, 6895 Int delta0 ) 6896{ 6897 Int len; 6898 IRTemp addr; 6899 UChar rm = getIByte(delta0); 6900 HChar dis_buf[50]; 6901 6902 vassert(sz == 2 || sz == 4); 6903 6904 if (epartIsReg(rm)) { 6905 if (sz == 4) 6906 putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm)))); 6907 else 6908 putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm))); 6909 6910 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm))); 6911 return 1+delta0; 6912 } else { 6913 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6914 storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) ); 6915 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf); 6916 return len+delta0; 6917 } 6918} 6919 6920 6921static 6922void dis_push_segreg ( UInt sreg, Int sz ) 6923{ 6924 IRTemp t1 = newTemp(Ity_I16); 6925 IRTemp ta = newTemp(Ity_I32); 6926 vassert(sz == 2 || sz == 4); 6927 6928 assign( t1, getSReg(sreg) ); 6929 assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) ); 6930 putIReg(4, R_ESP, mkexpr(ta)); 6931 storeLE( mkexpr(ta), mkexpr(t1) ); 6932 6933 DIP("push%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg)); 6934} 6935 6936static 6937void dis_pop_segreg ( UInt sreg, Int sz ) 6938{ 6939 IRTemp t1 = newTemp(Ity_I16); 6940 IRTemp ta = newTemp(Ity_I32); 6941 vassert(sz == 2 || sz == 4); 6942 6943 assign( ta, getIReg(4, R_ESP) ); 6944 assign( t1, loadLE(Ity_I16, mkexpr(ta)) ); 6945 6946 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) ); 6947 putSReg( sreg, mkexpr(t1) ); 6948 DIP("pop%c %s\n", sz==2 ? 
'w' : 'l', nameSReg(sreg)); 6949} 6950 6951static 6952void dis_ret ( /*MOD*/DisResult* dres, UInt d32 ) 6953{ 6954 IRTemp t1 = newTemp(Ity_I32); 6955 IRTemp t2 = newTemp(Ity_I32); 6956 assign(t1, getIReg(4,R_ESP)); 6957 assign(t2, loadLE(Ity_I32,mkexpr(t1))); 6958 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(4+d32))); 6959 jmp_treg(dres, Ijk_Ret, t2); 6960 vassert(dres->whatNext == Dis_StopHere); 6961} 6962 6963/*------------------------------------------------------------*/ 6964/*--- SSE/SSE2/SSE3 helpers ---*/ 6965/*------------------------------------------------------------*/ 6966 6967/* Indicates whether the op requires a rounding-mode argument. Note 6968 that this covers only vector floating point arithmetic ops, and 6969 omits the scalar ones that need rounding modes. Note also that 6970 inconsistencies here will get picked up later by the IR sanity 6971 checker, so this isn't correctness-critical. */ 6972static Bool requiresRMode ( IROp op ) 6973{ 6974 switch (op) { 6975 /* 128 bit ops */ 6976 case Iop_Add32Fx4: case Iop_Sub32Fx4: 6977 case Iop_Mul32Fx4: case Iop_Div32Fx4: 6978 case Iop_Add64Fx2: case Iop_Sub64Fx2: 6979 case Iop_Mul64Fx2: case Iop_Div64Fx2: 6980 return True; 6981 default: 6982 break; 6983 } 6984 return False; 6985} 6986 6987 6988/* Worker function; do not call directly. 6989 Handles full width G = G `op` E and G = (not G) `op` E. 6990*/ 6991 6992static UInt dis_SSE_E_to_G_all_wrk ( 6993 UChar sorb, Int delta, 6994 const HChar* opname, IROp op, 6995 Bool invertG 6996 ) 6997{ 6998 HChar dis_buf[50]; 6999 Int alen; 7000 IRTemp addr; 7001 UChar rm = getIByte(delta); 7002 IRExpr* gpart 7003 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRM(rm))) 7004 : getXMMReg(gregOfRM(rm)); 7005 if (epartIsReg(rm)) { 7006 putXMMReg( 7007 gregOfRM(rm), 7008 requiresRMode(op) 7009 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 7010 gpart, 7011 getXMMReg(eregOfRM(rm))) 7012 : binop(op, gpart, 7013 getXMMReg(eregOfRM(rm))) 7014 ); 7015 DIP("%s %s,%s\n", opname, 7016 nameXMMReg(eregOfRM(rm)), 7017 nameXMMReg(gregOfRM(rm)) ); 7018 return delta+1; 7019 } else { 7020 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7021 putXMMReg( 7022 gregOfRM(rm), 7023 requiresRMode(op) 7024 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 7025 gpart, 7026 loadLE(Ity_V128, mkexpr(addr))) 7027 : binop(op, gpart, 7028 loadLE(Ity_V128, mkexpr(addr))) 7029 ); 7030 DIP("%s %s,%s\n", opname, 7031 dis_buf, 7032 nameXMMReg(gregOfRM(rm)) ); 7033 return delta+alen; 7034 } 7035} 7036 7037 7038/* All lanes SSE binary operation, G = G `op` E. */ 7039 7040static 7041UInt dis_SSE_E_to_G_all ( UChar sorb, Int delta, const HChar* opname, IROp op ) 7042{ 7043 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, False ); 7044} 7045 7046/* All lanes SSE binary operation, G = (not G) `op` E. */ 7047 7048static 7049UInt dis_SSE_E_to_G_all_invG ( UChar sorb, Int delta, 7050 const HChar* opname, IROp op ) 7051{ 7052 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, True ); 7053} 7054 7055 7056/* Lowest 32-bit lane only SSE binary operation, G = G `op` E. 
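The ops used here (the 32F0x4 family) combine only lane 0 and pass lanes 1..3 of the first operand through unchanged; that is why, in the memory case below, zero-filling the upper three lanes of E is harmless.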
*/ 7057 7058static UInt dis_SSE_E_to_G_lo32 ( UChar sorb, Int delta, 7059 const HChar* opname, IROp op ) 7060{ 7061 HChar dis_buf[50]; 7062 Int alen; 7063 IRTemp addr; 7064 UChar rm = getIByte(delta); 7065 IRExpr* gpart = getXMMReg(gregOfRM(rm)); 7066 if (epartIsReg(rm)) { 7067 putXMMReg( gregOfRM(rm), 7068 binop(op, gpart, 7069 getXMMReg(eregOfRM(rm))) ); 7070 DIP("%s %s,%s\n", opname, 7071 nameXMMReg(eregOfRM(rm)), 7072 nameXMMReg(gregOfRM(rm)) ); 7073 return delta+1; 7074 } else { 7075 /* We can only do a 32-bit memory read, so the upper 3/4 of the 7076 E operand needs to be made simply of zeroes. */ 7077 IRTemp epart = newTemp(Ity_V128); 7078 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7079 assign( epart, unop( Iop_32UtoV128, 7080 loadLE(Ity_I32, mkexpr(addr))) ); 7081 putXMMReg( gregOfRM(rm), 7082 binop(op, gpart, mkexpr(epart)) ); 7083 DIP("%s %s,%s\n", opname, 7084 dis_buf, 7085 nameXMMReg(gregOfRM(rm)) ); 7086 return delta+alen; 7087 } 7088} 7089 7090 7091/* Lower 64-bit lane only SSE binary operation, G = G `op` E. */ 7092 7093static UInt dis_SSE_E_to_G_lo64 ( UChar sorb, Int delta, 7094 const HChar* opname, IROp op ) 7095{ 7096 HChar dis_buf[50]; 7097 Int alen; 7098 IRTemp addr; 7099 UChar rm = getIByte(delta); 7100 IRExpr* gpart = getXMMReg(gregOfRM(rm)); 7101 if (epartIsReg(rm)) { 7102 putXMMReg( gregOfRM(rm), 7103 binop(op, gpart, 7104 getXMMReg(eregOfRM(rm))) ); 7105 DIP("%s %s,%s\n", opname, 7106 nameXMMReg(eregOfRM(rm)), 7107 nameXMMReg(gregOfRM(rm)) ); 7108 return delta+1; 7109 } else { 7110 /* We can only do a 64-bit memory read, so the upper half of the 7111 E operand needs to be made simply of zeroes. */ 7112 IRTemp epart = newTemp(Ity_V128); 7113 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7114 assign( epart, unop( Iop_64UtoV128, 7115 loadLE(Ity_I64, mkexpr(addr))) ); 7116 putXMMReg( gregOfRM(rm), 7117 binop(op, gpart, mkexpr(epart)) ); 7118 DIP("%s %s,%s\n", opname, 7119 dis_buf, 7120 nameXMMReg(gregOfRM(rm)) ); 7121 return delta+alen; 7122 } 7123} 7124 7125 7126/* All lanes unary SSE operation, G = op(E). */ 7127 7128static UInt dis_SSE_E_to_G_unary_all ( 7129 UChar sorb, Int delta, 7130 const HChar* opname, IROp op 7131 ) 7132{ 7133 HChar dis_buf[50]; 7134 Int alen; 7135 IRTemp addr; 7136 UChar rm = getIByte(delta); 7137 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked 7138 // up in the usual way. 7139 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2; 7140 if (epartIsReg(rm)) { 7141 IRExpr* src = getXMMReg(eregOfRM(rm)); 7142 /* XXXROUNDINGFIXME */ 7143 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src) 7144 : unop(op, src); 7145 putXMMReg( gregOfRM(rm), res ); 7146 DIP("%s %s,%s\n", opname, 7147 nameXMMReg(eregOfRM(rm)), 7148 nameXMMReg(gregOfRM(rm)) ); 7149 return delta+1; 7150 } else { 7151 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7152 IRExpr* src = loadLE(Ity_V128, mkexpr(addr)); 7153 /* XXXROUNDINGFIXME */ 7154 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src) 7155 : unop(op, src); 7156 putXMMReg( gregOfRM(rm), res ); 7157 DIP("%s %s,%s\n", opname, 7158 dis_buf, 7159 nameXMMReg(gregOfRM(rm)) ); 7160 return delta+alen; 7161 } 7162} 7163 7164 7165/* Lowest 32-bit lane only unary SSE operation, G = op(E). */ 7166 7167static UInt dis_SSE_E_to_G_unary_lo32 ( 7168 UChar sorb, Int delta, 7169 const HChar* opname, IROp op 7170 ) 7171{ 7172 /* First we need to get the old G value and patch the low 32 bits 7173 of the E operand into it. Then apply op and write back to G. 
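Since the F0x4-style unary ops compute lane 0 and pass the remaining lanes of their operand through, patching E's low lane into the old G value means the upper lanes of the destination are preserved, as the x86 semantics require.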
*/ 7174 HChar dis_buf[50]; 7175 Int alen; 7176 IRTemp addr; 7177 UChar rm = getIByte(delta); 7178 IRTemp oldG0 = newTemp(Ity_V128); 7179 IRTemp oldG1 = newTemp(Ity_V128); 7180 7181 assign( oldG0, getXMMReg(gregOfRM(rm)) ); 7182 7183 if (epartIsReg(rm)) { 7184 assign( oldG1, 7185 binop( Iop_SetV128lo32, 7186 mkexpr(oldG0), 7187 getXMMRegLane32(eregOfRM(rm), 0)) ); 7188 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7189 DIP("%s %s,%s\n", opname, 7190 nameXMMReg(eregOfRM(rm)), 7191 nameXMMReg(gregOfRM(rm)) ); 7192 return delta+1; 7193 } else { 7194 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7195 assign( oldG1, 7196 binop( Iop_SetV128lo32, 7197 mkexpr(oldG0), 7198 loadLE(Ity_I32, mkexpr(addr)) )); 7199 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7200 DIP("%s %s,%s\n", opname, 7201 dis_buf, 7202 nameXMMReg(gregOfRM(rm)) ); 7203 return delta+alen; 7204 } 7205} 7206 7207 7208/* Lowest 64-bit lane only unary SSE operation, G = op(E). */ 7209 7210static UInt dis_SSE_E_to_G_unary_lo64 ( 7211 UChar sorb, Int delta, 7212 const HChar* opname, IROp op 7213 ) 7214{ 7215 /* First we need to get the old G value and patch the low 64 bits 7216 of the E operand into it. Then apply op and write back to G. */ 7217 HChar dis_buf[50]; 7218 Int alen; 7219 IRTemp addr; 7220 UChar rm = getIByte(delta); 7221 IRTemp oldG0 = newTemp(Ity_V128); 7222 IRTemp oldG1 = newTemp(Ity_V128); 7223 7224 assign( oldG0, getXMMReg(gregOfRM(rm)) ); 7225 7226 if (epartIsReg(rm)) { 7227 assign( oldG1, 7228 binop( Iop_SetV128lo64, 7229 mkexpr(oldG0), 7230 getXMMRegLane64(eregOfRM(rm), 0)) ); 7231 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7232 DIP("%s %s,%s\n", opname, 7233 nameXMMReg(eregOfRM(rm)), 7234 nameXMMReg(gregOfRM(rm)) ); 7235 return delta+1; 7236 } else { 7237 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7238 assign( oldG1, 7239 binop( Iop_SetV128lo64, 7240 mkexpr(oldG0), 7241 loadLE(Ity_I64, mkexpr(addr)) )); 7242 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7243 DIP("%s %s,%s\n", opname, 7244 dis_buf, 7245 nameXMMReg(gregOfRM(rm)) ); 7246 return delta+alen; 7247 } 7248} 7249 7250 7251/* SSE integer binary operation: 7252 G = G `op` E (eLeft == False) 7253 G = E `op` G (eLeft == True) 7254*/ 7255static UInt dis_SSEint_E_to_G( 7256 UChar sorb, Int delta, 7257 const HChar* opname, IROp op, 7258 Bool eLeft 7259 ) 7260{ 7261 HChar dis_buf[50]; 7262 Int alen; 7263 IRTemp addr; 7264 UChar rm = getIByte(delta); 7265 IRExpr* gpart = getXMMReg(gregOfRM(rm)); 7266 IRExpr* epart = NULL; 7267 if (epartIsReg(rm)) { 7268 epart = getXMMReg(eregOfRM(rm)); 7269 DIP("%s %s,%s\n", opname, 7270 nameXMMReg(eregOfRM(rm)), 7271 nameXMMReg(gregOfRM(rm)) ); 7272 delta += 1; 7273 } else { 7274 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7275 epart = loadLE(Ity_V128, mkexpr(addr)); 7276 DIP("%s %s,%s\n", opname, 7277 dis_buf, 7278 nameXMMReg(gregOfRM(rm)) ); 7279 delta += alen; 7280 } 7281 putXMMReg( gregOfRM(rm), 7282 eLeft ? binop(op, epart, gpart) 7283 : binop(op, gpart, epart) ); 7284 return delta; 7285} 7286 7287 7288/* Helper for doing SSE FP comparisons. 
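The imm8 selector is decoded as follows: bits 1:0 choose the base predicate (0=EQ, 1=LT, 2=LE, 3=UNORD) and bit 2 requests its negation, implemented by applying the base op and then inverting the result. For instance cmpneqps has imm8 == 4 and is computed as EQ followed by a NOT.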
*/ 7289 7290static void findSSECmpOp ( Bool* needNot, IROp* op, 7291 Int imm8, Bool all_lanes, Int sz ) 7292{ 7293 imm8 &= 7; 7294 *needNot = False; 7295 *op = Iop_INVALID; 7296 if (imm8 >= 4) { 7297 *needNot = True; 7298 imm8 -= 4; 7299 } 7300 7301 if (sz == 4 && all_lanes) { 7302 switch (imm8) { 7303 case 0: *op = Iop_CmpEQ32Fx4; return; 7304 case 1: *op = Iop_CmpLT32Fx4; return; 7305 case 2: *op = Iop_CmpLE32Fx4; return; 7306 case 3: *op = Iop_CmpUN32Fx4; return; 7307 default: break; 7308 } 7309 } 7310 if (sz == 4 && !all_lanes) { 7311 switch (imm8) { 7312 case 0: *op = Iop_CmpEQ32F0x4; return; 7313 case 1: *op = Iop_CmpLT32F0x4; return; 7314 case 2: *op = Iop_CmpLE32F0x4; return; 7315 case 3: *op = Iop_CmpUN32F0x4; return; 7316 default: break; 7317 } 7318 } 7319 if (sz == 8 && all_lanes) { 7320 switch (imm8) { 7321 case 0: *op = Iop_CmpEQ64Fx2; return; 7322 case 1: *op = Iop_CmpLT64Fx2; return; 7323 case 2: *op = Iop_CmpLE64Fx2; return; 7324 case 3: *op = Iop_CmpUN64Fx2; return; 7325 default: break; 7326 } 7327 } 7328 if (sz == 8 && !all_lanes) { 7329 switch (imm8) { 7330 case 0: *op = Iop_CmpEQ64F0x2; return; 7331 case 1: *op = Iop_CmpLT64F0x2; return; 7332 case 2: *op = Iop_CmpLE64F0x2; return; 7333 case 3: *op = Iop_CmpUN64F0x2; return; 7334 default: break; 7335 } 7336 } 7337 vpanic("findSSECmpOp(x86,guest)"); 7338} 7339 7340/* Handles SSE 32F/64F comparisons. */ 7341 7342static UInt dis_SSEcmp_E_to_G ( UChar sorb, Int delta, 7343 const HChar* opname, Bool all_lanes, Int sz ) 7344{ 7345 HChar dis_buf[50]; 7346 Int alen, imm8; 7347 IRTemp addr; 7348 Bool needNot = False; 7349 IROp op = Iop_INVALID; 7350 IRTemp plain = newTemp(Ity_V128); 7351 UChar rm = getIByte(delta); 7352 UShort mask = 0; 7353 vassert(sz == 4 || sz == 8); 7354 if (epartIsReg(rm)) { 7355 imm8 = getIByte(delta+1); 7356 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz); 7357 assign( plain, binop(op, getXMMReg(gregOfRM(rm)), 7358 getXMMReg(eregOfRM(rm))) ); 7359 delta += 2; 7360 DIP("%s $%d,%s,%s\n", opname, 7361 imm8, 7362 nameXMMReg(eregOfRM(rm)), 7363 nameXMMReg(gregOfRM(rm)) ); 7364 } else { 7365 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7366 imm8 = getIByte(delta+alen); 7367 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz); 7368 assign( plain, 7369 binop( 7370 op, 7371 getXMMReg(gregOfRM(rm)), 7372 all_lanes ? loadLE(Ity_V128, mkexpr(addr)) 7373 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr))) 7374 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))) 7375 ) 7376 ); 7377 delta += alen+1; 7378 DIP("%s $%d,%s,%s\n", opname, 7379 imm8, 7380 dis_buf, 7381 nameXMMReg(gregOfRM(rm)) ); 7382 } 7383 7384 if (needNot && all_lanes) { 7385 putXMMReg( gregOfRM(rm), 7386 unop(Iop_NotV128, mkexpr(plain)) ); 7387 } 7388 else 7389 if (needNot && !all_lanes) { 7390 mask = toUShort( sz==4 ? 0x000F : 0x00FF ); 7391 putXMMReg( gregOfRM(rm), 7392 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) ); 7393 } 7394 else { 7395 putXMMReg( gregOfRM(rm), mkexpr(plain) ); 7396 } 7397 7398 return delta; 7399} 7400 7401 7402/* Vector by scalar shift of G by the amount specified at the bottom 7403 of E. 
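Out-of-range amounts follow the x86 rules: if the 32-bit amount is not less than the lane width, logical shifts produce all zeroes, while arithmetic shifts behave like a shift by lane-width minus 1, replicating the sign bit. Hence the two ITE cases below.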
*/ 7404 7405static UInt dis_SSE_shiftG_byE ( UChar sorb, Int delta, 7406 const HChar* opname, IROp op ) 7407{ 7408 HChar dis_buf[50]; 7409 Int alen, size; 7410 IRTemp addr; 7411 Bool shl, shr, sar; 7412 UChar rm = getIByte(delta); 7413 IRTemp g0 = newTemp(Ity_V128); 7414 IRTemp g1 = newTemp(Ity_V128); 7415 IRTemp amt = newTemp(Ity_I32); 7416 IRTemp amt8 = newTemp(Ity_I8); 7417 if (epartIsReg(rm)) { 7418 assign( amt, getXMMRegLane32(eregOfRM(rm), 0) ); 7419 DIP("%s %s,%s\n", opname, 7420 nameXMMReg(eregOfRM(rm)), 7421 nameXMMReg(gregOfRM(rm)) ); 7422 delta++; 7423 } else { 7424 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7425 assign( amt, loadLE(Ity_I32, mkexpr(addr)) ); 7426 DIP("%s %s,%s\n", opname, 7427 dis_buf, 7428 nameXMMReg(gregOfRM(rm)) ); 7429 delta += alen; 7430 } 7431 assign( g0, getXMMReg(gregOfRM(rm)) ); 7432 assign( amt8, unop(Iop_32to8, mkexpr(amt)) ); 7433 7434 shl = shr = sar = False; 7435 size = 0; 7436 switch (op) { 7437 case Iop_ShlN16x8: shl = True; size = 16; break; 7438 case Iop_ShlN32x4: shl = True; size = 32; break; 7439 case Iop_ShlN64x2: shl = True; size = 64; break; 7440 case Iop_SarN16x8: sar = True; size = 16; break; 7441 case Iop_SarN32x4: sar = True; size = 32; break; 7442 case Iop_ShrN16x8: shr = True; size = 16; break; 7443 case Iop_ShrN32x4: shr = True; size = 32; break; 7444 case Iop_ShrN64x2: shr = True; size = 64; break; 7445 default: vassert(0); 7446 } 7447 7448 if (shl || shr) { 7449 assign( 7450 g1, 7451 IRExpr_ITE( 7452 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)), 7453 binop(op, mkexpr(g0), mkexpr(amt8)), 7454 mkV128(0x0000) 7455 ) 7456 ); 7457 } else 7458 if (sar) { 7459 assign( 7460 g1, 7461 IRExpr_ITE( 7462 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)), 7463 binop(op, mkexpr(g0), mkexpr(amt8)), 7464 binop(op, mkexpr(g0), mkU8(size-1)) 7465 ) 7466 ); 7467 } else { 7468 /*NOTREACHED*/ 7469 vassert(0); 7470 } 7471 7472 putXMMReg( gregOfRM(rm), mkexpr(g1) ); 7473 return delta; 7474} 7475 7476 7477/* Vector by scalar shift of E by an immediate byte. */ 7478 7479static 7480UInt dis_SSE_shiftE_imm ( Int delta, const HChar* opname, IROp op ) 7481{ 7482 Bool shl, shr, sar; 7483 UChar rm = getIByte(delta); 7484 IRTemp e0 = newTemp(Ity_V128); 7485 IRTemp e1 = newTemp(Ity_V128); 7486 UChar amt, size; 7487 vassert(epartIsReg(rm)); 7488 vassert(gregOfRM(rm) == 2 7489 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6); 7490 amt = getIByte(delta+1); 7491 delta += 2; 7492 DIP("%s $%d,%s\n", opname, 7493 (Int)amt, 7494 nameXMMReg(eregOfRM(rm)) ); 7495 assign( e0, getXMMReg(eregOfRM(rm)) ); 7496 7497 shl = shr = sar = False; 7498 size = 0; 7499 switch (op) { 7500 case Iop_ShlN16x8: shl = True; size = 16; break; 7501 case Iop_ShlN32x4: shl = True; size = 32; break; 7502 case Iop_ShlN64x2: shl = True; size = 64; break; 7503 case Iop_SarN16x8: sar = True; size = 16; break; 7504 case Iop_SarN32x4: sar = True; size = 32; break; 7505 case Iop_ShrN16x8: shr = True; size = 16; break; 7506 case Iop_ShrN32x4: shr = True; size = 32; break; 7507 case Iop_ShrN64x2: shr = True; size = 64; break; 7508 default: vassert(0); 7509 } 7510 7511 if (shl || shr) { 7512 assign( e1, amt >= size 7513 ? mkV128(0x0000) 7514 : binop(op, mkexpr(e0), mkU8(amt)) 7515 ); 7516 } else 7517 if (sar) { 7518 assign( e1, amt >= size 7519 ? binop(op, mkexpr(e0), mkU8(size-1)) 7520 : binop(op, mkexpr(e0), mkU8(amt)) 7521 ); 7522 } else { 7523 /*NOTREACHED*/ 7524 vassert(0); 7525 } 7526 7527 putXMMReg( eregOfRM(rm), mkexpr(e1) ); 7528 return delta; 7529} 7530 7531 7532/* Get the current SSE rounding mode. 
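Only the bottom two bits of the guest SSEROUND word are significant; the MXCSR.RC encoding (0=nearest, 1=down, 2=up, 3=toward zero) coincides with the IRRoundingMode encoding, so the masked value can be handed directly to rounding-mode-taking IROps.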
*/ 7533 7534static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void ) 7535{ 7536 return binop( Iop_And32, 7537 IRExpr_Get( OFFB_SSEROUND, Ity_I32 ), 7538 mkU32(3) ); 7539} 7540 7541static void put_sse_roundingmode ( IRExpr* sseround ) 7542{ 7543 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32); 7544 stmt( IRStmt_Put( OFFB_SSEROUND, sseround ) ); 7545} 7546 7547/* Break a 128-bit value up into four 32-bit ints. */ 7548 7549static void breakup128to32s ( IRTemp t128, 7550 /*OUTs*/ 7551 IRTemp* t3, IRTemp* t2, 7552 IRTemp* t1, IRTemp* t0 ) 7553{ 7554 IRTemp hi64 = newTemp(Ity_I64); 7555 IRTemp lo64 = newTemp(Ity_I64); 7556 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) ); 7557 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) ); 7558 7559 vassert(t0 && *t0 == IRTemp_INVALID); 7560 vassert(t1 && *t1 == IRTemp_INVALID); 7561 vassert(t2 && *t2 == IRTemp_INVALID); 7562 vassert(t3 && *t3 == IRTemp_INVALID); 7563 7564 *t0 = newTemp(Ity_I32); 7565 *t1 = newTemp(Ity_I32); 7566 *t2 = newTemp(Ity_I32); 7567 *t3 = newTemp(Ity_I32); 7568 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) ); 7569 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) ); 7570 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) ); 7571 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) ); 7572} 7573 7574/* Construct a 128-bit value from four 32-bit ints. */ 7575 7576static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2, 7577 IRTemp t1, IRTemp t0 ) 7578{ 7579 return 7580 binop( Iop_64HLtoV128, 7581 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), 7582 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) 7583 ); 7584} 7585 7586/* Break a 64-bit value up into four 16-bit ints. */ 7587 7588static void breakup64to16s ( IRTemp t64, 7589 /*OUTs*/ 7590 IRTemp* t3, IRTemp* t2, 7591 IRTemp* t1, IRTemp* t0 ) 7592{ 7593 IRTemp hi32 = newTemp(Ity_I32); 7594 IRTemp lo32 = newTemp(Ity_I32); 7595 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) ); 7596 assign( lo32, unop(Iop_64to32, mkexpr(t64)) ); 7597 7598 vassert(t0 && *t0 == IRTemp_INVALID); 7599 vassert(t1 && *t1 == IRTemp_INVALID); 7600 vassert(t2 && *t2 == IRTemp_INVALID); 7601 vassert(t3 && *t3 == IRTemp_INVALID); 7602 7603 *t0 = newTemp(Ity_I16); 7604 *t1 = newTemp(Ity_I16); 7605 *t2 = newTemp(Ity_I16); 7606 *t3 = newTemp(Ity_I16); 7607 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) ); 7608 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) ); 7609 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) ); 7610 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) ); 7611} 7612 7613/* Construct a 64-bit value from four 16-bit ints. */ 7614 7615static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2, 7616 IRTemp t1, IRTemp t0 ) 7617{ 7618 return 7619 binop( Iop_32HLto64, 7620 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)), 7621 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0)) 7622 ); 7623} 7624 7625/* Generate IR to set the guest %EFLAGS from the pushfl-format image 7626 in the given 32-bit temporary. The flags that are set are: O S Z A 7627 C P D ID AC. 7628 7629 In all cases, code to set AC is generated. However, VEX actually 7630 ignores the AC value and so can optionally emit an emulation 7631 warning when it is enabled. In this routine, an emulation warning 7632 is only emitted if emit_AC_emwarn is True, in which case 7633 next_insn_EIP must be correct (this allows for correct code 7634 generation for popfl/popfw). If emit_AC_emwarn is False, 7635 next_insn_EIP is unimportant (this allows for easy if kludgey code 7636 generation for IRET.) 
*/ 7637 7638static 7639void set_EFLAGS_from_value ( IRTemp t1, 7640 Bool emit_AC_emwarn, 7641 Addr32 next_insn_EIP ) 7642{ 7643 vassert(typeOfIRTemp(irsb->tyenv,t1) == Ity_I32); 7644 7645 /* t1 is the flag word. Mask out everything except OSZACP and set 7646 the flags thunk to X86G_CC_OP_COPY. */ 7647 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 7648 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 7649 stmt( IRStmt_Put( OFFB_CC_DEP1, 7650 binop(Iop_And32, 7651 mkexpr(t1), 7652 mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P 7653 | X86G_CC_MASK_A | X86G_CC_MASK_Z 7654 | X86G_CC_MASK_S| X86G_CC_MASK_O ) 7655 ) 7656 ) 7657 ); 7658 /* Set NDEP even though it isn't used. This makes redundant-PUT 7659 elimination of previous stores to this field work better. */ 7660 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 7661 7662 /* Also need to set the D flag, which is held in bit 10 of t1. 7663 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */ 7664 stmt( IRStmt_Put( 7665 OFFB_DFLAG, 7666 IRExpr_ITE( 7667 unop(Iop_32to1, 7668 binop(Iop_And32, 7669 binop(Iop_Shr32, mkexpr(t1), mkU8(10)), 7670 mkU32(1))), 7671 mkU32(0xFFFFFFFF), 7672 mkU32(1))) 7673 ); 7674 7675 /* Set the ID flag */ 7676 stmt( IRStmt_Put( 7677 OFFB_IDFLAG, 7678 IRExpr_ITE( 7679 unop(Iop_32to1, 7680 binop(Iop_And32, 7681 binop(Iop_Shr32, mkexpr(t1), mkU8(21)), 7682 mkU32(1))), 7683 mkU32(1), 7684 mkU32(0))) 7685 ); 7686 7687 /* And set the AC flag. If setting it to 1, possibly emit an 7688 emulation warning. */ 7689 stmt( IRStmt_Put( 7690 OFFB_ACFLAG, 7691 IRExpr_ITE( 7692 unop(Iop_32to1, 7693 binop(Iop_And32, 7694 binop(Iop_Shr32, mkexpr(t1), mkU8(18)), 7695 mkU32(1))), 7696 mkU32(1), 7697 mkU32(0))) 7698 ); 7699 7700 if (emit_AC_emwarn) { 7701 put_emwarn( mkU32(EmWarn_X86_acFlag) ); 7702 stmt( 7703 IRStmt_Exit( 7704 binop( Iop_CmpNE32, 7705 binop(Iop_And32, mkexpr(t1), mkU32(1<<18)), 7706 mkU32(0) ), 7707 Ijk_EmWarn, 7708 IRConst_U32( next_insn_EIP ), 7709 OFFB_EIP 7710 ) 7711 ); 7712 } 7713} 7714 7715 7716/* Helper for the SSSE3 (not SSE3) PMULHRSW insns. 
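(PMULHRSW is a Q15 fixed-point multiply returning a rounded high half. A hypothetical scalar model of one lane -- not part of this file, and assuming arithmetic >> on negative Ints -- is: Short lane ( Short a, Short b ) { Int t = (((Int)a * (Int)b) >> 14) + 1; return (Short)(t >> 1); } For instance 0x4000 * 0x4000, i.e. 0.5 * 0.5 in Q15, gives 0x2000, i.e. 0.25.)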
Given two 64-bit 7717 values (aa,bb), computes, for each of the 4 16-bit lanes: 7718 7719 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1 7720*/ 7721static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx ) 7722{ 7723 IRTemp aa = newTemp(Ity_I64); 7724 IRTemp bb = newTemp(Ity_I64); 7725 IRTemp aahi32s = newTemp(Ity_I64); 7726 IRTemp aalo32s = newTemp(Ity_I64); 7727 IRTemp bbhi32s = newTemp(Ity_I64); 7728 IRTemp bblo32s = newTemp(Ity_I64); 7729 IRTemp rHi = newTemp(Ity_I64); 7730 IRTemp rLo = newTemp(Ity_I64); 7731 IRTemp one32x2 = newTemp(Ity_I64); 7732 assign(aa, aax); 7733 assign(bb, bbx); 7734 assign( aahi32s, 7735 binop(Iop_SarN32x2, 7736 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)), 7737 mkU8(16) )); 7738 assign( aalo32s, 7739 binop(Iop_SarN32x2, 7740 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)), 7741 mkU8(16) )); 7742 assign( bbhi32s, 7743 binop(Iop_SarN32x2, 7744 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)), 7745 mkU8(16) )); 7746 assign( bblo32s, 7747 binop(Iop_SarN32x2, 7748 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)), 7749 mkU8(16) )); 7750 assign(one32x2, mkU64( (1ULL << 32) + 1 )); 7751 assign( 7752 rHi, 7753 binop( 7754 Iop_ShrN32x2, 7755 binop( 7756 Iop_Add32x2, 7757 binop( 7758 Iop_ShrN32x2, 7759 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)), 7760 mkU8(14) 7761 ), 7762 mkexpr(one32x2) 7763 ), 7764 mkU8(1) 7765 ) 7766 ); 7767 assign( 7768 rLo, 7769 binop( 7770 Iop_ShrN32x2, 7771 binop( 7772 Iop_Add32x2, 7773 binop( 7774 Iop_ShrN32x2, 7775 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)), 7776 mkU8(14) 7777 ), 7778 mkexpr(one32x2) 7779 ), 7780 mkU8(1) 7781 ) 7782 ); 7783 return 7784 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo)); 7785} 7786 7787/* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit 7788 values (aa,bb), computes, for each lane: 7789 7790 if aa_lane < 0 then - bb_lane 7791 else if aa_lane > 0 then bb_lane 7792 else 0 7793*/ 7794static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB ) 7795{ 7796 IRTemp aa = newTemp(Ity_I64); 7797 IRTemp bb = newTemp(Ity_I64); 7798 IRTemp zero = newTemp(Ity_I64); 7799 IRTemp bbNeg = newTemp(Ity_I64); 7800 IRTemp negMask = newTemp(Ity_I64); 7801 IRTemp posMask = newTemp(Ity_I64); 7802 IROp opSub = Iop_INVALID; 7803 IROp opCmpGTS = Iop_INVALID; 7804 7805 switch (laneszB) { 7806 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break; 7807 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break; 7808 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break; 7809 default: vassert(0); 7810 } 7811 7812 assign( aa, aax ); 7813 assign( bb, bbx ); 7814 assign( zero, mkU64(0) ); 7815 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) ); 7816 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) ); 7817 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) ); 7818 7819 return 7820 binop(Iop_Or64, 7821 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)), 7822 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) ); 7823 7824} 7825 7826/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit 7827 value aa, computes, for each lane 7828 7829 if aa < 0 then -aa else aa 7830 7831 Note that the result is interpreted as unsigned, so that the 7832 absolute value of the most negative signed input can be 7833 represented. 
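(For example, with laneszB == 1 an input lane holding 0x80, i.e. -128, produces 0x80, which read as unsigned is +128.)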
*/
static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB )
{
   IRTemp aa      = newTemp(Ity_I64);
   IRTemp zero    = newTemp(Ity_I64);
   IRTemp aaNeg   = newTemp(Ity_I64);
   IRTemp negMask = newTemp(Ity_I64);
   IRTemp posMask = newTemp(Ity_I64);
   IROp   opSub   = Iop_INVALID;
   IROp   opSarN  = Iop_INVALID;

   switch (laneszB) {
      case 1: opSub = Iop_Sub8x8;  opSarN = Iop_SarN8x8;  break;
      case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
      case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
      default: vassert(0);
   }

   assign( aa,      aax );
   assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
   assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
   assign( zero,    mkU64(0) );
   assign( aaNeg,   binop(opSub, mkexpr(zero), mkexpr(aa)) );
   return
      binop(Iop_Or64,
            binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
            binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) );
}

static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
                                        IRTemp lo64, Int byteShift )
{
   vassert(byteShift >= 1 && byteShift <= 7);
   return
      binop(Iop_Or64,
            binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
            binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
      );
}

/* Generate a SIGSEGV followed by a restart of the current instruction
   if effective_addr is not 16-aligned.  This is required behaviour
   for some SSE3 instructions and all 128-bit SSSE3 instructions.
   This assumes that guest_EIP_curr_instr is set correctly! */
static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
{
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE32,
               binop(Iop_And32,mkexpr(effective_addr),mkU32(0xF)),
               mkU32(0)),
         Ijk_SigSEGV,
         IRConst_U32(guest_EIP_curr_instr),
         OFFB_EIP
      )
   );
}


/* Helper for deciding whether a given insn (starting at the opcode
   byte) may validly be used with a LOCK prefix.  The following insns
   may be used with LOCK when their destination operand is in memory.
   AFAICS this is exactly the same for both 32-bit and 64-bit mode.
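
   For example, F0 FF 00 (lock incl (%eax), i.e. FF /0 with a memory
   E operand) is accepted by the function below, whereas F0 FF C0
   (lock incl %eax, a register E operand) is rejected -- matching the
   hardware, which raises #UD for LOCK with a register destination.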

   ADD        80 /0,  81 /0,  82 /0,  83 /0,  00,  01
   OR         80 /1,  81 /1,  82 /x,  83 /1,  08,  09
   ADC        80 /2,  81 /2,  82 /2,  83 /2,  10,  11
   SBB        80 /3,  81 /3,  82 /x,  83 /3,  18,  19
   AND        80 /4,  81 /4,  82 /x,  83 /4,  20,  21
   SUB        80 /5,  81 /5,  82 /x,  83 /5,  28,  29
   XOR        80 /6,  81 /6,  82 /x,  83 /6,  30,  31

   DEC        FE /1,  FF /1
   INC        FE /0,  FF /0

   NEG        F6 /3,  F7 /3
   NOT        F6 /2,  F7 /2

   XCHG       86, 87

   BTC        0F BB,  0F BA /7
   BTR        0F B3,  0F BA /6
   BTS        0F AB,  0F BA /5

   CMPXCHG    0F B0,  0F B1
   CMPXCHG8B  0F C7 /1

   XADD       0F C0,  0F C1

   ------------------------------

   80 /0 = addb $imm8,  rm8
   81 /0 = addl $imm32, rm32  and  addw $imm16, rm16
   82 /0 = addb $imm8,  rm8
   83 /0 = addl $simm8, rm32  and  addw $simm8, rm16

   00    = addb r8,  rm8
   01    = addl r32, rm32  and  addw r16, rm16

   Same for ADD OR ADC SBB AND SUB XOR

   FE /1 = dec rm8
   FF /1 = dec rm32  and  dec rm16

   FE /0 = inc rm8
   FF /0 = inc rm32  and  inc rm16

   F6 /3 = neg rm8
   F7 /3 = neg rm32  and  neg rm16

   F6 /2 = not rm8
   F7 /2 = not rm32  and  not rm16

   0F BB    = btcw r16, rm16    and  btcl r32, rm32
   0F BA /7 = btcw $imm8, rm16  and  btcl $imm8, rm32

   Same for BTS, BTR
*/
static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
{
   switch (opc[0]) {
      case 0x00: case 0x01: case 0x08: case 0x09:
      case 0x10: case 0x11: case 0x18: case 0x19:
      case 0x20: case 0x21: case 0x28: case 0x29:
      case 0x30: case 0x31:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      case 0x80: case 0x81: case 0x82: case 0x83:
         if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0xFE: case 0xFF:
         if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0xF6: case 0xF7:
         if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0x86: case 0x87:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      case 0x0F: {
         switch (opc[1]) {
            case 0xBB: case 0xB3: case 0xAB:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            case 0xBA:
               if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7
                   && !epartIsReg(opc[2]))
                  return True;
               break;
            case 0xB0: case 0xB1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            case 0xC7:
               if (gregOfRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
                  return True;
               break;
            case 0xC0: case 0xC1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            default:
               break;
         } /* switch (opc[1]) */
         break;
      }

      default:
         break;
   } /* switch (opc[0]) */

   return False;
}

static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
{
   IRTemp t2 = newTemp(ty);
   if (ty == Ity_I32) {
      assign( t2,
         binop(
            Iop_Or32,
            binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
            binop(
               Iop_Or32,
               binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
                                mkU32(0x00FF0000)),
               binop(Iop_Or32,
                     binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
                                      mkU32(0x0000FF00)),
                     binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
                                      mkU32(0x000000FF) )
               )))
      );
      return t2;
   }
   if (ty == Ity_I16) {
      assign(t2,
             binop(Iop_Or16,
                   binop(Iop_Shl16,
mkexpr(t1), mkU8(8)), 8047 binop(Iop_Shr16, mkexpr(t1), mkU8(8)) )); 8048 return t2; 8049 } 8050 vassert(0); 8051 /*NOTREACHED*/ 8052 return IRTemp_INVALID; 8053} 8054 8055/*------------------------------------------------------------*/ 8056/*--- Disassemble a single instruction ---*/ 8057/*------------------------------------------------------------*/ 8058 8059/* Disassemble a single instruction into IR. The instruction is 8060 located in host memory at &guest_code[delta]. *expect_CAS is set 8061 to True if the resulting IR is expected to contain an IRCAS 8062 statement, and False if it's not expected to. This makes it 8063 possible for the caller of disInstr_X86_WRK to check that 8064 LOCK-prefixed instructions are at least plausibly translated, in 8065 that it becomes possible to check that a (validly) LOCK-prefixed 8066 instruction generates a translation containing an IRCAS, and 8067 instructions without LOCK prefixes don't generate translations 8068 containing an IRCAS. 8069*/ 8070static 8071DisResult disInstr_X86_WRK ( 8072 /*OUT*/Bool* expect_CAS, 8073 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ), 8074 Bool resteerCisOk, 8075 void* callback_opaque, 8076 Long delta64, 8077 const VexArchInfo* archinfo, 8078 const VexAbiInfo* vbi, 8079 Bool sigill_diag 8080 ) 8081{ 8082 IRType ty; 8083 IRTemp addr, t0, t1, t2, t3, t4, t5, t6; 8084 Int alen; 8085 UChar opc, modrm, abyte, pre; 8086 UInt d32; 8087 HChar dis_buf[50]; 8088 Int am_sz, d_sz, n_prefixes; 8089 DisResult dres; 8090 const UChar* insn; /* used in SSE decoders */ 8091 8092 /* The running delta */ 8093 Int delta = (Int)delta64; 8094 8095 /* Holds eip at the start of the insn, so that we can print 8096 consistent error messages for unimplemented insns. */ 8097 Int delta_start = delta; 8098 8099 /* sz denotes the nominal data-op size of the insn; we change it to 8100 2 if an 0x66 prefix is seen */ 8101 Int sz = 4; 8102 8103 /* sorb holds the segment-override-prefix byte, if any. Zero if no 8104 prefix has been seen, else one of {0x26, 0x3E, 0x64, 0x65} 8105 indicating the prefix. */ 8106 UChar sorb = 0; 8107 8108 /* Gets set to True if a LOCK prefix is seen. */ 8109 Bool pfx_lock = False; 8110 8111 /* Set result defaults. */ 8112 dres.whatNext = Dis_Continue; 8113 dres.len = 0; 8114 dres.continueAt = 0; 8115 dres.jk_StopHere = Ijk_INVALID; 8116 8117 *expect_CAS = False; 8118 8119 addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID; 8120 8121 vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr); 8122 DIP("\t0x%x: ", guest_EIP_bbstart+delta); 8123 8124 /* Spot "Special" instructions (see comment at top of file). */ 8125 { 8126 const UChar* code = guest_code + delta; 8127 /* Spot the 12-byte preamble: 8128 C1C703 roll $3, %edi 8129 C1C70D roll $13, %edi 8130 C1C71D roll $29, %edi 8131 C1C713 roll $19, %edi 8132 */ 8133 if (code[ 0] == 0xC1 && code[ 1] == 0xC7 && code[ 2] == 0x03 && 8134 code[ 3] == 0xC1 && code[ 4] == 0xC7 && code[ 5] == 0x0D && 8135 code[ 6] == 0xC1 && code[ 7] == 0xC7 && code[ 8] == 0x1D && 8136 code[ 9] == 0xC1 && code[10] == 0xC7 && code[11] == 0x13) { 8137 /* Got a "Special" instruction preamble. Which one is it? 
*/ 8138 if (code[12] == 0x87 && code[13] == 0xDB /* xchgl %ebx,%ebx */) { 8139 /* %EDX = client_request ( %EAX ) */ 8140 DIP("%%edx = client_request ( %%eax )\n"); 8141 delta += 14; 8142 jmp_lit(&dres, Ijk_ClientReq, guest_EIP_bbstart+delta); 8143 vassert(dres.whatNext == Dis_StopHere); 8144 goto decode_success; 8145 } 8146 else 8147 if (code[12] == 0x87 && code[13] == 0xC9 /* xchgl %ecx,%ecx */) { 8148 /* %EAX = guest_NRADDR */ 8149 DIP("%%eax = guest_NRADDR\n"); 8150 delta += 14; 8151 putIReg(4, R_EAX, IRExpr_Get( OFFB_NRADDR, Ity_I32 )); 8152 goto decode_success; 8153 } 8154 else 8155 if (code[12] == 0x87 && code[13] == 0xD2 /* xchgl %edx,%edx */) { 8156 /* call-noredir *%EAX */ 8157 DIP("call-noredir *%%eax\n"); 8158 delta += 14; 8159 t1 = newTemp(Ity_I32); 8160 assign(t1, getIReg(4,R_EAX)); 8161 t2 = newTemp(Ity_I32); 8162 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); 8163 putIReg(4, R_ESP, mkexpr(t2)); 8164 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta)); 8165 jmp_treg(&dres, Ijk_NoRedir, t1); 8166 vassert(dres.whatNext == Dis_StopHere); 8167 goto decode_success; 8168 } 8169 else 8170 if (code[12] == 0x87 && code[13] == 0xFF /* xchgl %edi,%edi */) { 8171 /* IR injection */ 8172 DIP("IR injection\n"); 8173 vex_inject_ir(irsb, Iend_LE); 8174 8175 // Invalidate the current insn. The reason is that the IRop we're 8176 // injecting here can change. In which case the translation has to 8177 // be redone. For ease of handling, we simply invalidate all the 8178 // time. 8179 stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_EIP_curr_instr))); 8180 stmt(IRStmt_Put(OFFB_CMLEN, mkU32(14))); 8181 8182 delta += 14; 8183 8184 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) ); 8185 dres.whatNext = Dis_StopHere; 8186 dres.jk_StopHere = Ijk_InvalICache; 8187 goto decode_success; 8188 } 8189 /* We don't know what it is. */ 8190 goto decode_failure; 8191 /*NOTREACHED*/ 8192 } 8193 } 8194 8195 /* Handle a couple of weird-ass NOPs that have been observed in the 8196 wild. */ 8197 { 8198 const UChar* code = guest_code + delta; 8199 /* Sun's JVM 1.5.0 uses the following as a NOP: 8200 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */ 8201 if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64 8202 && code[3] == 0x65 && code[4] == 0x90) { 8203 DIP("%%es:%%cs:%%fs:%%gs:nop\n"); 8204 delta += 5; 8205 goto decode_success; 8206 } 8207 /* Don't barf on recent binutils padding, 8208 all variants of which are: nopw %cs:0x0(%eax,%eax,1) 8209 66 2e 0f 1f 84 00 00 00 00 00 8210 66 66 2e 0f 1f 84 00 00 00 00 00 8211 66 66 66 2e 0f 1f 84 00 00 00 00 00 8212 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 8213 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 8214 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 8215 */ 8216 if (code[0] == 0x66) { 8217 Int data16_cnt; 8218 for (data16_cnt = 1; data16_cnt < 6; data16_cnt++) 8219 if (code[data16_cnt] != 0x66) 8220 break; 8221 if (code[data16_cnt] == 0x2E && code[data16_cnt + 1] == 0x0F 8222 && code[data16_cnt + 2] == 0x1F && code[data16_cnt + 3] == 0x84 8223 && code[data16_cnt + 4] == 0x00 && code[data16_cnt + 5] == 0x00 8224 && code[data16_cnt + 6] == 0x00 && code[data16_cnt + 7] == 0x00 8225 && code[data16_cnt + 8] == 0x00 ) { 8226 DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n"); 8227 delta += 9 + data16_cnt; 8228 goto decode_success; 8229 } 8230 } 8231 } 8232 8233 /* Normal instruction handling starts here. 
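      To illustrate the prefix handling below: given the bytes
      66 F0 0F B1 0B (lock cmpxchgw %cx,(%ebx)), the loop consumes
      66 (setting sz = 2) and F0 (setting pfx_lock and *expect_CAS),
      leaving delta at the 0F B1 opcode, which the LOCK-validity
      check then accepts since its E operand is in memory.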
*/ 8234 8235 /* Deal with some but not all prefixes: 8236 66(oso) 8237 F0(lock) 8238 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:) 8239 Not dealt with (left in place): 8240 F2 F3 8241 */ 8242 n_prefixes = 0; 8243 while (True) { 8244 if (n_prefixes > 7) goto decode_failure; 8245 pre = getUChar(delta); 8246 switch (pre) { 8247 case 0x66: 8248 sz = 2; 8249 break; 8250 case 0xF0: 8251 pfx_lock = True; 8252 *expect_CAS = True; 8253 break; 8254 case 0x3E: /* %DS: */ 8255 case 0x26: /* %ES: */ 8256 case 0x64: /* %FS: */ 8257 case 0x65: /* %GS: */ 8258 if (sorb != 0) 8259 goto decode_failure; /* only one seg override allowed */ 8260 sorb = pre; 8261 break; 8262 case 0x2E: { /* %CS: */ 8263 /* 2E prefix on a conditional branch instruction is a 8264 branch-prediction hint, which can safely be ignored. */ 8265 UChar op1 = getIByte(delta+1); 8266 UChar op2 = getIByte(delta+2); 8267 if ((op1 >= 0x70 && op1 <= 0x7F) 8268 || (op1 == 0xE3) 8269 || (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) { 8270 if (0) vex_printf("vex x86->IR: ignoring branch hint\n"); 8271 } else { 8272 /* All other CS override cases are not handled */ 8273 goto decode_failure; 8274 } 8275 break; 8276 } 8277 case 0x36: /* %SS: */ 8278 /* SS override cases are not handled */ 8279 goto decode_failure; 8280 default: 8281 goto not_a_prefix; 8282 } 8283 n_prefixes++; 8284 delta++; 8285 } 8286 8287 not_a_prefix: 8288 8289 /* Now we should be looking at the primary opcode byte or the 8290 leading F2 or F3. Check that any LOCK prefix is actually 8291 allowed. */ 8292 8293 if (pfx_lock) { 8294 if (can_be_used_with_LOCK_prefix( &guest_code[delta] )) { 8295 DIP("lock "); 8296 } else { 8297 *expect_CAS = False; 8298 goto decode_failure; 8299 } 8300 } 8301 8302 8303 /* ---------------------------------------------------- */ 8304 /* --- The SSE decoder. --- */ 8305 /* ---------------------------------------------------- */ 8306 8307 /* What did I do to deserve SSE ? Perhaps I was really bad in a 8308 previous life? */ 8309 8310 /* Note, this doesn't handle SSE2 or SSE3. That is handled in a 8311 later section, further on. */ 8312 8313 insn = &guest_code[delta]; 8314 8315 /* Treat fxsave specially. It should be doable even on an SSE0 8316 (Pentium-II class) CPU. Hence be prepared to handle it on 8317 any subarchitecture variant. 
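      Note that although the architected FXSAVE area is 512 bytes,
      Intel defines only the first 464 to be written by the insn
      (bytes 464..511 are left for software use), which is why the
      dirty call below declares mSize = 464.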
8318 */ 8319 8320 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */ 8321 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE 8322 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 0) { 8323 IRDirty* d; 8324 modrm = getIByte(delta+2); 8325 vassert(sz == 4); 8326 vassert(!epartIsReg(modrm)); 8327 8328 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8329 delta += 2+alen; 8330 gen_SEGV_if_not_16_aligned(addr); 8331 8332 DIP("fxsave %s\n", dis_buf); 8333 8334 /* Uses dirty helper: 8335 void x86g_do_FXSAVE ( VexGuestX86State*, UInt ) */ 8336 d = unsafeIRDirty_0_N ( 8337 0/*regparms*/, 8338 "x86g_dirtyhelper_FXSAVE", 8339 &x86g_dirtyhelper_FXSAVE, 8340 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 8341 ); 8342 8343 /* declare we're writing memory */ 8344 d->mFx = Ifx_Write; 8345 d->mAddr = mkexpr(addr); 8346 d->mSize = 464; /* according to recent Intel docs */ 8347 8348 /* declare we're reading guest state */ 8349 d->nFxState = 7; 8350 vex_bzero(&d->fxState, sizeof(d->fxState)); 8351 8352 d->fxState[0].fx = Ifx_Read; 8353 d->fxState[0].offset = OFFB_FTOP; 8354 d->fxState[0].size = sizeof(UInt); 8355 8356 d->fxState[1].fx = Ifx_Read; 8357 d->fxState[1].offset = OFFB_FPREGS; 8358 d->fxState[1].size = 8 * sizeof(ULong); 8359 8360 d->fxState[2].fx = Ifx_Read; 8361 d->fxState[2].offset = OFFB_FPTAGS; 8362 d->fxState[2].size = 8 * sizeof(UChar); 8363 8364 d->fxState[3].fx = Ifx_Read; 8365 d->fxState[3].offset = OFFB_FPROUND; 8366 d->fxState[3].size = sizeof(UInt); 8367 8368 d->fxState[4].fx = Ifx_Read; 8369 d->fxState[4].offset = OFFB_FC3210; 8370 d->fxState[4].size = sizeof(UInt); 8371 8372 d->fxState[5].fx = Ifx_Read; 8373 d->fxState[5].offset = OFFB_XMM0; 8374 d->fxState[5].size = 8 * sizeof(U128); 8375 8376 d->fxState[6].fx = Ifx_Read; 8377 d->fxState[6].offset = OFFB_SSEROUND; 8378 d->fxState[6].size = sizeof(UInt); 8379 8380 /* Be paranoid ... this assertion tries to ensure the 8 %xmm 8381 images are packed back-to-back. If not, the value of 8382 d->fxState[5].size is wrong. 
*/ 8383 vassert(16 == sizeof(U128)); 8384 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16)); 8385 8386 stmt( IRStmt_Dirty(d) ); 8387 8388 goto decode_success; 8389 } 8390 8391 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */ 8392 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE 8393 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 1) { 8394 IRDirty* d; 8395 modrm = getIByte(delta+2); 8396 vassert(sz == 4); 8397 vassert(!epartIsReg(modrm)); 8398 8399 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8400 delta += 2+alen; 8401 gen_SEGV_if_not_16_aligned(addr); 8402 8403 DIP("fxrstor %s\n", dis_buf); 8404 8405 /* Uses dirty helper: 8406 VexEmNote x86g_do_FXRSTOR ( VexGuestX86State*, UInt ) 8407 NOTE: 8408 the VexEmNote value is simply ignored (unlike for FRSTOR) 8409 */ 8410 d = unsafeIRDirty_0_N ( 8411 0/*regparms*/, 8412 "x86g_dirtyhelper_FXRSTOR", 8413 &x86g_dirtyhelper_FXRSTOR, 8414 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 8415 ); 8416 8417 /* declare we're reading memory */ 8418 d->mFx = Ifx_Read; 8419 d->mAddr = mkexpr(addr); 8420 d->mSize = 464; /* according to recent Intel docs */ 8421 8422 /* declare we're writing guest state */ 8423 d->nFxState = 7; 8424 vex_bzero(&d->fxState, sizeof(d->fxState)); 8425 8426 d->fxState[0].fx = Ifx_Write; 8427 d->fxState[0].offset = OFFB_FTOP; 8428 d->fxState[0].size = sizeof(UInt); 8429 8430 d->fxState[1].fx = Ifx_Write; 8431 d->fxState[1].offset = OFFB_FPREGS; 8432 d->fxState[1].size = 8 * sizeof(ULong); 8433 8434 d->fxState[2].fx = Ifx_Write; 8435 d->fxState[2].offset = OFFB_FPTAGS; 8436 d->fxState[2].size = 8 * sizeof(UChar); 8437 8438 d->fxState[3].fx = Ifx_Write; 8439 d->fxState[3].offset = OFFB_FPROUND; 8440 d->fxState[3].size = sizeof(UInt); 8441 8442 d->fxState[4].fx = Ifx_Write; 8443 d->fxState[4].offset = OFFB_FC3210; 8444 d->fxState[4].size = sizeof(UInt); 8445 8446 d->fxState[5].fx = Ifx_Write; 8447 d->fxState[5].offset = OFFB_XMM0; 8448 d->fxState[5].size = 8 * sizeof(U128); 8449 8450 d->fxState[6].fx = Ifx_Write; 8451 d->fxState[6].offset = OFFB_SSEROUND; 8452 d->fxState[6].size = sizeof(UInt); 8453 8454 /* Be paranoid ... this assertion tries to ensure the 8 %xmm 8455 images are packed back-to-back. If not, the value of 8456 d->fxState[5].size is wrong. */ 8457 vassert(16 == sizeof(U128)); 8458 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16)); 8459 8460 stmt( IRStmt_Dirty(d) ); 8461 8462 goto decode_success; 8463 } 8464 8465 /* ------ SSE decoder main ------ */ 8466 8467 /* Skip parts of the decoder which don't apply given the stated 8468 guest subarchitecture. */ 8469 if (archinfo->hwcaps == 0/*baseline, no sse at all*/) 8470 goto after_sse_decoders; 8471 8472 /* With mmxext only some extended MMX instructions are recognized. 8473 The mmxext instructions are MASKMOVQ MOVNTQ PAVGB PAVGW PMAXSW 8474 PMAXUB PMINSW PMINUB PMULHUW PSADBW PSHUFW PEXTRW PINSRW PMOVMSKB 8475 PREFETCHNTA PREFETCHT0 PREFETCHT1 PREFETCHT2 SFENCE 8476 8477 http://support.amd.com/us/Embedded_TechDocs/22466.pdf 8478 https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions */ 8479 8480 if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/) 8481 goto mmxext; 8482 8483 /* Otherwise we must be doing sse1 or sse2, so we can at least try 8484 for SSE1 here. 
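      (To summarise the dispatch: hwcaps == 0 skipped all SSE
      decoding above, VEX_HWCAPS_X86_MMXEXT jumps to the integer-only
      subset at the "mmxext" label further down, and anything else
      falls through here and may also continue into the SSE2 section
      later.)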
*/ 8485 8486 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */ 8487 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x58) { 8488 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addps", Iop_Add32Fx4 ); 8489 goto decode_success; 8490 } 8491 8492 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */ 8493 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x58) { 8494 vassert(sz == 4); 8495 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "addss", Iop_Add32F0x4 ); 8496 goto decode_success; 8497 } 8498 8499 /* 0F 55 = ANDNPS -- G = (not G) and E */ 8500 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x55) { 8501 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnps", Iop_AndV128 ); 8502 goto decode_success; 8503 } 8504 8505 /* 0F 54 = ANDPS -- G = G and E */ 8506 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x54) { 8507 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andps", Iop_AndV128 ); 8508 goto decode_success; 8509 } 8510 8511 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */ 8512 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC2) { 8513 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmpps", True, 4 ); 8514 goto decode_success; 8515 } 8516 8517 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */ 8518 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xC2) { 8519 vassert(sz == 4); 8520 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpss", False, 4 ); 8521 goto decode_success; 8522 } 8523 8524 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */ 8525 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */ 8526 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) { 8527 IRTemp argL = newTemp(Ity_F32); 8528 IRTemp argR = newTemp(Ity_F32); 8529 modrm = getIByte(delta+2); 8530 if (epartIsReg(modrm)) { 8531 assign( argR, getXMMRegLane32F( eregOfRM(modrm), 0/*lowest lane*/ ) ); 8532 delta += 2+1; 8533 DIP("[u]comiss %s,%s\n", nameXMMReg(eregOfRM(modrm)), 8534 nameXMMReg(gregOfRM(modrm)) ); 8535 } else { 8536 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8537 assign( argR, loadLE(Ity_F32, mkexpr(addr)) ); 8538 delta += 2+alen; 8539 DIP("[u]comiss %s,%s\n", dis_buf, 8540 nameXMMReg(gregOfRM(modrm)) ); 8541 } 8542 assign( argL, getXMMRegLane32F( gregOfRM(modrm), 0/*lowest lane*/ ) ); 8543 8544 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 8545 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 8546 stmt( IRStmt_Put( 8547 OFFB_CC_DEP1, 8548 binop( Iop_And32, 8549 binop(Iop_CmpF64, 8550 unop(Iop_F32toF64,mkexpr(argL)), 8551 unop(Iop_F32toF64,mkexpr(argR))), 8552 mkU32(0x45) 8553 ))); 8554 /* Set NDEP even though it isn't used. This makes redundant-PUT 8555 elimination of previous stores to this field work better. 
*/ 8556 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 8557 goto decode_success; 8558 } 8559 8560 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low 8561 half xmm */ 8562 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x2A) { 8563 IRTemp arg64 = newTemp(Ity_I64); 8564 IRTemp rmode = newTemp(Ity_I32); 8565 vassert(sz == 4); 8566 8567 modrm = getIByte(delta+2); 8568 do_MMX_preamble(); 8569 if (epartIsReg(modrm)) { 8570 assign( arg64, getMMXReg(eregOfRM(modrm)) ); 8571 delta += 2+1; 8572 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregOfRM(modrm)), 8573 nameXMMReg(gregOfRM(modrm))); 8574 } else { 8575 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8576 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 8577 delta += 2+alen; 8578 DIP("cvtpi2ps %s,%s\n", dis_buf, 8579 nameXMMReg(gregOfRM(modrm)) ); 8580 } 8581 8582 assign( rmode, get_sse_roundingmode() ); 8583 8584 putXMMRegLane32F( 8585 gregOfRM(modrm), 0, 8586 binop(Iop_F64toF32, 8587 mkexpr(rmode), 8588 unop(Iop_I32StoF64, 8589 unop(Iop_64to32, mkexpr(arg64)) )) ); 8590 8591 putXMMRegLane32F( 8592 gregOfRM(modrm), 1, 8593 binop(Iop_F64toF32, 8594 mkexpr(rmode), 8595 unop(Iop_I32StoF64, 8596 unop(Iop_64HIto32, mkexpr(arg64)) )) ); 8597 8598 goto decode_success; 8599 } 8600 8601 /* F3 0F 2A = CVTSI2SS -- convert I32 in mem/ireg to F32 in low 8602 quarter xmm */ 8603 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x2A) { 8604 IRTemp arg32 = newTemp(Ity_I32); 8605 IRTemp rmode = newTemp(Ity_I32); 8606 vassert(sz == 4); 8607 8608 modrm = getIByte(delta+3); 8609 if (epartIsReg(modrm)) { 8610 assign( arg32, getIReg(4, eregOfRM(modrm)) ); 8611 delta += 3+1; 8612 DIP("cvtsi2ss %s,%s\n", nameIReg(4, eregOfRM(modrm)), 8613 nameXMMReg(gregOfRM(modrm))); 8614 } else { 8615 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 8616 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 8617 delta += 3+alen; 8618 DIP("cvtsi2ss %s,%s\n", dis_buf, 8619 nameXMMReg(gregOfRM(modrm)) ); 8620 } 8621 8622 assign( rmode, get_sse_roundingmode() ); 8623 8624 putXMMRegLane32F( 8625 gregOfRM(modrm), 0, 8626 binop(Iop_F64toF32, 8627 mkexpr(rmode), 8628 unop(Iop_I32StoF64, mkexpr(arg32)) ) ); 8629 8630 goto decode_success; 8631 } 8632 8633 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 8634 I32 in mmx, according to prevailing SSE rounding mode */ 8635 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 8636 I32 in mmx, rounding towards zero */ 8637 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) { 8638 IRTemp dst64 = newTemp(Ity_I64); 8639 IRTemp rmode = newTemp(Ity_I32); 8640 IRTemp f32lo = newTemp(Ity_F32); 8641 IRTemp f32hi = newTemp(Ity_F32); 8642 Bool r2zero = toBool(insn[1] == 0x2C); 8643 8644 do_MMX_preamble(); 8645 modrm = getIByte(delta+2); 8646 8647 if (epartIsReg(modrm)) { 8648 delta += 2+1; 8649 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0)); 8650 assign(f32hi, getXMMRegLane32F(eregOfRM(modrm), 1)); 8651 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", 8652 nameXMMReg(eregOfRM(modrm)), 8653 nameMMXReg(gregOfRM(modrm))); 8654 } else { 8655 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8656 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 8657 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add32, 8658 mkexpr(addr), 8659 mkU32(4) ))); 8660 delta += 2+alen; 8661 DIP("cvt%sps2pi %s,%s\n", r2zero ? 
"t" : "", 8662 dis_buf, 8663 nameMMXReg(gregOfRM(modrm))); 8664 } 8665 8666 if (r2zero) { 8667 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 8668 } else { 8669 assign( rmode, get_sse_roundingmode() ); 8670 } 8671 8672 assign( 8673 dst64, 8674 binop( Iop_32HLto64, 8675 binop( Iop_F64toI32S, 8676 mkexpr(rmode), 8677 unop( Iop_F32toF64, mkexpr(f32hi) ) ), 8678 binop( Iop_F64toI32S, 8679 mkexpr(rmode), 8680 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 8681 ) 8682 ); 8683 8684 putMMXReg(gregOfRM(modrm), mkexpr(dst64)); 8685 goto decode_success; 8686 } 8687 8688 /* F3 0F 2D = CVTSS2SI -- convert F32 in mem/low quarter xmm to 8689 I32 in ireg, according to prevailing SSE rounding mode */ 8690 /* F3 0F 2C = CVTTSS2SI -- convert F32 in mem/low quarter xmm to 8691 I32 in ireg, rounding towards zero */ 8692 if (insn[0] == 0xF3 && insn[1] == 0x0F 8693 && (insn[2] == 0x2D || insn[2] == 0x2C)) { 8694 IRTemp rmode = newTemp(Ity_I32); 8695 IRTemp f32lo = newTemp(Ity_F32); 8696 Bool r2zero = toBool(insn[2] == 0x2C); 8697 vassert(sz == 4); 8698 8699 modrm = getIByte(delta+3); 8700 if (epartIsReg(modrm)) { 8701 delta += 3+1; 8702 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0)); 8703 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "", 8704 nameXMMReg(eregOfRM(modrm)), 8705 nameIReg(4, gregOfRM(modrm))); 8706 } else { 8707 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 8708 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 8709 delta += 3+alen; 8710 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "", 8711 dis_buf, 8712 nameIReg(4, gregOfRM(modrm))); 8713 } 8714 8715 if (r2zero) { 8716 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 8717 } else { 8718 assign( rmode, get_sse_roundingmode() ); 8719 } 8720 8721 putIReg(4, gregOfRM(modrm), 8722 binop( Iop_F64toI32S, 8723 mkexpr(rmode), 8724 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 8725 ); 8726 8727 goto decode_success; 8728 } 8729 8730 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */ 8731 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5E) { 8732 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divps", Iop_Div32Fx4 ); 8733 goto decode_success; 8734 } 8735 8736 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */ 8737 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5E) { 8738 vassert(sz == 4); 8739 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "divss", Iop_Div32F0x4 ); 8740 goto decode_success; 8741 } 8742 8743 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */ 8744 if (insn[0] == 0x0F && insn[1] == 0xAE 8745 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 2) { 8746 8747 IRTemp t64 = newTemp(Ity_I64); 8748 IRTemp ew = newTemp(Ity_I32); 8749 8750 modrm = getIByte(delta+2); 8751 vassert(!epartIsReg(modrm)); 8752 vassert(sz == 4); 8753 8754 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8755 delta += 2+alen; 8756 DIP("ldmxcsr %s\n", dis_buf); 8757 8758 /* The only thing we observe in %mxcsr is the rounding mode. 8759 Therefore, pass the 32-bit value (SSE native-format control 8760 word) to a clean helper, getting back a 64-bit value, the 8761 lower half of which is the SSEROUND value to store, and the 8762 upper half of which is the emulation-warning token which may 8763 be generated. 
      */
      /* ULong x86g_check_ldmxcsr ( UInt ); */
      assign( t64, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_check_ldmxcsr",
                      &x86g_check_ldmxcsr,
                      mkIRExprVec_1( loadLE(Ity_I32, mkexpr(addr)) )
                   )
            );

      put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
      assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
      put_emwarn( mkexpr(ew) );
      /* Finally, if an emulation warning was reported, side-exit to
         the next insn, reporting the warning, so that Valgrind's
         dispatcher sees the warning. */
      stmt(
         IRStmt_Exit(
            binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
            Ijk_EmWarn,
            IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
            OFFB_EIP
         )
      );
      goto decode_success;
   }


   /* mmxext sse1 subset starts here. mmxext only arches will parse
      only this subset of the sse1 instructions. */
  mmxext:

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F F7 = MASKMOVQ -- 8x8 masked store */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) {
      Bool ok = False;
      delta = dis_MMX( &ok, sorb, sz, delta+1 );
      if (!ok)
         goto decode_failure;
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E7 = MOVNTQ -- for us, just a plain MMX store.  Note, the
      Intel manual does not say anything about the usual business of
      the FP reg tags getting trashed whenever an MMX insn happens.
      So we just leave them alone.
   */
   if (insn[0] == 0x0F && insn[1] == 0xE7) {
      modrm = getIByte(delta+2);
      if (sz == 4 && !epartIsReg(modrm)) {
         /* do_MMX_preamble(); Intel docs don't specify this */
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
         DIP("movntq %s,%s\n", nameMMXReg(gregOfRM(modrm)),
                               dis_buf);
         delta += 2+alen;
         goto decode_success;
      }
      /* else fall through */
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pavgb", False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE3) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pavgw", False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
      zero-extend of it in ireg(G).
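      For example, "pextrw $2, %mm1, %eax" copies lane 2 (bits 47:32)
      of %mm1, zero-extended to 32 bits, into %eax.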
*/ 8847 if (insn[0] == 0x0F && insn[1] == 0xC5) { 8848 modrm = insn[2]; 8849 if (sz == 4 && epartIsReg(modrm)) { 8850 IRTemp sV = newTemp(Ity_I64); 8851 t5 = newTemp(Ity_I16); 8852 do_MMX_preamble(); 8853 assign(sV, getMMXReg(eregOfRM(modrm))); 8854 breakup64to16s( sV, &t3, &t2, &t1, &t0 ); 8855 switch (insn[3] & 3) { 8856 case 0: assign(t5, mkexpr(t0)); break; 8857 case 1: assign(t5, mkexpr(t1)); break; 8858 case 2: assign(t5, mkexpr(t2)); break; 8859 case 3: assign(t5, mkexpr(t3)); break; 8860 default: vassert(0); /*NOTREACHED*/ 8861 } 8862 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5))); 8863 DIP("pextrw $%d,%s,%s\n", 8864 (Int)insn[3], nameMMXReg(eregOfRM(modrm)), 8865 nameIReg(4,gregOfRM(modrm))); 8866 delta += 4; 8867 goto decode_success; 8868 } 8869 /* else fall through */ 8870 } 8871 8872 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8873 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 8874 put it into the specified lane of mmx(G). */ 8875 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC4) { 8876 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the 8877 mmx reg. t4 is the new lane value. t5 is the original 8878 mmx value. t6 is the new mmx value. */ 8879 Int lane; 8880 t4 = newTemp(Ity_I16); 8881 t5 = newTemp(Ity_I64); 8882 t6 = newTemp(Ity_I64); 8883 modrm = insn[2]; 8884 do_MMX_preamble(); 8885 8886 assign(t5, getMMXReg(gregOfRM(modrm))); 8887 breakup64to16s( t5, &t3, &t2, &t1, &t0 ); 8888 8889 if (epartIsReg(modrm)) { 8890 assign(t4, getIReg(2, eregOfRM(modrm))); 8891 delta += 3+1; 8892 lane = insn[3+1-1]; 8893 DIP("pinsrw $%d,%s,%s\n", lane, 8894 nameIReg(2,eregOfRM(modrm)), 8895 nameMMXReg(gregOfRM(modrm))); 8896 } else { 8897 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8898 delta += 3+alen; 8899 lane = insn[3+alen-1]; 8900 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 8901 DIP("pinsrw $%d,%s,%s\n", lane, 8902 dis_buf, 8903 nameMMXReg(gregOfRM(modrm))); 8904 } 8905 8906 switch (lane & 3) { 8907 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break; 8908 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break; 8909 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break; 8910 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break; 8911 default: vassert(0); /*NOTREACHED*/ 8912 } 8913 putMMXReg(gregOfRM(modrm), mkexpr(t6)); 8914 goto decode_success; 8915 } 8916 8917 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8918 /* 0F EE = PMAXSW -- 16x4 signed max */ 8919 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEE) { 8920 do_MMX_preamble(); 8921 delta = dis_MMXop_regmem_to_reg ( 8922 sorb, delta+2, insn[1], "pmaxsw", False ); 8923 goto decode_success; 8924 } 8925 8926 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8927 /* 0F DE = PMAXUB -- 8x8 unsigned max */ 8928 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDE) { 8929 do_MMX_preamble(); 8930 delta = dis_MMXop_regmem_to_reg ( 8931 sorb, delta+2, insn[1], "pmaxub", False ); 8932 goto decode_success; 8933 } 8934 8935 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8936 /* 0F EA = PMINSW -- 16x4 signed min */ 8937 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEA) { 8938 do_MMX_preamble(); 8939 delta = dis_MMXop_regmem_to_reg ( 8940 sorb, delta+2, insn[1], "pminsw", False ); 8941 goto decode_success; 8942 } 8943 8944 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8945 /* 0F DA = PMINUB -- 8x8 unsigned min */ 8946 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDA) { 8947 do_MMX_preamble(); 8948 delta = 
dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pminub", False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
      mmx(E), turn them into a byte, and put zero-extend of it in
      ireg(G). */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) {
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         do_MMX_preamble();
         t0 = newTemp(Ity_I64);
         t1 = newTemp(Ity_I32);
         assign(t0, getMMXReg(eregOfRM(modrm)));
         assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
         putIReg(4, gregOfRM(modrm), mkexpr(t1));
         DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                 nameIReg(4,gregOfRM(modrm)));
         delta += 3;
         goto decode_success;
      }
      /* else fall through */
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E4 = PMULHUW -- 16x4 hi-half of unsigned widening multiply */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE4) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pmulhuw", False );
      goto decode_success;
   }

   /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
   /* 0F 18 /1 = PREFETCHT0  -- with various different hints */
   /* 0F 18 /2 = PREFETCHT1 */
   /* 0F 18 /3 = PREFETCHT2 */
   if (insn[0] == 0x0F && insn[1] == 0x18
       && !epartIsReg(insn[2])
       && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 3) {
      const HChar* hintstr = "??";

      modrm = getIByte(delta+2);
      vassert(!epartIsReg(modrm));

      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      delta += 2+alen;

      switch (gregOfRM(modrm)) {
         case 0: hintstr = "nta"; break;
         case 1: hintstr = "t0"; break;
         case 2: hintstr = "t1"; break;
         case 3: hintstr = "t2"; break;
         default: vassert(0); /*NOTREACHED*/
      }

      DIP("prefetch%s %s\n", hintstr, dis_buf);
      goto decode_success;
   }

   /* 0F 0D /0 = PREFETCH  m8 -- 3DNow!
prefetch */ 9011 /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */ 9012 if (insn[0] == 0x0F && insn[1] == 0x0D 9013 && !epartIsReg(insn[2]) 9014 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 1) { 9015 const HChar* hintstr = "??"; 9016 9017 modrm = getIByte(delta+2); 9018 vassert(!epartIsReg(modrm)); 9019 9020 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9021 delta += 2+alen; 9022 9023 switch (gregOfRM(modrm)) { 9024 case 0: hintstr = ""; break; 9025 case 1: hintstr = "w"; break; 9026 default: vassert(0); /*NOTREACHED*/ 9027 } 9028 9029 DIP("prefetch%s %s\n", hintstr, dis_buf); 9030 goto decode_success; 9031 } 9032 9033 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 9034 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */ 9035 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF6) { 9036 do_MMX_preamble(); 9037 delta = dis_MMXop_regmem_to_reg ( 9038 sorb, delta+2, insn[1], "psadbw", False ); 9039 goto decode_success; 9040 } 9041 9042 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 9043 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */ 9044 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x70) { 9045 Int order; 9046 IRTemp sV, dV, s3, s2, s1, s0; 9047 s3 = s2 = s1 = s0 = IRTemp_INVALID; 9048 sV = newTemp(Ity_I64); 9049 dV = newTemp(Ity_I64); 9050 do_MMX_preamble(); 9051 modrm = insn[2]; 9052 if (epartIsReg(modrm)) { 9053 assign( sV, getMMXReg(eregOfRM(modrm)) ); 9054 order = (Int)insn[3]; 9055 delta += 2+2; 9056 DIP("pshufw $%d,%s,%s\n", order, 9057 nameMMXReg(eregOfRM(modrm)), 9058 nameMMXReg(gregOfRM(modrm))); 9059 } else { 9060 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9061 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 9062 order = (Int)insn[2+alen]; 9063 delta += 3+alen; 9064 DIP("pshufw $%d,%s,%s\n", order, 9065 dis_buf, 9066 nameMMXReg(gregOfRM(modrm))); 9067 } 9068 breakup64to16s( sV, &s3, &s2, &s1, &s0 ); 9069 9070# define SEL(n) \ 9071 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 9072 assign(dV, 9073 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 9074 SEL((order>>2)&3), SEL((order>>0)&3) ) 9075 ); 9076 putMMXReg(gregOfRM(modrm), mkexpr(dV)); 9077# undef SEL 9078 goto decode_success; 9079 } 9080 9081 /* 0F AE /7 = SFENCE -- flush pending operations to memory */ 9082 if (insn[0] == 0x0F && insn[1] == 0xAE 9083 && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) { 9084 vassert(sz == 4); 9085 delta += 3; 9086 /* Insert a memory fence. It's sometimes important that these 9087 are carried through to the generated code. */ 9088 stmt( IRStmt_MBE(Imbe_Fence) ); 9089 DIP("sfence\n"); 9090 goto decode_success; 9091 } 9092 9093 /* End of mmxext sse1 subset. No more sse parsing for mmxext only arches. 
*/ 9094 if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/) 9095 goto after_sse_decoders; 9096 9097 9098 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ 9099 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) { 9100 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 ); 9101 goto decode_success; 9102 } 9103 9104 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */ 9105 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) { 9106 vassert(sz == 4); 9107 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 ); 9108 goto decode_success; 9109 } 9110 9111 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */ 9112 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) { 9113 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 ); 9114 goto decode_success; 9115 } 9116 9117 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */ 9118 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) { 9119 vassert(sz == 4); 9120 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 ); 9121 goto decode_success; 9122 } 9123 9124 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */ 9125 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */ 9126 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) { 9127 modrm = getIByte(delta+2); 9128 if (epartIsReg(modrm)) { 9129 putXMMReg( gregOfRM(modrm), 9130 getXMMReg( eregOfRM(modrm) )); 9131 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9132 nameXMMReg(gregOfRM(modrm))); 9133 delta += 2+1; 9134 } else { 9135 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9136 if (insn[1] == 0x28/*movaps*/) 9137 gen_SEGV_if_not_16_aligned( addr ); 9138 putXMMReg( gregOfRM(modrm), 9139 loadLE(Ity_V128, mkexpr(addr)) ); 9140 DIP("mov[ua]ps %s,%s\n", dis_buf, 9141 nameXMMReg(gregOfRM(modrm))); 9142 delta += 2+alen; 9143 } 9144 goto decode_success; 9145 } 9146 9147 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */ 9148 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */ 9149 if (sz == 4 && insn[0] == 0x0F 9150 && (insn[1] == 0x29 || insn[1] == 0x11)) { 9151 modrm = getIByte(delta+2); 9152 if (epartIsReg(modrm)) { 9153 /* fall through; awaiting test case */ 9154 } else { 9155 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9156 if (insn[1] == 0x29/*movaps*/) 9157 gen_SEGV_if_not_16_aligned( addr ); 9158 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 9159 DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)), 9160 dis_buf ); 9161 delta += 2+alen; 9162 goto decode_success; 9163 } 9164 } 9165 9166 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */ 9167 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */ 9168 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) { 9169 modrm = getIByte(delta+2); 9170 if (epartIsReg(modrm)) { 9171 delta += 2+1; 9172 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/, 9173 getXMMRegLane64( eregOfRM(modrm), 0 ) ); 9174 DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9175 nameXMMReg(gregOfRM(modrm))); 9176 } else { 9177 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9178 delta += 2+alen; 9179 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/, 9180 loadLE(Ity_I64, mkexpr(addr)) ); 9181 DIP("movhps %s,%s\n", dis_buf, 9182 nameXMMReg( gregOfRM(modrm) )); 9183 } 9184 goto decode_success; 9185 } 9186 9187 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. 
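      There is no register-to-register form of this encoding; if the
      E operand is a register the code below just falls through, and
      the insn ends up undecoded unless some later case claims it.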
   */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
      if (!epartIsReg(insn[2])) {
         delta += 2;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         storeLE( mkexpr(addr),
                  getXMMRegLane64( gregOfRM(insn[2]),
                                   1/*upper lane*/ ) );
         DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
                               dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }

   /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
   /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         putXMMRegLane64( gregOfRM(modrm),
                          0/*lower lane*/,
                          getXMMRegLane64( eregOfRM(modrm), 1 ));
         DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         putXMMRegLane64( gregOfRM(modrm),  0/*lower lane*/,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movlps %s, %s\n",
             dis_buf, nameXMMReg( gregOfRM(modrm) ));
      }
      goto decode_success;
   }

   /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) {
      if (!epartIsReg(insn[2])) {
         delta += 2;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         storeLE( mkexpr(addr),
                  getXMMRegLane64( gregOfRM(insn[2]),
                                   0/*lower lane*/ ) );
         DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
                                dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }

   /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
      to 4 lowest bits of ireg(G) */
   if (insn[0] == 0x0F && insn[1] == 0x50) {
      modrm = getIByte(delta+2);
      if (sz == 4 && epartIsReg(modrm)) {
         Int src;
         t0 = newTemp(Ity_I32);
         t1 = newTemp(Ity_I32);
         t2 = newTemp(Ity_I32);
         t3 = newTemp(Ity_I32);
         delta += 2+1;
         src = eregOfRM(modrm);
         assign( t0, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
                            mkU32(1) ));
         assign( t1, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
                            mkU32(2) ));
         assign( t2, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
                            mkU32(4) ));
         assign( t3, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
                            mkU32(8) ));
         putIReg(4, gregOfRM(modrm),
                 binop(Iop_Or32,
                       binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
                       binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
                      )
                 );
         DIP("movmskps %s,%s\n", nameXMMReg(src),
                                 nameIReg(4, gregOfRM(modrm)));
         goto decode_success;
      }
      /* else fall through */
   }

   /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
   /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
   if (insn[0] == 0x0F && insn[1] == 0x2B) {
      modrm = getIByte(delta+2);
      if (!epartIsReg(modrm)) {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
                                 nameXMMReg(gregOfRM(modrm)),
                                 dis_buf);
         delta += 2+alen;
         goto decode_success;
      }
      /* else fall through */
   }

   /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
      (lo 1/4 xmm).
      If E is mem, upper 3/4 of G is zeroed out. */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         putXMMRegLane32( gregOfRM(modrm), 0,
                          getXMMRegLane32( eregOfRM(modrm), 0 ));
         DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                              nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         /* zero bits 127:64 */
         putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
         /* zero bits 63:32 */
         putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
         /* write bits 31:0 */
         putXMMRegLane32( gregOfRM(modrm), 0,
                          loadLE(Ity_I32, mkexpr(addr)) );
         DIP("movss %s,%s\n", dis_buf,
                              nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }
      goto decode_success;
   }

   /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
      or lo 1/4 xmm). */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         /* fall through, we don't yet have a test case */
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         storeLE( mkexpr(addr),
                  getXMMRegLane32(gregOfRM(modrm), 0) );
         DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
                              dis_buf);
         delta += 3+alen;
         goto decode_success;
      }
   }

   /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
      goto decode_success;
   }

   /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
      goto decode_success;
   }

   /* 0F 56 = ORPS -- G = G or E */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
      goto decode_success;
   }

   /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
   if (insn[0] == 0x0F && insn[1] == 0x53) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
                                        "rcpps", Iop_RecipEst32Fx4 );
      goto decode_success;
   }

   /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x53) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
                                         "rcpss", Iop_RecipEst32F0x4 );
      goto decode_success;
   }

   /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
   if (insn[0] == 0x0F && insn[1] == 0x52) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
                                        "rsqrtps", Iop_RSqrtEst32Fx4 );
      goto decode_success;
   }

   /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x52) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
                                         "rsqrtss", Iop_RSqrtEst32F0x4 );
      goto decode_success;
   }

   /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) {
      Int    select;
      IRTemp sV, dV;
      IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
      sV = newTemp(Ity_V128);
      dV = newTemp(Ity_V128);
      s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 =
IRTemp_INVALID; 9399 modrm = insn[2]; 9400 assign( dV, getXMMReg(gregOfRM(modrm)) ); 9401 9402 if (epartIsReg(modrm)) { 9403 assign( sV, getXMMReg(eregOfRM(modrm)) ); 9404 select = (Int)insn[3]; 9405 delta += 2+2; 9406 DIP("shufps $%d,%s,%s\n", select, 9407 nameXMMReg(eregOfRM(modrm)), 9408 nameXMMReg(gregOfRM(modrm))); 9409 } else { 9410 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9411 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 9412 select = (Int)insn[2+alen]; 9413 delta += 3+alen; 9414 DIP("shufps $%d,%s,%s\n", select, 9415 dis_buf, 9416 nameXMMReg(gregOfRM(modrm))); 9417 } 9418 9419 breakup128to32s( dV, &d3, &d2, &d1, &d0 ); 9420 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 9421 9422# define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3))) 9423# define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 9424 9425 putXMMReg( 9426 gregOfRM(modrm), 9427 mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3), 9428 SELD((select>>2)&3), SELD((select>>0)&3) ) 9429 ); 9430 9431# undef SELD 9432# undef SELS 9433 9434 goto decode_success; 9435 } 9436 9437 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */ 9438 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x51) { 9439 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 9440 "sqrtps", Iop_Sqrt32Fx4 ); 9441 goto decode_success; 9442 } 9443 9444 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */ 9445 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x51) { 9446 vassert(sz == 4); 9447 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, 9448 "sqrtss", Iop_Sqrt32F0x4 ); 9449 goto decode_success; 9450 } 9451 9452 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */ 9453 if (insn[0] == 0x0F && insn[1] == 0xAE 9454 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 3) { 9455 modrm = getIByte(delta+2); 9456 vassert(sz == 4); 9457 vassert(!epartIsReg(modrm)); 9458 9459 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9460 delta += 2+alen; 9461 9462 /* Fake up a native SSE mxcsr word. The only thing it depends 9463 on is SSEROUND[1:0], so call a clean helper to cook it up. 
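         In effect only MXCSR[14:13] (the rounding-control field) can
         vary in the value stored; the helper presumably fills in the
         architectural defaults (exceptions masked, flags clear) for
         all other bits, since rounding mode is the only mxcsr state
         this simulation tracks.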
      */
      /* UInt x86g_create_mxcsr ( UInt sseround ) */
      DIP("stmxcsr %s\n", dis_buf);
      storeLE( mkexpr(addr),
               mkIRExprCCall(
                  Ity_I32, 0/*regp*/,
                  "x86g_create_mxcsr", &x86g_create_mxcsr,
                  mkIRExprVec_1( get_sse_roundingmode() )
               )
             );
      goto decode_success;
   }

   /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5C) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "subps", Iop_Sub32Fx4 );
      goto decode_success;
   }

   /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5C) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "subss", Iop_Sub32F0x4 );
      goto decode_success;
   }

   /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
   /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
   /* These just appear to be special cases of SHUFPS */
   if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
      IRTemp sV, dV;
      IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
      Bool hi = toBool(insn[1] == 0x15);
      sV = newTemp(Ity_V128);
      dV = newTemp(Ity_V128);
      s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
                                  nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
                                  dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
      }

      breakup128to32s( dV, &d3, &d2, &d1, &d0 );
      breakup128to32s( sV, &s3, &s2, &s1, &s0 );

      if (hi) {
         putXMMReg( gregOfRM(modrm), mk128from32s( s3, d3, s2, d2 ) );
      } else {
         putXMMReg( gregOfRM(modrm), mk128from32s( s1, d1, s0, d0 ) );
      }

      goto decode_success;
   }

   /* 0F 57 = XORPS -- G = G xor E */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x57) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorps", Iop_XorV128 );
      goto decode_success;
   }

   /* ---------------------------------------------------- */
   /* --- end of the SSE decoder.                      --- */
   /* ---------------------------------------------------- */

   /* ---------------------------------------------------- */
   /* --- start of the SSE2 decoder.                   --- */
   /* ---------------------------------------------------- */

   /* Skip parts of the decoder which don't apply given the stated
      guest subarchitecture.
   */
   if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
      goto after_sse_decoders; /* no SSE2 capabilities */

   insn = &guest_code[delta];

   /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x58) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "addpd", Iop_Add64Fx2 );
      goto decode_success;
   }

   /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x58) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "addsd", Iop_Add64F0x2 );
      goto decode_success;
   }

   /* 66 0F 55 = ANDNPD -- G = (not G) and E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x55) {
      delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnpd", Iop_AndV128 );
      goto decode_success;
   }

   /* 66 0F 54 = ANDPD -- G = G and E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x54) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "andpd", Iop_AndV128 );
      goto decode_success;
   }

   /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC2) {
      delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmppd", True, 8 );
      goto decode_success;
   }

   /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xC2) {
      vassert(sz == 4);
      delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpsd", False, 8 );
      goto decode_success;
   }

   /* 66 0F 2F = COMISD  -- 64F0x2 comparison G,E, and set ZCP */
   /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
   if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
      IRTemp argL = newTemp(Ity_F64);
      IRTemp argR = newTemp(Ity_F64);
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( argR, getXMMRegLane64F( eregOfRM(modrm), 0/*lowest lane*/ ) );
         delta += 2+1;
         DIP("[u]comisd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)) );
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
         delta += 2+alen;
         DIP("[u]comisd %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRM(modrm)) );
      }
      assign( argL, getXMMRegLane64F( gregOfRM(modrm), 0/*lowest lane*/ ) );

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put(
               OFFB_CC_DEP1,
               binop( Iop_And32,
                      binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)),
                      mkU32(0x45)
          )));
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better.
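         The 0x45 mask above plays the same role as in the [U]COMISS
         case earlier: it keeps exactly the Z, P and C bits that the
         IRCmpF64Result encoding is designed to line up with.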
   /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
      F64 in xmm(G) */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xE6) {
      IRTemp arg64 = newTemp(Ity_I64);
      vassert(sz == 4);

      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         assign( arg64, getXMMRegLane64(eregOfRM(modrm), 0) );
         delta += 3+1;
         DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("cvtdq2pd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      putXMMRegLane64F( 
         gregOfRM(modrm), 0,
         unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
      );

      putXMMRegLane64F(
         gregOfRM(modrm), 1, 
         unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
      );

      goto decode_success;
   }

   /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
      xmm(G) */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5B) {
      IRTemp argV  = newTemp(Ity_V128);
      IRTemp rmode = newTemp(Ity_I32);

      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( argV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("cvtdq2ps %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      assign( rmode, get_sse_roundingmode() );
      breakup128to32s( argV, &t3, &t2, &t1, &t0 );

#     define CVT(_t)  binop( Iop_F64toF32,                    \
                             mkexpr(rmode),                   \
                             unop(Iop_I32StoF64,mkexpr(_t)))

      putXMMRegLane32F( gregOfRM(modrm), 3, CVT(t3) );
      putXMMRegLane32F( gregOfRM(modrm), 2, CVT(t2) );
      putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );

#     undef CVT

      goto decode_success;
   }
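   /* Note that the Iop_I32StoF64 conversions above need no rounding
      mode: every 32-bit int is exactly representable as an F64
      (53-bit mantissa), so the widening direction can never lose
      information.  Only the subsequent F64toF32 narrowing in
      CVTDQ2PS rounds, and that consults the guest's MXCSR rounding
      mode via get_sse_roundingmode(). */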
   /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
      lo half xmm(G), and zero upper half */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xE6) {
      IRTemp argV  = newTemp(Ity_V128);
      IRTemp rmode = newTemp(Ity_I32);
      vassert(sz == 4);

      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         assign( argV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("cvtpd2dq %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      assign( rmode, get_sse_roundingmode() );
      t0 = newTemp(Ity_F64);
      t1 = newTemp(Ity_F64);
      assign( t0, unop(Iop_ReinterpI64asF64, 
                       unop(Iop_V128to64, mkexpr(argV))) );
      assign( t1, unop(Iop_ReinterpI64asF64, 
                       unop(Iop_V128HIto64, mkexpr(argV))) );

#     define CVT(_t)  binop( Iop_F64toI32S,                   \
                             mkexpr(rmode),                   \
                             mkexpr(_t) )

      putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
      putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
      putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );

#     undef CVT

      goto decode_success;
   }

   /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
      I32 in mmx, according to prevailing SSE rounding mode */
   /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
      I32 in mmx, rounding towards zero */
   if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
      IRTemp dst64  = newTemp(Ity_I64);
      IRTemp rmode  = newTemp(Ity_I32);
      IRTemp f64lo  = newTemp(Ity_F64);
      IRTemp f64hi  = newTemp(Ity_F64);
      Bool   r2zero = toBool(insn[1] == 0x2C);

      do_MMX_preamble();
      modrm = getIByte(delta+2);

      if (epartIsReg(modrm)) {
         delta += 2+1;
         assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
         assign(f64hi, getXMMRegLane64F(eregOfRM(modrm), 1));
         DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
         assign(f64hi, loadLE(Ity_F64, binop( Iop_Add32, 
                                              mkexpr(addr), 
                                              mkU32(8) )));
         delta += 2+alen;
         DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
                                   dis_buf,
                                   nameMMXReg(gregOfRM(modrm)));
      }

      if (r2zero) {
         assign(rmode, mkU32((UInt)Irrm_ZERO) );
      } else {
         assign( rmode, get_sse_roundingmode() );
      }

      assign( 
         dst64,
         binop( Iop_32HLto64,
                binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
                binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
              )
      );

      putMMXReg(gregOfRM(modrm), mkexpr(dst64));
      goto decode_success;
   }
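   /* The r2zero distinction above is the whole difference between
      CVTPD2PI and CVTTPD2PI: the former rounds per the guest's
      MXCSR.RC (fetched by get_sse_roundingmode()), the latter always
      truncates.  Eg, converting 2.7: round-to-nearest yields 3,
      Irrm_ZERO yields 2; for -2.7 they yield -3 and -2
      respectively. */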
   /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
      lo half xmm(G), and zero upper half */
   /* Note, this is practically identical to CVTPD2DQ.  It would have 
      been nicer to merge them together, but the insn[] offsets differ
      by one. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5A) {
      IRTemp argV  = newTemp(Ity_V128);
      IRTemp rmode = newTemp(Ity_I32);

      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( argV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("cvtpd2ps %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      assign( rmode, get_sse_roundingmode() );
      t0 = newTemp(Ity_F64);
      t1 = newTemp(Ity_F64);
      assign( t0, unop(Iop_ReinterpI64asF64, 
                       unop(Iop_V128to64, mkexpr(argV))) );
      assign( t1, unop(Iop_ReinterpI64asF64, 
                       unop(Iop_V128HIto64, mkexpr(argV))) );

#     define CVT(_t)  binop( Iop_F64toF32,                    \
                             mkexpr(rmode),                   \
                             mkexpr(_t) )

      putXMMRegLane32(  gregOfRM(modrm), 3, mkU32(0) );
      putXMMRegLane32(  gregOfRM(modrm), 2, mkU32(0) );
      putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );

#     undef CVT

      goto decode_success;
   }

   /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
      xmm(G) */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x2A) {
      IRTemp arg64 = newTemp(Ity_I64);

      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* Only switch to MMX mode if the source is a MMX register.
            This is inconsistent with all other instructions which
            convert between XMM and (M64 or MMX), which always switch
            to MMX mode even if 64-bit operand is M64 and not MMX.  At
            least, that's what the Intel docs seem to me to say.
            Fixes #210264. */
         do_MMX_preamble();
         assign( arg64, getMMXReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 2+alen;
         DIP("cvtpi2pd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      putXMMRegLane64F( 
         gregOfRM(modrm), 0,
         unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
      );

      putXMMRegLane64F( 
         gregOfRM(modrm), 1, 
         unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
      );

      goto decode_success;
   }

   /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
      xmm(G) */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5B) {
      IRTemp argV  = newTemp(Ity_V128);
      IRTemp rmode = newTemp(Ity_I32);

      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( argV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("cvtps2dq %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      assign( rmode, get_sse_roundingmode() );
      breakup128to32s( argV, &t3, &t2, &t1, &t0 );

      /* This is less than ideal.  If it turns out to be a performance
         bottleneck it can be improved. */
#     define CVT(_t)                            \
        binop( Iop_F64toI32S,                   \
               mkexpr(rmode),                   \
               unop( Iop_F32toF64,              \
                     unop( Iop_ReinterpI32asF32, mkexpr(_t))) )

      putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
      putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
      putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );

#     undef CVT

      goto decode_success;
   }
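   /* The F32toF64-then-F64toI32S route taken by CVT above does not
      double-round: the F32toF64 step is exact, so the one and only
      rounding happens in F64toI32S, giving the same result a direct
      F32->I32 conversion would. */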
   /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
      F64 in xmm(G). */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5A) {
      IRTemp f32lo = newTemp(Ity_F32);
      IRTemp f32hi = newTemp(Ity_F32);

      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( f32lo, getXMMRegLane32F(eregOfRM(modrm), 0) );
         assign( f32hi, getXMMRegLane32F(eregOfRM(modrm), 1) );
         delta += 2+1;
         DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
         assign( f32hi, loadLE(Ity_F32, 
                               binop(Iop_Add32,mkexpr(addr),mkU32(4))) );
         delta += 2+alen;
         DIP("cvtps2pd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      putXMMRegLane64F( gregOfRM(modrm), 1,
                        unop(Iop_F32toF64, mkexpr(f32hi)) );
      putXMMRegLane64F( gregOfRM(modrm), 0,
                        unop(Iop_F32toF64, mkexpr(f32lo)) );

      goto decode_success;
   }
"t" : "", 9966 dis_buf, 9967 nameIReg(4, gregOfRM(modrm))); 9968 } 9969 9970 if (r2zero) { 9971 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 9972 } else { 9973 assign( rmode, get_sse_roundingmode() ); 9974 } 9975 9976 putIReg(4, gregOfRM(modrm), 9977 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) ); 9978 9979 goto decode_success; 9980 } 9981 9982 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in 9983 low 1/4 xmm(G), according to prevailing SSE rounding mode */ 9984 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5A) { 9985 IRTemp rmode = newTemp(Ity_I32); 9986 IRTemp f64lo = newTemp(Ity_F64); 9987 vassert(sz == 4); 9988 9989 modrm = getIByte(delta+3); 9990 if (epartIsReg(modrm)) { 9991 delta += 3+1; 9992 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0)); 9993 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9994 nameXMMReg(gregOfRM(modrm))); 9995 } else { 9996 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9997 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 9998 delta += 3+alen; 9999 DIP("cvtsd2ss %s,%s\n", dis_buf, 10000 nameXMMReg(gregOfRM(modrm))); 10001 } 10002 10003 assign( rmode, get_sse_roundingmode() ); 10004 putXMMRegLane32F( 10005 gregOfRM(modrm), 0, 10006 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) ) 10007 ); 10008 10009 goto decode_success; 10010 } 10011 10012 /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low 10013 half xmm */ 10014 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x2A) { 10015 IRTemp arg32 = newTemp(Ity_I32); 10016 vassert(sz == 4); 10017 10018 modrm = getIByte(delta+3); 10019 if (epartIsReg(modrm)) { 10020 assign( arg32, getIReg(4, eregOfRM(modrm)) ); 10021 delta += 3+1; 10022 DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm)), 10023 nameXMMReg(gregOfRM(modrm))); 10024 } else { 10025 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 10026 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 10027 delta += 3+alen; 10028 DIP("cvtsi2sd %s,%s\n", dis_buf, 10029 nameXMMReg(gregOfRM(modrm)) ); 10030 } 10031 10032 putXMMRegLane64F( 10033 gregOfRM(modrm), 0, 10034 unop(Iop_I32StoF64, mkexpr(arg32)) ); 10035 10036 goto decode_success; 10037 } 10038 10039 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in 10040 low half xmm(G) */ 10041 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5A) { 10042 IRTemp f32lo = newTemp(Ity_F32); 10043 vassert(sz == 4); 10044 10045 modrm = getIByte(delta+3); 10046 if (epartIsReg(modrm)) { 10047 delta += 3+1; 10048 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0)); 10049 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10050 nameXMMReg(gregOfRM(modrm))); 10051 } else { 10052 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 10053 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 10054 delta += 3+alen; 10055 DIP("cvtss2sd %s,%s\n", dis_buf, 10056 nameXMMReg(gregOfRM(modrm))); 10057 } 10058 10059 putXMMRegLane64F( gregOfRM(modrm), 0, 10060 unop( Iop_F32toF64, mkexpr(f32lo) ) ); 10061 10062 goto decode_success; 10063 } 10064 10065 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 10066 lo half xmm(G), and zero upper half, rounding towards zero */ 10067 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE6) { 10068 IRTemp argV = newTemp(Ity_V128); 10069 IRTemp rmode = newTemp(Ity_I32); 10070 10071 modrm = getIByte(delta+2); 10072 if (epartIsReg(modrm)) { 10073 assign( argV, getXMMReg(eregOfRM(modrm)) ); 10074 delta += 2+1; 10075 DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10076 nameXMMReg(gregOfRM(modrm))); 10077 } else { 10078 addr = 
   /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
      lo half xmm(G), and zero upper half, rounding towards zero */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE6) {
      IRTemp argV  = newTemp(Ity_V128);
      IRTemp rmode = newTemp(Ity_I32);

      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( argV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("cvttpd2dq %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRM(modrm)) );
      }

      assign( rmode, mkU32((UInt)Irrm_ZERO) );

      t0 = newTemp(Ity_F64);
      t1 = newTemp(Ity_F64);
      assign( t0, unop(Iop_ReinterpI64asF64, 
                       unop(Iop_V128to64, mkexpr(argV))) );
      assign( t1, unop(Iop_ReinterpI64asF64, 
                       unop(Iop_V128HIto64, mkexpr(argV))) );

#     define CVT(_t)  binop( Iop_F64toI32S,                   \
                             mkexpr(rmode),                   \
                             mkexpr(_t) )

      putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
      putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
      putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );

#     undef CVT

      goto decode_success;
   }

   /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
      xmm(G), rounding towards zero */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5B) {
      IRTemp argV  = newTemp(Ity_V128);
      IRTemp rmode = newTemp(Ity_I32);
      vassert(sz == 4);

      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         assign( argV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("cvttps2dq %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRM(modrm)) );
      }

      assign( rmode, mkU32((UInt)Irrm_ZERO) );
      breakup128to32s( argV, &t3, &t2, &t1, &t0 );

      /* This is less than ideal.  If it turns out to be a performance
         bottleneck it can be improved. */
#     define CVT(_t)                            \
        binop( Iop_F64toI32S,                   \
               mkexpr(rmode),                   \
               unop( Iop_F32toF64,              \
                     unop( Iop_ReinterpI32asF32, mkexpr(_t))) )

      putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
      putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
      putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );

#     undef CVT

      goto decode_success;
   }

   /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5E) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "divpd", Iop_Div64Fx2 );
      goto decode_success;
   }

   /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5E) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "divsd", Iop_Div64F0x2 );
      goto decode_success;
   }

   /* 0F AE /5 = LFENCE -- flush pending operations to memory */
   /* 0F AE /6 = MFENCE -- flush pending operations to memory */
   if (insn[0] == 0x0F && insn[1] == 0xAE
       && epartIsReg(insn[2])
       && (gregOfRM(insn[2]) == 5 || gregOfRM(insn[2]) == 6)) {
      vassert(sz == 4);
      delta += 3;
      /* Insert a memory fence.  It's sometimes important that these
         are carried through to the generated code. */
      stmt( IRStmt_MBE(Imbe_Fence) );
      DIP("%sfence\n", gregOfRM(insn[2])==5 ? "l" : "m");
      goto decode_success;
   }
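   /* Note that lfence and mfence (and sfence, decoded elsewhere) all
      map to the same Imbe_Fence event: the IR has only one barrier
      kind, so the load-vs-store ordering distinction is intentionally
      lost here.  That is safe, since a full fence is at least as
      strong as either, just potentially slower. */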
"l" : "m"); 10174 goto decode_success; 10175 } 10176 10177 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */ 10178 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5F) { 10179 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxpd", Iop_Max64Fx2 ); 10180 goto decode_success; 10181 } 10182 10183 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */ 10184 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5F) { 10185 vassert(sz == 4); 10186 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "maxsd", Iop_Max64F0x2 ); 10187 goto decode_success; 10188 } 10189 10190 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */ 10191 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5D) { 10192 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minpd", Iop_Min64Fx2 ); 10193 goto decode_success; 10194 } 10195 10196 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */ 10197 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5D) { 10198 vassert(sz == 4); 10199 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "minsd", Iop_Min64F0x2 ); 10200 goto decode_success; 10201 } 10202 10203 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */ 10204 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */ 10205 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */ 10206 if (sz == 2 && insn[0] == 0x0F 10207 && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) { 10208 const HChar* wot = insn[1]==0x28 ? "apd" : 10209 insn[1]==0x10 ? "upd" : "dqa"; 10210 modrm = getIByte(delta+2); 10211 if (epartIsReg(modrm)) { 10212 putXMMReg( gregOfRM(modrm), 10213 getXMMReg( eregOfRM(modrm) )); 10214 DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRM(modrm)), 10215 nameXMMReg(gregOfRM(modrm))); 10216 delta += 2+1; 10217 } else { 10218 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10219 if (insn[1] == 0x28/*movapd*/ || insn[1] == 0x6F/*movdqa*/) 10220 gen_SEGV_if_not_16_aligned( addr ); 10221 putXMMReg( gregOfRM(modrm), 10222 loadLE(Ity_V128, mkexpr(addr)) ); 10223 DIP("mov%s %s,%s\n", wot, dis_buf, 10224 nameXMMReg(gregOfRM(modrm))); 10225 delta += 2+alen; 10226 } 10227 goto decode_success; 10228 } 10229 10230 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */ 10231 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */ 10232 if (sz == 2 && insn[0] == 0x0F 10233 && (insn[1] == 0x29 || insn[1] == 0x11)) { 10234 const HChar* wot = insn[1]==0x29 ? "apd" : "upd"; 10235 modrm = getIByte(delta+2); 10236 if (epartIsReg(modrm)) { 10237 /* fall through; awaiting test case */ 10238 } else { 10239 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10240 if (insn[1] == 0x29/*movapd*/) 10241 gen_SEGV_if_not_16_aligned( addr ); 10242 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 10243 DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRM(modrm)), 10244 dis_buf ); 10245 delta += 2+alen; 10246 goto decode_success; 10247 } 10248 } 10249 10250 /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. 
   /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6E) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         putXMMReg(
            gregOfRM(modrm),
            unop( Iop_32UtoV128, getIReg(4, eregOfRM(modrm)) ) 
         );
         DIP("movd %s, %s\n", 
             nameIReg(4,eregOfRM(modrm)), nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         putXMMReg(
            gregOfRM(modrm),
            unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) ) 
         );
         DIP("movd %s, %s\n", dis_buf, nameXMMReg(gregOfRM(modrm)));
      }
      goto decode_success;
   }

   /* 66 0F 7E = MOVD from xmm low 1/4 to r/m32. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7E) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         putIReg( 4, eregOfRM(modrm),
                  getXMMRegLane32(gregOfRM(modrm), 0) );
         DIP("movd %s, %s\n", 
             nameXMMReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         storeLE( mkexpr(addr),
                  getXMMRegLane32(gregOfRM(modrm), 0) );
         DIP("movd %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
      }
      goto decode_success;
   }

   /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7F) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         putXMMReg( eregOfRM(modrm),
                    getXMMReg(gregOfRM(modrm)) );
         DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), 
                                nameXMMReg(eregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
      }
      goto decode_success;
   }

   /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
   /* Unfortunately can't simply use the MOVDQA case since the
      prefix lengths are different (66 vs F3) */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x6F) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         putXMMReg( gregOfRM(modrm), 
                    getXMMReg( eregOfRM(modrm) ));
         DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                               nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         putXMMReg( gregOfRM(modrm), 
                    loadLE(Ity_V128, mkexpr(addr)) );
         DIP("movdqu %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }
      goto decode_success;
   }
   /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
   /* Unfortunately can't simply use the MOVDQA case since the
      prefix lengths are different (66 vs F3) */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7F) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         delta += 3+1;
         putXMMReg( eregOfRM(modrm),
                    getXMMReg(gregOfRM(modrm)) );
         DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), 
                                nameXMMReg(eregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+3, dis_buf );
         delta += 3+alen;
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
      }
      goto decode_success;
   }

   /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD6) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         do_MMX_preamble();
         putMMXReg( gregOfRM(modrm), 
                    getXMMRegLane64( eregOfRM(modrm), 0 ));
         DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                nameMMXReg(gregOfRM(modrm)));
         delta += 3+1;
         goto decode_success;
      } else {
         /* fall through, apparently no mem case for this insn */
      }
   }

   /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
   /* This seems identical to MOVHPS.  This instruction encoding is
      completely crazy. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x16) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through; apparently reg-reg is not possible */
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movhpd %s,%s\n", dis_buf, 
                               nameXMMReg( gregOfRM(modrm) ));
         goto decode_success;
      }
   }

   /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
   /* Again, this seems identical to MOVHPS. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x17) {
      if (!epartIsReg(insn[2])) {
         delta += 2;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         storeLE( mkexpr(addr), 
                  getXMMRegLane64( gregOfRM(insn[2]),
                                   1/*upper lane*/ ) );
         DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
                               dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }

   /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
   /* Identical to MOVLPS ? */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x12) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through; apparently reg-reg is not possible */
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movlpd %s, %s\n", 
             dis_buf, nameXMMReg( gregOfRM(modrm) ));
         goto decode_success;
      }
   }
   /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
   /* Identical to MOVLPS ? */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x13) {
      if (!epartIsReg(insn[2])) {
         delta += 2;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         storeLE( mkexpr(addr), 
                  getXMMRegLane64( gregOfRM(insn[2]), 
                                   0/*lower lane*/ ) );
         DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
                                dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }

   /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
      2 lowest bits of ireg(G) */
   if (insn[0] == 0x0F && insn[1] == 0x50) {
      modrm = getIByte(delta+2);
      if (sz == 2 && epartIsReg(modrm)) {
         Int src;
         t0 = newTemp(Ity_I32);
         t1 = newTemp(Ity_I32);
         delta += 2+1;
         src = eregOfRM(modrm);
         assign( t0, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)),
                            mkU32(1) ));
         assign( t1, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)),
                            mkU32(2) ));
         putIReg(4, gregOfRM(modrm),
                    binop(Iop_Or32, mkexpr(t0), mkexpr(t1))
                 );
         DIP("movmskpd %s,%s\n", nameXMMReg(src), 
                                 nameIReg(4, gregOfRM(modrm)));
         goto decode_success;
      }
      /* else fall through */
   }

   /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
   if (insn[0] == 0x0F && insn[1] == 0xF7) {
      modrm = getIByte(delta+2);
      if (sz == 2 && epartIsReg(modrm)) {
         IRTemp regD    = newTemp(Ity_V128);
         IRTemp mask    = newTemp(Ity_V128);
         IRTemp olddata = newTemp(Ity_V128);
         IRTemp newdata = newTemp(Ity_V128);
         addr = newTemp(Ity_I32);

         assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
         assign( regD, getXMMReg( gregOfRM(modrm) ));

         /* Unfortunately can't do the obvious thing with SarN8x16
            here since that can't be re-emitted as SSE2 code - no such
            insn. */
         assign( 
            mask, 
            binop(Iop_64HLtoV128,
                  binop(Iop_SarN8x8, 
                        getXMMRegLane64( eregOfRM(modrm), 1 ), 
                        mkU8(7) ),
                  binop(Iop_SarN8x8, 
                        getXMMRegLane64( eregOfRM(modrm), 0 ), 
                        mkU8(7) ) ));
         assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
         assign( newdata, 
                 binop(Iop_OrV128, 
                       binop(Iop_AndV128, 
                             mkexpr(regD), 
                             mkexpr(mask) ),
                       binop(Iop_AndV128, 
                             mkexpr(olddata),
                             unop(Iop_NotV128, mkexpr(mask)))) );
         storeLE( mkexpr(addr), mkexpr(newdata) );

         delta += 2+1;
         DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ),
                                   nameXMMReg( gregOfRM(modrm) ) );
         goto decode_success;
      }
      /* else fall through */
   }
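   /* How the MASKMOVDQU mask above works: SarN8x8 shifts each byte
      right by 7 arithmetically, broadcasting its sign bit, so a byte
      becomes 0xFF if its top bit was set and 0x00 otherwise.  The
      store is then old data merged with register data:
         new = (regD & mask) | (old & ~mask)
      ie, only bytes whose mask lane is 0xFF get replaced.  It's done
      as one 16-byte load/modify/store rather than as 16 conditional
      byte stores. */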
   /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
   if (insn[0] == 0x0F && insn[1] == 0xE7) {
      modrm = getIByte(delta+2);
      if (sz == 2 && !epartIsReg(modrm)) {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("movntdq %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
         goto decode_success;
      }
      /* else fall through */
   }

   /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
   if (insn[0] == 0x0F && insn[1] == 0xC3) {
      vassert(sz == 4);
      modrm = getIByte(delta+2);
      if (!epartIsReg(modrm)) {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         storeLE( mkexpr(addr), getIReg(4, gregOfRM(modrm)) );
         DIP("movnti %s,%s\n", dis_buf,
                               nameIReg(4, gregOfRM(modrm)));
         delta += 2+alen;
         goto decode_success;
      }
      /* else fall through */
   }

   /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
      or lo half xmm). */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD6) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through, awaiting test case */
         /* dst: lo half copied, hi half zeroed */
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         storeLE( mkexpr(addr), 
                  getXMMRegLane64( gregOfRM(modrm), 0 ));
         DIP("movq %s,%s\n", nameXMMReg(gregOfRM(modrm)), dis_buf );
         delta += 2+alen;
         goto decode_success;
      }
   }

   /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
      hi half). */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xD6) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         do_MMX_preamble();
         putXMMReg( gregOfRM(modrm), 
                    unop(Iop_64UtoV128, getMMXReg( eregOfRM(modrm) )) );
         DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
         goto decode_success;
      } else {
         /* fall through, apparently no mem case for this insn */
      }
   }

   /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
      G (lo half xmm).  Upper half of G is zeroed out. */
   /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
      G (lo half xmm).  If E is mem, upper half of G is zeroed out.
      If E is reg, upper half of G is unchanged. */
   if ((insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x10)
       || (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7E)) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         putXMMRegLane64( gregOfRM(modrm), 0,
                          getXMMRegLane64( eregOfRM(modrm), 0 ));
         if (insn[0] == 0xF3/*MOVQ*/) {
            /* zero bits 127:64 */
            putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
         }
         DIP("movsd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                              nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         /* zero bits 127:64 */
         putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
         /* write bits 63:0 */
         putXMMRegLane64( gregOfRM(modrm), 0,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movsd %s,%s\n", dis_buf,
                              nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }
      goto decode_success;
   }
   /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
      or lo half xmm). */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x11) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         putXMMRegLane64( eregOfRM(modrm), 0,
                          getXMMRegLane64( gregOfRM(modrm), 0 ));
         DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
                              nameXMMReg(eregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         storeLE( mkexpr(addr),
                  getXMMRegLane64(gregOfRM(modrm), 0) );
         DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
                              dis_buf);
         delta += 3+alen;
      }
      goto decode_success;
   }

   /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x59) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulpd", Iop_Mul64Fx2 );
      goto decode_success;
   }

   /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x59) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "mulsd", Iop_Mul64F0x2 );
      goto decode_success;
   }

   /* 66 0F 56 = ORPD -- G = G or E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x56) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "orpd", Iop_OrV128 );
      goto decode_success;
   }

   /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC6) {
      Int    select;
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      IRTemp s1 = newTemp(Ity_I64);
      IRTemp s0 = newTemp(Ity_I64);
      IRTemp d1 = newTemp(Ity_I64);
      IRTemp d0 = newTemp(Ity_I64);

      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         select = (Int)insn[3];
         delta += 2+2;
         DIP("shufpd $%d,%s,%s\n", select, 
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         select = (Int)insn[2+alen];
         delta += 3+alen;
         DIP("shufpd $%d,%s,%s\n", select, 
                                   dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
      }

      assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );

#     define SELD(n) mkexpr((n)==0 ? d0 : d1)
#     define SELS(n) mkexpr((n)==0 ? s0 : s1)

      putXMMReg(
         gregOfRM(modrm), 
         binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) )
      );

#     undef SELD
#     undef SELS

      goto decode_success;
   }
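   /* Worked example for the SELS/SELD machinery above: shufpd
      $2,%xmm1,%xmm0 has select = 0b10, so the result's low half is
      SELD(0) = d0 (low half of %xmm0) and its high half is
      SELS(1) = s1 (high half of %xmm1). */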
   /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x51) {
      delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 
                                        "sqrtpd", Iop_Sqrt64Fx2 );
      goto decode_success;
   }

   /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x51) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_unary_lo64( sorb, delta+3, 
                                         "sqrtsd", Iop_Sqrt64F0x2 );
      goto decode_success;
   }

   /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5C) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "subpd", Iop_Sub64Fx2 );
      goto decode_success;
   }

   /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5C) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "subsd", Iop_Sub64F0x2 );
      goto decode_success;
   }

   /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
   /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
   /* These just appear to be special cases of SHUFPD */
   if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
      IRTemp s1 = newTemp(Ity_I64);
      IRTemp s0 = newTemp(Ity_I64);
      IRTemp d1 = newTemp(Ity_I64);
      IRTemp d0 = newTemp(Ity_I64);
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      Bool   hi = toBool(insn[1] == 0x15);

      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
                                  nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
                                  dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
      }

      assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );

      if (hi) {
         putXMMReg( gregOfRM(modrm), 
                    binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
      } else {
         putXMMReg( gregOfRM(modrm), 
                    binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
      }

      goto decode_success;
   }
"h" : "l", 10753 dis_buf, 10754 nameXMMReg(gregOfRM(modrm))); 10755 } 10756 10757 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) ); 10758 assign( d0, unop(Iop_V128to64, mkexpr(dV)) ); 10759 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) ); 10760 assign( s0, unop(Iop_V128to64, mkexpr(sV)) ); 10761 10762 if (hi) { 10763 putXMMReg( gregOfRM(modrm), 10764 binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) ); 10765 } else { 10766 putXMMReg( gregOfRM(modrm), 10767 binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) ); 10768 } 10769 10770 goto decode_success; 10771 } 10772 10773 /* 66 0F 57 = XORPD -- G = G and E */ 10774 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x57) { 10775 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorpd", Iop_XorV128 ); 10776 goto decode_success; 10777 } 10778 10779 /* 66 0F 6B = PACKSSDW */ 10780 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) { 10781 delta = dis_SSEint_E_to_G( sorb, delta+2, 10782 "packssdw", 10783 Iop_QNarrowBin32Sto16Sx8, True ); 10784 goto decode_success; 10785 } 10786 10787 /* 66 0F 63 = PACKSSWB */ 10788 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) { 10789 delta = dis_SSEint_E_to_G( sorb, delta+2, 10790 "packsswb", 10791 Iop_QNarrowBin16Sto8Sx16, True ); 10792 goto decode_success; 10793 } 10794 10795 /* 66 0F 67 = PACKUSWB */ 10796 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) { 10797 delta = dis_SSEint_E_to_G( sorb, delta+2, 10798 "packuswb", 10799 Iop_QNarrowBin16Sto8Ux16, True ); 10800 goto decode_success; 10801 } 10802 10803 /* 66 0F FC = PADDB */ 10804 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFC) { 10805 delta = dis_SSEint_E_to_G( sorb, delta+2, 10806 "paddb", Iop_Add8x16, False ); 10807 goto decode_success; 10808 } 10809 10810 /* 66 0F FE = PADDD */ 10811 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFE) { 10812 delta = dis_SSEint_E_to_G( sorb, delta+2, 10813 "paddd", Iop_Add32x4, False ); 10814 goto decode_success; 10815 } 10816 10817 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 10818 /* 0F D4 = PADDQ -- add 64x1 */ 10819 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD4) { 10820 do_MMX_preamble(); 10821 delta = dis_MMXop_regmem_to_reg ( 10822 sorb, delta+2, insn[1], "paddq", False ); 10823 goto decode_success; 10824 } 10825 10826 /* 66 0F D4 = PADDQ */ 10827 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD4) { 10828 delta = dis_SSEint_E_to_G( sorb, delta+2, 10829 "paddq", Iop_Add64x2, False ); 10830 goto decode_success; 10831 } 10832 10833 /* 66 0F FD = PADDW */ 10834 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFD) { 10835 delta = dis_SSEint_E_to_G( sorb, delta+2, 10836 "paddw", Iop_Add16x8, False ); 10837 goto decode_success; 10838 } 10839 10840 /* 66 0F EC = PADDSB */ 10841 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEC) { 10842 delta = dis_SSEint_E_to_G( sorb, delta+2, 10843 "paddsb", Iop_QAdd8Sx16, False ); 10844 goto decode_success; 10845 } 10846 10847 /* 66 0F ED = PADDSW */ 10848 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xED) { 10849 delta = dis_SSEint_E_to_G( sorb, delta+2, 10850 "paddsw", Iop_QAdd16Sx8, False ); 10851 goto decode_success; 10852 } 10853 10854 /* 66 0F DC = PADDUSB */ 10855 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDC) { 10856 delta = dis_SSEint_E_to_G( sorb, delta+2, 10857 "paddusb", Iop_QAdd8Ux16, False ); 10858 goto decode_success; 10859 } 10860 10861 /* 66 0F DD = PADDUSW */ 10862 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDD) { 10863 delta = dis_SSEint_E_to_G( sorb, delta+2, 10864 "paddusw", Iop_QAdd16Ux8, False ); 10865 goto decode_success; 10866 } 10867 10868 /* 66 
   /* 66 0F DB = PAND */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDB) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "pand", Iop_AndV128 );
      goto decode_success;
   }

   /* 66 0F DF = PANDN */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDF) {
      delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "pandn", Iop_AndV128 );
      goto decode_success;
   }

   /* 66 0F E0 = PAVGB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE0) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pavgb", Iop_Avg8Ux16, False );
      goto decode_success;
   }

   /* 66 0F E3 = PAVGW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE3) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pavgw", Iop_Avg16Ux8, False );
      goto decode_success;
   }

   /* 66 0F 74 = PCMPEQB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x74) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pcmpeqb", Iop_CmpEQ8x16, False );
      goto decode_success;
   }

   /* 66 0F 76 = PCMPEQD */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x76) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pcmpeqd", Iop_CmpEQ32x4, False );
      goto decode_success;
   }

   /* 66 0F 75 = PCMPEQW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x75) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pcmpeqw", Iop_CmpEQ16x8, False );
      goto decode_success;
   }

   /* 66 0F 64 = PCMPGTB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x64) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pcmpgtb", Iop_CmpGT8Sx16, False );
      goto decode_success;
   }

   /* 66 0F 66 = PCMPGTD */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x66) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pcmpgtd", Iop_CmpGT32Sx4, False );
      goto decode_success;
   }

   /* 66 0F 65 = PCMPGTW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x65) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pcmpgtw", Iop_CmpGT16Sx8, False );
      goto decode_success;
   }

   /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put 
      zero-extend of it in ireg(G). */
   if (insn[0] == 0x0F && insn[1] == 0xC5) {
      modrm = insn[2];
      if (sz == 2 && epartIsReg(modrm)) {
         t5 = newTemp(Ity_V128);
         t4 = newTemp(Ity_I16);
         assign(t5, getXMMReg(eregOfRM(modrm)));
         breakup128to32s( t5, &t3, &t2, &t1, &t0 );
         switch (insn[3] & 7) {
            case 0:  assign(t4, unop(Iop_32to16,   mkexpr(t0))); break;
            case 1:  assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break;
            case 2:  assign(t4, unop(Iop_32to16,   mkexpr(t1))); break;
            case 3:  assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break;
            case 4:  assign(t4, unop(Iop_32to16,   mkexpr(t2))); break;
            case 5:  assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break;
            case 6:  assign(t4, unop(Iop_32to16,   mkexpr(t3))); break;
            case 7:  assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break;
            default: vassert(0); /*NOTREACHED*/
         }
         putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t4)));
         DIP("pextrw $%d,%s,%s\n",
             (Int)insn[3], nameXMMReg(eregOfRM(modrm)),
                           nameIReg(4,gregOfRM(modrm)));
         delta += 4;
         goto decode_success;
      }
      /* else fall through */
   }
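   /* Lane selection in PEXTRW above: insn[3] & 7 picks one of the
      eight 16-bit lanes; even immediates take the low half of a
      32-bit quarter, odd ones the high half.  Eg, $5 extracts bits
      95:80 of the xmm register (the top half of 32-bit lane 2). */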
   /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
      put it into the specified lane of xmm(G). */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC4) {
      Int lane;
      t4 = newTemp(Ity_I16);
      modrm = insn[2];

      if (epartIsReg(modrm)) {
         assign(t4, getIReg(2, eregOfRM(modrm)));
         delta += 3+1;
         lane = insn[3+1-1]; /* ie insn[3]; written to match the mem case */
         DIP("pinsrw $%d,%s,%s\n", lane, 
                                   nameIReg(2,eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 3+alen;
         lane = insn[3+alen-1];
         assign(t4, loadLE(Ity_I16, mkexpr(addr)));
         DIP("pinsrw $%d,%s,%s\n", lane, 
                                   dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
      }

      putXMMRegLane16( gregOfRM(modrm), lane & 7, mkexpr(t4) );
      goto decode_success;
   }

   /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
      E(xmm or mem) to G(xmm) */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF5) {
      IRTemp s1V  = newTemp(Ity_V128);
      IRTemp s2V  = newTemp(Ity_V128);
      IRTemp dV   = newTemp(Ity_V128);
      IRTemp s1Hi = newTemp(Ity_I64);
      IRTemp s1Lo = newTemp(Ity_I64);
      IRTemp s2Hi = newTemp(Ity_I64);
      IRTemp s2Lo = newTemp(Ity_I64);
      IRTemp dHi  = newTemp(Ity_I64);
      IRTemp dLo  = newTemp(Ity_I64);
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( s1V, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("pmaddwd %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
      }
      assign( s2V, getXMMReg(gregOfRM(modrm)) );
      assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
      assign( s1Lo, unop(Iop_V128to64,   mkexpr(s1V)) );
      assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
      assign( s2Lo, unop(Iop_V128to64,   mkexpr(s2V)) );
      assign( dHi, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_calculate_mmx_pmaddwd", 
                      &x86g_calculate_mmx_pmaddwd,
                      mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
                   ));
      assign( dLo, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_calculate_mmx_pmaddwd", 
                      &x86g_calculate_mmx_pmaddwd,
                      mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
                   ));
      assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
      goto decode_success;
   }

   /* 66 0F EE = PMAXSW -- 16x8 signed max */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEE) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pmaxsw", Iop_Max16Sx8, False );
      goto decode_success;
   }

   /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDE) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pmaxub", Iop_Max8Ux16, False );
      goto decode_success;
   }

   /* 66 0F EA = PMINSW -- 16x8 signed min */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEA) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pminsw", Iop_Min16Sx8, False );
      goto decode_success;
   }

   /* 66 0F DA = PMINUB -- 8x16 unsigned min */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDA) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pminub", Iop_Min8Ux16, False );
      goto decode_success;
   }
   /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes
      in xmm(E), turn them into a byte, and put zero-extend of it in
      ireg(G). */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD7) {
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         t0 = newTemp(Ity_I64);
         t1 = newTemp(Ity_I64);
         assign(t0, getXMMRegLane64(eregOfRM(modrm), 0));
         assign(t1, getXMMRegLane64(eregOfRM(modrm), 1));
         t5 = newTemp(Ity_I32);
         assign(t5,
                unop(Iop_16Uto32,
                     binop(Iop_8HLto16,
                           unop(Iop_GetMSBs8x8, mkexpr(t1)),
                           unop(Iop_GetMSBs8x8, mkexpr(t0)))));
         putIReg(4, gregOfRM(modrm), mkexpr(t5));
         DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameIReg(4,gregOfRM(modrm)));
         delta += 3;
         goto decode_success;
      } 
      /* else fall through */
   }

   /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE4) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pmulhuw", Iop_MulHi16Ux8, False );
      goto decode_success;
   }

   /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE5) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pmulhw", Iop_MulHi16Sx8, False );
      goto decode_success;
   }

   /* 66 0F D5 = PMULLW -- 16x8 multiply */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD5) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pmullw", Iop_Mul16x8, False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
   /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
      0 to form 64-bit result */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF4) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);
      t1 = newTemp(Ity_I32);
      t0 = newTemp(Ity_I32);
      modrm = insn[2];

      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 2+alen;
         DIP("pmuludq %s,%s\n", dis_buf,
                                nameMMXReg(gregOfRM(modrm)));
      }

      assign( t0, unop(Iop_64to32, mkexpr(dV)) );
      assign( t1, unop(Iop_64to32, mkexpr(sV)) );
      putMMXReg( gregOfRM(modrm),
                 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
      goto decode_success;
   }
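   /* Iop_MullU32 above is a widening multiply: 32 x 32 -> 64 bits,
      so no overflow is possible.  Eg, 0xFFFFFFFF * 0xFFFFFFFF =
      0xFFFFFFFE00000001, which needs all 64 result bits -- exactly
      the pmuludq semantics. */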
   /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
      0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
      half */
   /* This is a really poor translation -- could be improved if
      performance critical */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF4) {
      IRTemp sV, dV;
      IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
      sV = newTemp(Ity_V128);
      dV = newTemp(Ity_V128);
      s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
      t1 = newTemp(Ity_I64);
      t0 = newTemp(Ity_I64);
      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("pmuludq %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
      }

      breakup128to32s( dV, &d3, &d2, &d1, &d0 );
      breakup128to32s( sV, &s3, &s2, &s1, &s0 );

      assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) );
      putXMMRegLane64( gregOfRM(modrm), 0, mkexpr(t0) );
      assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) );
      putXMMRegLane64( gregOfRM(modrm), 1, mkexpr(t1) );
      goto decode_success;
   }

   /* 66 0F EB = POR */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEB) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "por", Iop_OrV128 );
      goto decode_success;
   }

   /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
      from E(xmm or mem) to G(xmm) */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF6) {
      IRTemp s1V  = newTemp(Ity_V128);
      IRTemp s2V  = newTemp(Ity_V128);
      IRTemp dV   = newTemp(Ity_V128);
      IRTemp s1Hi = newTemp(Ity_I64);
      IRTemp s1Lo = newTemp(Ity_I64);
      IRTemp s2Hi = newTemp(Ity_I64);
      IRTemp s2Lo = newTemp(Ity_I64);
      IRTemp dHi  = newTemp(Ity_I64);
      IRTemp dLo  = newTemp(Ity_I64);
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( s1V, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("psadbw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                               nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("psadbw %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRM(modrm)));
      }
      assign( s2V, getXMMReg(gregOfRM(modrm)) );
      assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
      assign( s1Lo, unop(Iop_V128to64,   mkexpr(s1V)) );
      assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
      assign( s2Lo, unop(Iop_V128to64,   mkexpr(s2V)) );
      assign( dHi, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_calculate_mmx_psadbw", 
                      &x86g_calculate_mmx_psadbw,
                      mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
                   ));
      assign( dLo, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_calculate_mmx_psadbw", 
                      &x86g_calculate_mmx_psadbw,
                      mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
                   ));
      assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
      goto decode_success;
   }
   /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x70) {
      Int order;
      IRTemp sV, dV, s3, s2, s1, s0;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;
      sV = newTemp(Ity_V128);
      dV = newTemp(Ity_V128);
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         order = (Int)insn[3];
         delta += 2+2;
         DIP("pshufd $%d,%s,%s\n", order, 
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         order = (Int)insn[2+alen];
         delta += 3+alen;
         DIP("pshufd $%d,%s,%s\n", order, 
                                   dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
      }
      breakup128to32s( sV, &s3, &s2, &s1, &s0 );

#     define SEL(n) \
                ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
      assign(dV,
             mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
                           SEL((order>>2)&3), SEL((order>>0)&3) )
      );
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
#     undef SEL
      goto decode_success;
   }

   /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
      mem) to G(xmm), and copy lower half */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x70) {
      Int order;
      IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;
      sV   = newTemp(Ity_V128);
      dV   = newTemp(Ity_V128);
      sVhi = newTemp(Ity_I64);
      dVhi = newTemp(Ity_I64);
      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         order = (Int)insn[4];
         delta += 4+1;
         DIP("pshufhw $%d,%s,%s\n", order, 
                                    nameXMMReg(eregOfRM(modrm)),
                                    nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         order = (Int)insn[3+alen];
         delta += 4+alen;
         DIP("pshufhw $%d,%s,%s\n", order, 
                                    dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }
      assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) );
      breakup64to16s( sVhi, &s3, &s2, &s1, &s0 );

#     define SEL(n) \
                ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
      assign(dVhi,
             mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
                          SEL((order>>2)&3), SEL((order>>0)&3) )
      );
      assign(dV, binop( Iop_64HLtoV128, 
                        mkexpr(dVhi),
                        unop(Iop_V128to64, mkexpr(sV))) );
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
#     undef SEL
      goto decode_success;
   }
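   /* The order byte in PSHUFD and PSHUFHW above (and PSHUFLW below)
      is read two bits at a time, least significant pair first, each
      pair naming the source lane for the next destination lane.  Eg,
      order 0x1B (0b00011011) reverses the four lanes: dest lane 0
      gets SEL(3), lane 1 gets SEL(2), lane 2 gets SEL(1) and lane 3
      gets SEL(0). */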
s2 : s3))) 11353 assign(dVlo, 11354 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 11355 SEL((order>>2)&3), SEL((order>>0)&3) ) 11356 ); 11357 assign(dV, binop( Iop_64HLtoV128, 11358 unop(Iop_V128HIto64, mkexpr(sV)), 11359 mkexpr(dVlo) ) ); 11360 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 11361# undef SEL 11362 goto decode_success; 11363 } 11364 11365 /* 66 0F 72 /6 ib = PSLLD by immediate */ 11366 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72 11367 && epartIsReg(insn[2]) 11368 && gregOfRM(insn[2]) == 6) { 11369 delta = dis_SSE_shiftE_imm( delta+2, "pslld", Iop_ShlN32x4 ); 11370 goto decode_success; 11371 } 11372 11373 /* 66 0F F2 = PSLLD by E */ 11374 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF2) { 11375 delta = dis_SSE_shiftG_byE( sorb, delta+2, "pslld", Iop_ShlN32x4 ); 11376 goto decode_success; 11377 } 11378 11379 /* 66 0F 73 /7 ib = PSLLDQ by immediate */ 11380 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11381 && epartIsReg(insn[2]) 11382 && gregOfRM(insn[2]) == 7) { 11383 IRTemp sV, dV, hi64, lo64, hi64r, lo64r; 11384 Int imm = (Int)insn[3]; 11385 Int reg = eregOfRM(insn[2]); 11386 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg)); 11387 vassert(imm >= 0 && imm <= 255); 11388 delta += 4; 11389 11390 sV = newTemp(Ity_V128); 11391 dV = newTemp(Ity_V128); 11392 hi64 = newTemp(Ity_I64); 11393 lo64 = newTemp(Ity_I64); 11394 hi64r = newTemp(Ity_I64); 11395 lo64r = newTemp(Ity_I64); 11396 11397 if (imm >= 16) { 11398 putXMMReg(reg, mkV128(0x0000)); 11399 goto decode_success; 11400 } 11401 11402 assign( sV, getXMMReg(reg) ); 11403 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 11404 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 11405 11406 if (imm == 0) { 11407 assign( lo64r, mkexpr(lo64) ); 11408 assign( hi64r, mkexpr(hi64) ); 11409 } 11410 else 11411 if (imm == 8) { 11412 assign( lo64r, mkU64(0) ); 11413 assign( hi64r, mkexpr(lo64) ); 11414 } 11415 else 11416 if (imm > 8) { 11417 assign( lo64r, mkU64(0) ); 11418 assign( hi64r, binop( Iop_Shl64, 11419 mkexpr(lo64), 11420 mkU8( 8*(imm-8) ) )); 11421 } else { 11422 assign( lo64r, binop( Iop_Shl64, 11423 mkexpr(lo64), 11424 mkU8(8 * imm) )); 11425 assign( hi64r, 11426 binop( Iop_Or64, 11427 binop(Iop_Shl64, mkexpr(hi64), 11428 mkU8(8 * imm)), 11429 binop(Iop_Shr64, mkexpr(lo64), 11430 mkU8(8 * (8 - imm)) ) 11431 ) 11432 ); 11433 } 11434 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 11435 putXMMReg(reg, mkexpr(dV)); 11436 goto decode_success; 11437 } 11438 11439 /* 66 0F 73 /6 ib = PSLLQ by immediate */ 11440 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11441 && epartIsReg(insn[2]) 11442 && gregOfRM(insn[2]) == 6) { 11443 delta = dis_SSE_shiftE_imm( delta+2, "psllq", Iop_ShlN64x2 ); 11444 goto decode_success; 11445 } 11446 11447 /* 66 0F F3 = PSLLQ by E */ 11448 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF3) { 11449 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllq", Iop_ShlN64x2 ); 11450 goto decode_success; 11451 } 11452 11453 /* 66 0F 71 /6 ib = PSLLW by immediate */ 11454 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71 11455 && epartIsReg(insn[2]) 11456 && gregOfRM(insn[2]) == 6) { 11457 delta = dis_SSE_shiftE_imm( delta+2, "psllw", Iop_ShlN16x8 ); 11458 goto decode_success; 11459 } 11460 11461 /* 66 0F F1 = PSLLW by E */ 11462 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF1) { 11463 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllw", Iop_ShlN16x8 ); 11464 goto decode_success; 11465 } 11466 11467 /* 66 0F 72 /4 ib = PSRAD by immediate */ 11468 if (sz == 2 && insn[0] == 0x0F && insn[1] == 
0x72 11469 && epartIsReg(insn[2]) 11470 && gregOfRM(insn[2]) == 4) { 11471 delta = dis_SSE_shiftE_imm( delta+2, "psrad", Iop_SarN32x4 ); 11472 goto decode_success; 11473 } 11474 11475 /* 66 0F E2 = PSRAD by E */ 11476 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE2) { 11477 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrad", Iop_SarN32x4 ); 11478 goto decode_success; 11479 } 11480 11481 /* 66 0F 71 /4 ib = PSRAW by immediate */ 11482 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71 11483 && epartIsReg(insn[2]) 11484 && gregOfRM(insn[2]) == 4) { 11485 delta = dis_SSE_shiftE_imm( delta+2, "psraw", Iop_SarN16x8 ); 11486 goto decode_success; 11487 } 11488 11489 /* 66 0F E1 = PSRAW by E */ 11490 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE1) { 11491 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psraw", Iop_SarN16x8 ); 11492 goto decode_success; 11493 } 11494 11495 /* 66 0F 72 /2 ib = PSRLD by immediate */ 11496 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72 11497 && epartIsReg(insn[2]) 11498 && gregOfRM(insn[2]) == 2) { 11499 delta = dis_SSE_shiftE_imm( delta+2, "psrld", Iop_ShrN32x4 ); 11500 goto decode_success; 11501 } 11502 11503 /* 66 0F D2 = PSRLD by E */ 11504 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD2) { 11505 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrld", Iop_ShrN32x4 ); 11506 goto decode_success; 11507 } 11508 11509 /* 66 0F 73 /3 ib = PSRLDQ by immediate */ 11510 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11511 && epartIsReg(insn[2]) 11512 && gregOfRM(insn[2]) == 3) { 11513 IRTemp sV, dV, hi64, lo64, hi64r, lo64r; 11514 Int imm = (Int)insn[3]; 11515 Int reg = eregOfRM(insn[2]); 11516 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg)); 11517 vassert(imm >= 0 && imm <= 255); 11518 delta += 4; 11519 11520 sV = newTemp(Ity_V128); 11521 dV = newTemp(Ity_V128); 11522 hi64 = newTemp(Ity_I64); 11523 lo64 = newTemp(Ity_I64); 11524 hi64r = newTemp(Ity_I64); 11525 lo64r = newTemp(Ity_I64); 11526 11527 if (imm >= 16) { 11528 putXMMReg(reg, mkV128(0x0000)); 11529 goto decode_success; 11530 } 11531 11532 assign( sV, getXMMReg(reg) ); 11533 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 11534 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 11535 11536 if (imm == 0) { 11537 assign( lo64r, mkexpr(lo64) ); 11538 assign( hi64r, mkexpr(hi64) ); 11539 } 11540 else 11541 if (imm == 8) { 11542 assign( hi64r, mkU64(0) ); 11543 assign( lo64r, mkexpr(hi64) ); 11544 } 11545 else 11546 if (imm > 8) { 11547 assign( hi64r, mkU64(0) ); 11548 assign( lo64r, binop( Iop_Shr64, 11549 mkexpr(hi64), 11550 mkU8( 8*(imm-8) ) )); 11551 } else { 11552 assign( hi64r, binop( Iop_Shr64, 11553 mkexpr(hi64), 11554 mkU8(8 * imm) )); 11555 assign( lo64r, 11556 binop( Iop_Or64, 11557 binop(Iop_Shr64, mkexpr(lo64), 11558 mkU8(8 * imm)), 11559 binop(Iop_Shl64, mkexpr(hi64), 11560 mkU8(8 * (8 - imm)) ) 11561 ) 11562 ); 11563 } 11564 11565 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 11566 putXMMReg(reg, mkexpr(dV)); 11567 goto decode_success; 11568 } 11569 11570 /* 66 0F 73 /2 ib = PSRLQ by immediate */ 11571 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11572 && epartIsReg(insn[2]) 11573 && gregOfRM(insn[2]) == 2) { 11574 delta = dis_SSE_shiftE_imm( delta+2, "psrlq", Iop_ShrN64x2 ); 11575 goto decode_success; 11576 } 11577 11578 /* 66 0F D3 = PSRLQ by E */ 11579 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD3) { 11580 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlq", Iop_ShrN64x2 ); 11581 goto decode_success; 11582 } 11583 11584 /* 66 0F 71 /2 ib = PSRLW by immediate */ 11585 if 
(sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 2) {
      delta = dis_SSE_shiftE_imm( delta+2, "psrlw", Iop_ShrN16x8 );
      goto decode_success;
   }

   /* 66 0F D1 = PSRLW by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD1) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlw", Iop_ShrN16x8 );
      goto decode_success;
   }

   /* 66 0F F8 = PSUBB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF8) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubb", Iop_Sub8x16, False );
      goto decode_success;
   }

   /* 66 0F FA = PSUBD */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFA) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubd", Iop_Sub32x4, False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
   /* 0F FB = PSUBQ -- sub 64x1 */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xFB) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                sorb, delta+2, insn[1], "psubq", False );
      goto decode_success;
   }

   /* 66 0F FB = PSUBQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFB) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubq", Iop_Sub64x2, False );
      goto decode_success;
   }

   /* 66 0F F9 = PSUBW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF9) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubw", Iop_Sub16x8, False );
      goto decode_success;
   }

   /* 66 0F E8 = PSUBSB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE8) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubsb", Iop_QSub8Sx16, False );
      goto decode_success;
   }

   /* 66 0F E9 = PSUBSW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE9) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubsw", Iop_QSub16Sx8, False );
      goto decode_success;
   }

   /* 66 0F D8 = PSUBUSB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD8) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubusb", Iop_QSub8Ux16, False );
      goto decode_success;
   }

   /* 66 0F D9 = PSUBUSW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD9) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubusw", Iop_QSub16Ux8, False );
      goto decode_success;
   }

   /* 66 0F 68 = PUNPCKHBW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x68) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "punpckhbw",
                                 Iop_InterleaveHI8x16, True );
      goto decode_success;
   }

   /* 66 0F 6A = PUNPCKHDQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6A) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "punpckhdq",
                                 Iop_InterleaveHI32x4, True );
      goto decode_success;
   }

   /* 66 0F 6D = PUNPCKHQDQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6D) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "punpckhqdq",
                                 Iop_InterleaveHI64x2, True );
      goto decode_success;
   }

   /* 66 0F 69 = PUNPCKHWD */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x69) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "punpckhwd",
                                 Iop_InterleaveHI16x8, True );
      goto decode_success;
   }

   /* 66 0F 60 = PUNPCKLBW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x60) {
      delta = dis_SSEint_E_to_G(
sorb, delta+2, 11698 "punpcklbw", 11699 Iop_InterleaveLO8x16, True ); 11700 goto decode_success; 11701 } 11702 11703 /* 66 0F 62 = PUNPCKLDQ */ 11704 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x62) { 11705 delta = dis_SSEint_E_to_G( sorb, delta+2, 11706 "punpckldq", 11707 Iop_InterleaveLO32x4, True ); 11708 goto decode_success; 11709 } 11710 11711 /* 66 0F 6C = PUNPCKLQDQ */ 11712 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6C) { 11713 delta = dis_SSEint_E_to_G( sorb, delta+2, 11714 "punpcklqdq", 11715 Iop_InterleaveLO64x2, True ); 11716 goto decode_success; 11717 } 11718 11719 /* 66 0F 61 = PUNPCKLWD */ 11720 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x61) { 11721 delta = dis_SSEint_E_to_G( sorb, delta+2, 11722 "punpcklwd", 11723 Iop_InterleaveLO16x8, True ); 11724 goto decode_success; 11725 } 11726 11727 /* 66 0F EF = PXOR */ 11728 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEF) { 11729 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pxor", Iop_XorV128 ); 11730 goto decode_success; 11731 } 11732 11733//-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */ 11734//-- if (insn[0] == 0x0F && insn[1] == 0xAE 11735//-- && (!epartIsReg(insn[2])) 11736//-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) { 11737//-- Bool store = gregOfRM(insn[2]) == 0; 11738//-- vg_assert(sz == 4); 11739//-- pair = disAMode ( cb, sorb, eip+2, dis_buf ); 11740//-- t1 = LOW24(pair); 11741//-- eip += 2+HI8(pair); 11742//-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512, 11743//-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1], 11744//-- Lit16, (UShort)insn[2], 11745//-- TempReg, t1 ); 11746//-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf ); 11747//-- goto decode_success; 11748//-- } 11749 11750 /* 0F AE /7 = CLFLUSH -- flush cache line */ 11751 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE 11752 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) { 11753 11754 /* This is something of a hack. We need to know the size of the 11755 cache line containing addr. Since we don't (easily), assume 11756 256 on the basis that no real cache would have a line that 11757 big. It's safe to invalidate more stuff than we need, just 11758 inefficient. */ 11759 UInt lineszB = 256; 11760 11761 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 11762 delta += 2+alen; 11763 11764 /* Round addr down to the start of the containing block. */ 11765 stmt( IRStmt_Put( 11766 OFFB_CMSTART, 11767 binop( Iop_And32, 11768 mkexpr(addr), 11769 mkU32( ~(lineszB-1) ))) ); 11770 11771 stmt( IRStmt_Put(OFFB_CMLEN, mkU32(lineszB) ) ); 11772 11773 jmp_lit(&dres, Ijk_InvalICache, (Addr32)(guest_EIP_bbstart+delta)); 11774 11775 DIP("clflush %s\n", dis_buf); 11776 goto decode_success; 11777 } 11778 11779 /* ---------------------------------------------------- */ 11780 /* --- end of the SSE2 decoder. --- */ 11781 /* ---------------------------------------------------- */ 11782 11783 /* ---------------------------------------------------- */ 11784 /* --- start of the SSE3 decoder. --- */ 11785 /* ---------------------------------------------------- */ 11786 11787 /* Skip parts of the decoder which don't apply given the stated 11788 guest subarchitecture. */ 11789 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3)) 11790 goto after_sse_decoders; /* no SSE3 capabilities */ 11791 11792 insn = &guest_code[delta]; 11793 11794 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm), 11795 duplicating some lanes (2:2:0:0). 
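      In lane terms, for source (s3:s2:s1:s0) the result is
      (s2:s2:s0:s0), as the mk128from32s below makes explicit.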
*/ 11796 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm), 11797 duplicating some lanes (3:3:1:1). */ 11798 if (sz == 4 && insn[0] == 0xF3 && insn[1] == 0x0F 11799 && (insn[2] == 0x12 || insn[2] == 0x16)) { 11800 IRTemp s3, s2, s1, s0; 11801 IRTemp sV = newTemp(Ity_V128); 11802 Bool isH = insn[2] == 0x16; 11803 s3 = s2 = s1 = s0 = IRTemp_INVALID; 11804 11805 modrm = insn[3]; 11806 if (epartIsReg(modrm)) { 11807 assign( sV, getXMMReg( eregOfRM(modrm)) ); 11808 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l', 11809 nameXMMReg(eregOfRM(modrm)), 11810 nameXMMReg(gregOfRM(modrm))); 11811 delta += 3+1; 11812 } else { 11813 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11814 gen_SEGV_if_not_16_aligned( addr ); 11815 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11816 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l', 11817 dis_buf, 11818 nameXMMReg(gregOfRM(modrm))); 11819 delta += 3+alen; 11820 } 11821 11822 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 11823 putXMMReg( gregOfRM(modrm), 11824 isH ? mk128from32s( s3, s3, s1, s1 ) 11825 : mk128from32s( s2, s2, s0, s0 ) ); 11826 goto decode_success; 11827 } 11828 11829 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm), 11830 duplicating some lanes (0:1:0:1). */ 11831 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x12) { 11832 IRTemp sV = newTemp(Ity_V128); 11833 IRTemp d0 = newTemp(Ity_I64); 11834 11835 modrm = insn[3]; 11836 if (epartIsReg(modrm)) { 11837 assign( sV, getXMMReg( eregOfRM(modrm)) ); 11838 DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm)), 11839 nameXMMReg(gregOfRM(modrm))); 11840 delta += 3+1; 11841 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) ); 11842 } else { 11843 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11844 assign( d0, loadLE(Ity_I64, mkexpr(addr)) ); 11845 DIP("movddup %s,%s\n", dis_buf, 11846 nameXMMReg(gregOfRM(modrm))); 11847 delta += 3+alen; 11848 } 11849 11850 putXMMReg( gregOfRM(modrm), binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) ); 11851 goto decode_success; 11852 } 11853 11854 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */ 11855 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD0) { 11856 IRTemp a3, a2, a1, a0, s3, s2, s1, s0; 11857 IRTemp eV = newTemp(Ity_V128); 11858 IRTemp gV = newTemp(Ity_V128); 11859 IRTemp addV = newTemp(Ity_V128); 11860 IRTemp subV = newTemp(Ity_V128); 11861 IRTemp rm = newTemp(Ity_I32); 11862 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID; 11863 11864 modrm = insn[3]; 11865 if (epartIsReg(modrm)) { 11866 assign( eV, getXMMReg( eregOfRM(modrm)) ); 11867 DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm)), 11868 nameXMMReg(gregOfRM(modrm))); 11869 delta += 3+1; 11870 } else { 11871 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11872 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 11873 DIP("addsubps %s,%s\n", dis_buf, 11874 nameXMMReg(gregOfRM(modrm))); 11875 delta += 3+alen; 11876 } 11877 11878 assign( gV, getXMMReg(gregOfRM(modrm)) ); 11879 11880 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 11881 assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) ); 11882 assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) ); 11883 11884 breakup128to32s( addV, &a3, &a2, &a1, &a0 ); 11885 breakup128to32s( subV, &s3, &s2, &s1, &s0 ); 11886 11887 putXMMReg( gregOfRM(modrm), mk128from32s( a3, s2, a1, s0 )); 11888 goto decode_success; 11889 } 11890 11891 /* 66 0F D0 = ADDSUBPD -- 64x4 +/- from E (mem or xmm) to G (xmm). 
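      (64x2 in fact: result.hi64 = G.hi64 + E.hi64 and
      result.lo64 = G.lo64 - E.lo64, as computed below.)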
*/ 11892 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD0) { 11893 IRTemp eV = newTemp(Ity_V128); 11894 IRTemp gV = newTemp(Ity_V128); 11895 IRTemp addV = newTemp(Ity_V128); 11896 IRTemp subV = newTemp(Ity_V128); 11897 IRTemp a1 = newTemp(Ity_I64); 11898 IRTemp s0 = newTemp(Ity_I64); 11899 IRTemp rm = newTemp(Ity_I32); 11900 11901 modrm = insn[2]; 11902 if (epartIsReg(modrm)) { 11903 assign( eV, getXMMReg( eregOfRM(modrm)) ); 11904 DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 11905 nameXMMReg(gregOfRM(modrm))); 11906 delta += 2+1; 11907 } else { 11908 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 11909 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 11910 DIP("addsubpd %s,%s\n", dis_buf, 11911 nameXMMReg(gregOfRM(modrm))); 11912 delta += 2+alen; 11913 } 11914 11915 assign( gV, getXMMReg(gregOfRM(modrm)) ); 11916 11917 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 11918 assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) ); 11919 assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) ); 11920 11921 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) )); 11922 assign( s0, unop(Iop_V128to64, mkexpr(subV) )); 11923 11924 putXMMReg( gregOfRM(modrm), 11925 binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) ); 11926 goto decode_success; 11927 } 11928 11929 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */ 11930 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */ 11931 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F 11932 && (insn[2] == 0x7C || insn[2] == 0x7D)) { 11933 IRTemp e3, e2, e1, e0, g3, g2, g1, g0; 11934 IRTemp eV = newTemp(Ity_V128); 11935 IRTemp gV = newTemp(Ity_V128); 11936 IRTemp leftV = newTemp(Ity_V128); 11937 IRTemp rightV = newTemp(Ity_V128); 11938 IRTemp rm = newTemp(Ity_I32); 11939 Bool isAdd = insn[2] == 0x7C; 11940 const HChar* str = isAdd ? "add" : "sub"; 11941 e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID; 11942 11943 modrm = insn[3]; 11944 if (epartIsReg(modrm)) { 11945 assign( eV, getXMMReg( eregOfRM(modrm)) ); 11946 DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), 11947 nameXMMReg(gregOfRM(modrm))); 11948 delta += 3+1; 11949 } else { 11950 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11951 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 11952 DIP("h%sps %s,%s\n", str, dis_buf, 11953 nameXMMReg(gregOfRM(modrm))); 11954 delta += 3+alen; 11955 } 11956 11957 assign( gV, getXMMReg(gregOfRM(modrm)) ); 11958 11959 breakup128to32s( eV, &e3, &e2, &e1, &e0 ); 11960 breakup128to32s( gV, &g3, &g2, &g1, &g0 ); 11961 11962 assign( leftV, mk128from32s( e2, e0, g2, g0 ) ); 11963 assign( rightV, mk128from32s( e3, e1, g3, g1 ) ); 11964 11965 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 11966 putXMMReg( gregOfRM(modrm), 11967 triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4, 11968 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) ); 11969 goto decode_success; 11970 } 11971 11972 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */ 11973 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). 
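      Each result lane is a pairwise op within one source:
      result.lo64 = G.lo64 op G.hi64, result.hi64 = E.lo64 op E.hi64.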
*/ 11974 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) { 11975 IRTemp e1 = newTemp(Ity_I64); 11976 IRTemp e0 = newTemp(Ity_I64); 11977 IRTemp g1 = newTemp(Ity_I64); 11978 IRTemp g0 = newTemp(Ity_I64); 11979 IRTemp eV = newTemp(Ity_V128); 11980 IRTemp gV = newTemp(Ity_V128); 11981 IRTemp leftV = newTemp(Ity_V128); 11982 IRTemp rightV = newTemp(Ity_V128); 11983 IRTemp rm = newTemp(Ity_I32); 11984 Bool isAdd = insn[1] == 0x7C; 11985 const HChar* str = isAdd ? "add" : "sub"; 11986 11987 modrm = insn[2]; 11988 if (epartIsReg(modrm)) { 11989 assign( eV, getXMMReg( eregOfRM(modrm)) ); 11990 DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), 11991 nameXMMReg(gregOfRM(modrm))); 11992 delta += 2+1; 11993 } else { 11994 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 11995 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 11996 DIP("h%spd %s,%s\n", str, dis_buf, 11997 nameXMMReg(gregOfRM(modrm))); 11998 delta += 2+alen; 11999 } 12000 12001 assign( gV, getXMMReg(gregOfRM(modrm)) ); 12002 12003 assign( e1, unop(Iop_V128HIto64, mkexpr(eV) )); 12004 assign( e0, unop(Iop_V128to64, mkexpr(eV) )); 12005 assign( g1, unop(Iop_V128HIto64, mkexpr(gV) )); 12006 assign( g0, unop(Iop_V128to64, mkexpr(gV) )); 12007 12008 assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) ); 12009 assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) ); 12010 12011 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 12012 putXMMReg( gregOfRM(modrm), 12013 triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2, 12014 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) ); 12015 goto decode_success; 12016 } 12017 12018 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */ 12019 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xF0) { 12020 modrm = getIByte(delta+3); 12021 if (epartIsReg(modrm)) { 12022 goto decode_failure; 12023 } else { 12024 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12025 putXMMReg( gregOfRM(modrm), 12026 loadLE(Ity_V128, mkexpr(addr)) ); 12027 DIP("lddqu %s,%s\n", dis_buf, 12028 nameXMMReg(gregOfRM(modrm))); 12029 delta += 3+alen; 12030 } 12031 goto decode_success; 12032 } 12033 12034 /* ---------------------------------------------------- */ 12035 /* --- end of the SSE3 decoder. --- */ 12036 /* ---------------------------------------------------- */ 12037 12038 /* ---------------------------------------------------- */ 12039 /* --- start of the SSSE3 decoder. 
--- */ 12040 /* ---------------------------------------------------- */ 12041 12042 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 12043 Unsigned Bytes (MMX) */ 12044 if (sz == 4 12045 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { 12046 IRTemp sV = newTemp(Ity_I64); 12047 IRTemp dV = newTemp(Ity_I64); 12048 IRTemp sVoddsSX = newTemp(Ity_I64); 12049 IRTemp sVevensSX = newTemp(Ity_I64); 12050 IRTemp dVoddsZX = newTemp(Ity_I64); 12051 IRTemp dVevensZX = newTemp(Ity_I64); 12052 12053 modrm = insn[3]; 12054 do_MMX_preamble(); 12055 assign( dV, getMMXReg(gregOfRM(modrm)) ); 12056 12057 if (epartIsReg(modrm)) { 12058 assign( sV, getMMXReg(eregOfRM(modrm)) ); 12059 delta += 3+1; 12060 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm)), 12061 nameMMXReg(gregOfRM(modrm))); 12062 } else { 12063 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12064 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 12065 delta += 3+alen; 12066 DIP("pmaddubsw %s,%s\n", dis_buf, 12067 nameMMXReg(gregOfRM(modrm))); 12068 } 12069 12070 /* compute dV unsigned x sV signed */ 12071 assign( sVoddsSX, 12072 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) ); 12073 assign( sVevensSX, 12074 binop(Iop_SarN16x4, 12075 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)), 12076 mkU8(8)) ); 12077 assign( dVoddsZX, 12078 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) ); 12079 assign( dVevensZX, 12080 binop(Iop_ShrN16x4, 12081 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)), 12082 mkU8(8)) ); 12083 12084 putMMXReg( 12085 gregOfRM(modrm), 12086 binop(Iop_QAdd16Sx4, 12087 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), 12088 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX)) 12089 ) 12090 ); 12091 goto decode_success; 12092 } 12093 12094 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 12095 Unsigned Bytes (XMM) */ 12096 if (sz == 2 12097 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { 12098 IRTemp sV = newTemp(Ity_V128); 12099 IRTemp dV = newTemp(Ity_V128); 12100 IRTemp sVoddsSX = newTemp(Ity_V128); 12101 IRTemp sVevensSX = newTemp(Ity_V128); 12102 IRTemp dVoddsZX = newTemp(Ity_V128); 12103 IRTemp dVevensZX = newTemp(Ity_V128); 12104 12105 modrm = insn[3]; 12106 assign( dV, getXMMReg(gregOfRM(modrm)) ); 12107 12108 if (epartIsReg(modrm)) { 12109 assign( sV, getXMMReg(eregOfRM(modrm)) ); 12110 delta += 3+1; 12111 DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm)), 12112 nameXMMReg(gregOfRM(modrm))); 12113 } else { 12114 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12115 gen_SEGV_if_not_16_aligned( addr ); 12116 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12117 delta += 3+alen; 12118 DIP("pmaddubsw %s,%s\n", dis_buf, 12119 nameXMMReg(gregOfRM(modrm))); 12120 } 12121 12122 /* compute dV unsigned x sV signed */ 12123 assign( sVoddsSX, 12124 binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) ); 12125 assign( sVevensSX, 12126 binop(Iop_SarN16x8, 12127 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)), 12128 mkU8(8)) ); 12129 assign( dVoddsZX, 12130 binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) ); 12131 assign( dVevensZX, 12132 binop(Iop_ShrN16x8, 12133 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)), 12134 mkU8(8)) ); 12135 12136 putXMMReg( 12137 gregOfRM(modrm), 12138 binop(Iop_QAdd16Sx8, 12139 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), 12140 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX)) 12141 ) 12142 ); 12143 goto decode_success; 12144 } 12145 12146 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */ 12147 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or 12148 mmx) and G 
to G (mmx). */ 12149 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or 12150 mmx) and G to G (mmx). */ 12151 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G 12152 to G (mmx). */ 12153 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G 12154 to G (mmx). */ 12155 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G 12156 to G (mmx). */ 12157 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G 12158 to G (mmx). */ 12159 12160 if (sz == 4 12161 && insn[0] == 0x0F && insn[1] == 0x38 12162 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01 12163 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) { 12164 const HChar* str = "???"; 12165 IROp opV64 = Iop_INVALID; 12166 IROp opCatO = Iop_CatOddLanes16x4; 12167 IROp opCatE = Iop_CatEvenLanes16x4; 12168 IRTemp sV = newTemp(Ity_I64); 12169 IRTemp dV = newTemp(Ity_I64); 12170 12171 modrm = insn[3]; 12172 12173 switch (insn[2]) { 12174 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; 12175 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; 12176 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; 12177 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; 12178 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; 12179 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; 12180 default: vassert(0); 12181 } 12182 if (insn[2] == 0x02 || insn[2] == 0x06) { 12183 opCatO = Iop_InterleaveHI32x2; 12184 opCatE = Iop_InterleaveLO32x2; 12185 } 12186 12187 do_MMX_preamble(); 12188 assign( dV, getMMXReg(gregOfRM(modrm)) ); 12189 12190 if (epartIsReg(modrm)) { 12191 assign( sV, getMMXReg(eregOfRM(modrm)) ); 12192 delta += 3+1; 12193 DIP("ph%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)), 12194 nameMMXReg(gregOfRM(modrm))); 12195 } else { 12196 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12197 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 12198 delta += 3+alen; 12199 DIP("ph%s %s,%s\n", str, dis_buf, 12200 nameMMXReg(gregOfRM(modrm))); 12201 } 12202 12203 putMMXReg( 12204 gregOfRM(modrm), 12205 binop(opV64, 12206 binop(opCatE,mkexpr(sV),mkexpr(dV)), 12207 binop(opCatO,mkexpr(sV),mkexpr(dV)) 12208 ) 12209 ); 12210 goto decode_success; 12211 } 12212 12213 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or 12214 xmm) and G to G (xmm). */ 12215 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or 12216 xmm) and G to G (xmm). */ 12217 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and 12218 G to G (xmm). */ 12219 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and 12220 G to G (xmm). */ 12221 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and 12222 G to G (xmm). */ 12223 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and 12224 G to G (xmm). 
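      E.g. PHADDW gives dst.w0 = G.w0+G.w1, dst.w1 = G.w2+G.w3, ...,
      dst.w4 = E.w0+E.w1, etc.; the sub forms subtract the
      higher-numbered lane of each pair, so dst.w0 = G.w0-G.w1.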
*/

   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
           || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
      const HChar* str = "???";
      IROp opV64 = Iop_INVALID;
      IROp opCatO = Iop_CatOddLanes16x4;
      IROp opCatE = Iop_CatEvenLanes16x4;
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      IRTemp dHi = newTemp(Ity_I64);
      IRTemp dLo = newTemp(Ity_I64);

      modrm = insn[3];

      switch (insn[2]) {
         case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
         case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
         case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
         case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
         case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
         case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
         default: vassert(0);
      }
      if (insn[2] == 0x02 || insn[2] == 0x06) {
         opCatO = Iop_InterleaveHI32x2;
         opCatE = Iop_InterleaveLO32x2;
      }

      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg( eregOfRM(modrm)) );
         DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("ph%s %s,%s\n", str, dis_buf,
                             nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );

      /* This isn't a particularly efficient way to compute the
         result, but at least it avoids a proliferation of IROps,
         hence avoids complicating all the backends.
*/
      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               binop(opV64,
                     binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
                     binop(opCatO,mkexpr(sHi),mkexpr(sLo))
               ),
               binop(opV64,
                     binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
                     binop(opCatO,mkexpr(dHi),mkexpr(dLo))
               )
         )
      );
      goto decode_success;
   }

   /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
      (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                 nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmulhrsw %s,%s\n", dis_buf,
                                 nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
      );
      goto decode_success;
   }

   /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
      Scale (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
      IRTemp sV  = newTemp(Ity_V128);
      IRTemp dV  = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      IRTemp dHi = newTemp(Ity_I64);
      IRTemp dLo = newTemp(Ity_I64);

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmulhrsw %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
               dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
         )
      );
      goto decode_success;
   }

   /* 0F 38 08 = PSIGNB -- Packed Sign 8x8  (MMX) */
   /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
   /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);
      const HChar* str = "???";
      Int laneszB = 0;

      switch (insn[2]) {
         case 0x08: laneszB = 1; str = "b"; break;
         case 0x09: laneszB = 2; str = "w"; break;
         case 0x0A: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
DIP("psign%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)), 12397 nameMMXReg(gregOfRM(modrm))); 12398 } else { 12399 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12400 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 12401 delta += 3+alen; 12402 DIP("psign%s %s,%s\n", str, dis_buf, 12403 nameMMXReg(gregOfRM(modrm))); 12404 } 12405 12406 putMMXReg( 12407 gregOfRM(modrm), 12408 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB ) 12409 ); 12410 goto decode_success; 12411 } 12412 12413 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */ 12414 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */ 12415 /* 66 0F 38 09 = PSIGND -- Packed Sign 32x4 (XMM) */ 12416 if (sz == 2 12417 && insn[0] == 0x0F && insn[1] == 0x38 12418 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) { 12419 IRTemp sV = newTemp(Ity_V128); 12420 IRTemp dV = newTemp(Ity_V128); 12421 IRTemp sHi = newTemp(Ity_I64); 12422 IRTemp sLo = newTemp(Ity_I64); 12423 IRTemp dHi = newTemp(Ity_I64); 12424 IRTemp dLo = newTemp(Ity_I64); 12425 const HChar* str = "???"; 12426 Int laneszB = 0; 12427 12428 switch (insn[2]) { 12429 case 0x08: laneszB = 1; str = "b"; break; 12430 case 0x09: laneszB = 2; str = "w"; break; 12431 case 0x0A: laneszB = 4; str = "d"; break; 12432 default: vassert(0); 12433 } 12434 12435 modrm = insn[3]; 12436 assign( dV, getXMMReg(gregOfRM(modrm)) ); 12437 12438 if (epartIsReg(modrm)) { 12439 assign( sV, getXMMReg(eregOfRM(modrm)) ); 12440 delta += 3+1; 12441 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), 12442 nameXMMReg(gregOfRM(modrm))); 12443 } else { 12444 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12445 gen_SEGV_if_not_16_aligned( addr ); 12446 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12447 delta += 3+alen; 12448 DIP("psign%s %s,%s\n", str, dis_buf, 12449 nameXMMReg(gregOfRM(modrm))); 12450 } 12451 12452 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 12453 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 12454 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 12455 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 12456 12457 putXMMReg( 12458 gregOfRM(modrm), 12459 binop(Iop_64HLtoV128, 12460 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ), 12461 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB ) 12462 ) 12463 ); 12464 goto decode_success; 12465 } 12466 12467 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */ 12468 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */ 12469 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */ 12470 if (sz == 4 12471 && insn[0] == 0x0F && insn[1] == 0x38 12472 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) { 12473 IRTemp sV = newTemp(Ity_I64); 12474 const HChar* str = "???"; 12475 Int laneszB = 0; 12476 12477 switch (insn[2]) { 12478 case 0x1C: laneszB = 1; str = "b"; break; 12479 case 0x1D: laneszB = 2; str = "w"; break; 12480 case 0x1E: laneszB = 4; str = "d"; break; 12481 default: vassert(0); 12482 } 12483 12484 modrm = insn[3]; 12485 do_MMX_preamble(); 12486 12487 if (epartIsReg(modrm)) { 12488 assign( sV, getMMXReg(eregOfRM(modrm)) ); 12489 delta += 3+1; 12490 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)), 12491 nameMMXReg(gregOfRM(modrm))); 12492 } else { 12493 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12494 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 12495 delta += 3+alen; 12496 DIP("pabs%s %s,%s\n", str, dis_buf, 12497 nameMMXReg(gregOfRM(modrm))); 12498 } 12499 12500 putMMXReg( 12501 gregOfRM(modrm), 12502 dis_PABS_helper( mkexpr(sV), laneszB ) 12503 ); 12504 goto decode_success; 12505 } 

   /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
   /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
   /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
      IRTemp sV  = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      const HChar* str = "???";
      Int laneszB = 0;

      switch (insn[2]) {
         case 0x1C: laneszB = 1; str = "b"; break;
         case 0x1D: laneszB = 2; str = "w"; break;
         case 0x1E: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                    nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pabs%s %s,%s\n", str, dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }

      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PABS_helper( mkexpr(sHi), laneszB ),
               dis_PABS_helper( mkexpr(sLo), laneszB )
         )
      );
      goto decode_success;
   }

   /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
      IRTemp sV  = newTemp(Ity_I64);
      IRTemp dV  = newTemp(Ity_I64);
      IRTemp res = newTemp(Ity_I64);

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         d32 = (UInt)insn[3+1];
         delta += 3+1+1;
         DIP("palignr $%u,%s,%s\n", d32,
                                    nameMMXReg(eregOfRM(modrm)),
                                    nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         d32 = (UInt)insn[3+alen];
         delta += 3+alen+1;
         DIP("palignr $%u,%s,%s\n", d32,
                                    dis_buf,
                                    nameMMXReg(gregOfRM(modrm)));
      }

      if (d32 == 0) {
         assign( res, mkexpr(sV) );
      }
      else if (d32 >= 1 && d32 <= 7) {
         assign(res,
                binop(Iop_Or64,
                      binop(Iop_Shr64, mkexpr(sV), mkU8(8*d32)),
                      binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d32))
                )));
      }
      else if (d32 == 8) {
         assign( res, mkexpr(dV) );
      }
      else if (d32 >= 9 && d32 <= 15) {
         assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d32-8))) );
      }
      else if (d32 >= 16 && d32 <= 255) {
         assign( res, mkU64(0) );
      }
      else
         vassert(0);

      putMMXReg( gregOfRM(modrm), mkexpr(res) );
      goto decode_success;
   }

   /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
      IRTemp sV  = newTemp(Ity_V128);
      IRTemp dV  = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      IRTemp dHi = newTemp(Ity_I64);
      IRTemp dLo = newTemp(Ity_I64);
      IRTemp rHi = newTemp(Ity_I64);
      IRTemp rLo = newTemp(Ity_I64);

      modrm
= insn[3]; 12622 assign( dV, getXMMReg(gregOfRM(modrm)) ); 12623 12624 if (epartIsReg(modrm)) { 12625 assign( sV, getXMMReg(eregOfRM(modrm)) ); 12626 d32 = (UInt)insn[3+1]; 12627 delta += 3+1+1; 12628 DIP("palignr $%u,%s,%s\n", d32, 12629 nameXMMReg(eregOfRM(modrm)), 12630 nameXMMReg(gregOfRM(modrm))); 12631 } else { 12632 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12633 gen_SEGV_if_not_16_aligned( addr ); 12634 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12635 d32 = (UInt)insn[3+alen]; 12636 delta += 3+alen+1; 12637 DIP("palignr $%u,%s,%s\n", d32, 12638 dis_buf, 12639 nameXMMReg(gregOfRM(modrm))); 12640 } 12641 12642 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 12643 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 12644 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 12645 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 12646 12647 if (d32 == 0) { 12648 assign( rHi, mkexpr(sHi) ); 12649 assign( rLo, mkexpr(sLo) ); 12650 } 12651 else if (d32 >= 1 && d32 <= 7) { 12652 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d32) ); 12653 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d32) ); 12654 } 12655 else if (d32 == 8) { 12656 assign( rHi, mkexpr(dLo) ); 12657 assign( rLo, mkexpr(sHi) ); 12658 } 12659 else if (d32 >= 9 && d32 <= 15) { 12660 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d32-8) ); 12661 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d32-8) ); 12662 } 12663 else if (d32 == 16) { 12664 assign( rHi, mkexpr(dHi) ); 12665 assign( rLo, mkexpr(dLo) ); 12666 } 12667 else if (d32 >= 17 && d32 <= 23) { 12668 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-16))) ); 12669 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d32-16) ); 12670 } 12671 else if (d32 == 24) { 12672 assign( rHi, mkU64(0) ); 12673 assign( rLo, mkexpr(dHi) ); 12674 } 12675 else if (d32 >= 25 && d32 <= 31) { 12676 assign( rHi, mkU64(0) ); 12677 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-24))) ); 12678 } 12679 else if (d32 >= 32 && d32 <= 255) { 12680 assign( rHi, mkU64(0) ); 12681 assign( rLo, mkU64(0) ); 12682 } 12683 else 12684 vassert(0); 12685 12686 putXMMReg( 12687 gregOfRM(modrm), 12688 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) 12689 ); 12690 goto decode_success; 12691 } 12692 12693 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */ 12694 if (sz == 4 12695 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { 12696 IRTemp sV = newTemp(Ity_I64); 12697 IRTemp dV = newTemp(Ity_I64); 12698 12699 modrm = insn[3]; 12700 do_MMX_preamble(); 12701 assign( dV, getMMXReg(gregOfRM(modrm)) ); 12702 12703 if (epartIsReg(modrm)) { 12704 assign( sV, getMMXReg(eregOfRM(modrm)) ); 12705 delta += 3+1; 12706 DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm)), 12707 nameMMXReg(gregOfRM(modrm))); 12708 } else { 12709 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12710 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 12711 delta += 3+alen; 12712 DIP("pshufb %s,%s\n", dis_buf, 12713 nameMMXReg(gregOfRM(modrm))); 12714 } 12715 12716 putMMXReg( 12717 gregOfRM(modrm), 12718 binop( 12719 Iop_And64, 12720 /* permute the lanes */ 12721 binop( 12722 Iop_Perm8x8, 12723 mkexpr(dV), 12724 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL)) 12725 ), 12726 /* mask off lanes which have (index & 0x80) == 0x80 */ 12727 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7))) 12728 ) 12729 ); 12730 goto decode_success; 12731 } 12732 12733 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */ 12734 if (sz == 2 12735 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { 12736 IRTemp sV = 
newTemp(Ity_V128); 12737 IRTemp dV = newTemp(Ity_V128); 12738 IRTemp sHi = newTemp(Ity_I64); 12739 IRTemp sLo = newTemp(Ity_I64); 12740 IRTemp dHi = newTemp(Ity_I64); 12741 IRTemp dLo = newTemp(Ity_I64); 12742 IRTemp rHi = newTemp(Ity_I64); 12743 IRTemp rLo = newTemp(Ity_I64); 12744 IRTemp sevens = newTemp(Ity_I64); 12745 IRTemp mask0x80hi = newTemp(Ity_I64); 12746 IRTemp mask0x80lo = newTemp(Ity_I64); 12747 IRTemp maskBit3hi = newTemp(Ity_I64); 12748 IRTemp maskBit3lo = newTemp(Ity_I64); 12749 IRTemp sAnd7hi = newTemp(Ity_I64); 12750 IRTemp sAnd7lo = newTemp(Ity_I64); 12751 IRTemp permdHi = newTemp(Ity_I64); 12752 IRTemp permdLo = newTemp(Ity_I64); 12753 12754 modrm = insn[3]; 12755 assign( dV, getXMMReg(gregOfRM(modrm)) ); 12756 12757 if (epartIsReg(modrm)) { 12758 assign( sV, getXMMReg(eregOfRM(modrm)) ); 12759 delta += 3+1; 12760 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm)), 12761 nameXMMReg(gregOfRM(modrm))); 12762 } else { 12763 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12764 gen_SEGV_if_not_16_aligned( addr ); 12765 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12766 delta += 3+alen; 12767 DIP("pshufb %s,%s\n", dis_buf, 12768 nameXMMReg(gregOfRM(modrm))); 12769 } 12770 12771 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 12772 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 12773 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 12774 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 12775 12776 assign( sevens, mkU64(0x0707070707070707ULL) ); 12777 12778 /* 12779 mask0x80hi = Not(SarN8x8(sHi,7)) 12780 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7) 12781 sAnd7hi = And(sHi,sevens) 12782 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi), 12783 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) ) 12784 rHi = And(permdHi,mask0x80hi) 12785 */ 12786 assign( 12787 mask0x80hi, 12788 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7)))); 12789 12790 assign( 12791 maskBit3hi, 12792 binop(Iop_SarN8x8, 12793 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)), 12794 mkU8(7))); 12795 12796 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens))); 12797 12798 assign( 12799 permdHi, 12800 binop( 12801 Iop_Or64, 12802 binop(Iop_And64, 12803 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)), 12804 mkexpr(maskBit3hi)), 12805 binop(Iop_And64, 12806 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)), 12807 unop(Iop_Not64,mkexpr(maskBit3hi))) )); 12808 12809 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) ); 12810 12811 /* And the same for the lower half of the result. What fun. 
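      (Recall the full semantics: for each result byte i,
      r[i] = (s[i] & 0x80) ? 0 : d[s[i] & 15].  Bit 3 of each index
      byte selects dHi vs dLo, which is what the maskBit3 terms
      encode.)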
*/ 12812 12813 assign( 12814 mask0x80lo, 12815 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7)))); 12816 12817 assign( 12818 maskBit3lo, 12819 binop(Iop_SarN8x8, 12820 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)), 12821 mkU8(7))); 12822 12823 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens))); 12824 12825 assign( 12826 permdLo, 12827 binop( 12828 Iop_Or64, 12829 binop(Iop_And64, 12830 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)), 12831 mkexpr(maskBit3lo)), 12832 binop(Iop_And64, 12833 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)), 12834 unop(Iop_Not64,mkexpr(maskBit3lo))) )); 12835 12836 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) ); 12837 12838 putXMMReg( 12839 gregOfRM(modrm), 12840 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) 12841 ); 12842 goto decode_success; 12843 } 12844 12845 /* 0F 38 F0 = MOVBE m16/32(E), r16/32(G) */ 12846 /* 0F 38 F1 = MOVBE r16/32(G), m16/32(E) */ 12847 if ((sz == 2 || sz == 4) 12848 && insn[0] == 0x0F && insn[1] == 0x38 12849 && (insn[2] == 0xF0 || insn[2] == 0xF1) 12850 && !epartIsReg(insn[3])) { 12851 12852 modrm = insn[3]; 12853 addr = disAMode(&alen, sorb, delta + 3, dis_buf); 12854 delta += 3 + alen; 12855 ty = szToITy(sz); 12856 IRTemp src = newTemp(ty); 12857 12858 if (insn[2] == 0xF0) { /* LOAD */ 12859 assign(src, loadLE(ty, mkexpr(addr))); 12860 IRTemp dst = math_BSWAP(src, ty); 12861 putIReg(sz, gregOfRM(modrm), mkexpr(dst)); 12862 DIP("movbe %s,%s\n", dis_buf, nameIReg(sz, gregOfRM(modrm))); 12863 } else { /* STORE */ 12864 assign(src, getIReg(sz, gregOfRM(modrm))); 12865 IRTemp dst = math_BSWAP(src, ty); 12866 storeLE(mkexpr(addr), mkexpr(dst)); 12867 DIP("movbe %s,%s\n", nameIReg(sz, gregOfRM(modrm)), dis_buf); 12868 } 12869 goto decode_success; 12870 } 12871 12872 /* ---------------------------------------------------- */ 12873 /* --- end of the SSSE3 decoder. --- */ 12874 /* ---------------------------------------------------- */ 12875 12876 /* ---------------------------------------------------- */ 12877 /* --- start of the SSE4 decoder --- */ 12878 /* ---------------------------------------------------- */ 12879 12880 /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 12881 (Partial implementation only -- only deal with cases where 12882 the rounding mode is specified directly by the immediate byte.) 12883 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1 12884 (Limitations ditto) 12885 */ 12886 if (sz == 2 12887 && insn[0] == 0x0F && insn[1] == 0x3A 12888 && (insn[2] == 0x0B || insn[2] == 0x0A)) { 12889 12890 Bool isD = insn[2] == 0x0B; 12891 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32); 12892 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32); 12893 Int imm = 0; 12894 12895 modrm = insn[3]; 12896 12897 if (epartIsReg(modrm)) { 12898 assign( src, 12899 isD ? getXMMRegLane64F( eregOfRM(modrm), 0 ) 12900 : getXMMRegLane32F( eregOfRM(modrm), 0 ) ); 12901 imm = insn[3+1]; 12902 if (imm & ~3) goto decode_failure; 12903 delta += 3+1+1; 12904 DIP( "rounds%c $%d,%s,%s\n", 12905 isD ? 'd' : 's', 12906 imm, nameXMMReg( eregOfRM(modrm) ), 12907 nameXMMReg( gregOfRM(modrm) ) ); 12908 } else { 12909 addr = disAMode( &alen, sorb, delta+3, dis_buf ); 12910 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) )); 12911 imm = insn[3+alen]; 12912 if (imm & ~3) goto decode_failure; 12913 delta += 3+alen+1; 12914 DIP( "roundsd $%d,%s,%s\n", 12915 imm, dis_buf, nameXMMReg( gregOfRM(modrm) ) ); 12916 } 12917 12918 /* (imm & 3) contains an Intel-encoded rounding mode. 
Because 12919 that encoding is the same as the encoding for IRRoundingMode, 12920 we can use that value directly in the IR as a rounding 12921 mode. */ 12922 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt, 12923 mkU32(imm & 3), mkexpr(src)) ); 12924 12925 if (isD) 12926 putXMMRegLane64F( gregOfRM(modrm), 0, mkexpr(res) ); 12927 else 12928 putXMMRegLane32F( gregOfRM(modrm), 0, mkexpr(res) ); 12929 12930 goto decode_success; 12931 } 12932 12933 /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension, 12934 which we can only decode if we're sure this is an AMD cpu that 12935 supports LZCNT, since otherwise it's BSR, which behaves 12936 differently. */ 12937 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD 12938 && 0 != (archinfo->hwcaps & VEX_HWCAPS_X86_LZCNT)) { 12939 vassert(sz == 2 || sz == 4); 12940 /*IRType*/ ty = szToITy(sz); 12941 IRTemp src = newTemp(ty); 12942 modrm = insn[3]; 12943 if (epartIsReg(modrm)) { 12944 assign(src, getIReg(sz, eregOfRM(modrm))); 12945 delta += 3+1; 12946 DIP("lzcnt%c %s, %s\n", nameISize(sz), 12947 nameIReg(sz, eregOfRM(modrm)), 12948 nameIReg(sz, gregOfRM(modrm))); 12949 } else { 12950 addr = disAMode( &alen, sorb, delta+3, dis_buf ); 12951 assign(src, loadLE(ty, mkexpr(addr))); 12952 delta += 3+alen; 12953 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf, 12954 nameIReg(sz, gregOfRM(modrm))); 12955 } 12956 12957 IRTemp res = gen_LZCNT(ty, src); 12958 putIReg(sz, gregOfRM(modrm), mkexpr(res)); 12959 12960 // Update flags. This is pretty lame .. perhaps can do better 12961 // if this turns out to be performance critical. 12962 // O S A P are cleared. Z is set if RESULT == 0. 12963 // C is set if SRC is zero. 12964 IRTemp src32 = newTemp(Ity_I32); 12965 IRTemp res32 = newTemp(Ity_I32); 12966 assign(src32, widenUto32(mkexpr(src))); 12967 assign(res32, widenUto32(mkexpr(res))); 12968 12969 IRTemp oszacp = newTemp(Ity_I32); 12970 assign( 12971 oszacp, 12972 binop(Iop_Or32, 12973 binop(Iop_Shl32, 12974 unop(Iop_1Uto32, 12975 binop(Iop_CmpEQ32, mkexpr(res32), mkU32(0))), 12976 mkU8(X86G_CC_SHIFT_Z)), 12977 binop(Iop_Shl32, 12978 unop(Iop_1Uto32, 12979 binop(Iop_CmpEQ32, mkexpr(src32), mkU32(0))), 12980 mkU8(X86G_CC_SHIFT_C)) 12981 ) 12982 ); 12983 12984 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 12985 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 12986 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 12987 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) )); 12988 12989 goto decode_success; 12990 } 12991 12992 /* ---------------------------------------------------- */ 12993 /* --- end of the SSE4 decoder --- */ 12994 /* ---------------------------------------------------- */ 12995 12996 after_sse_decoders: 12997 12998 /* ---------------------------------------------------- */ 12999 /* --- deal with misc 0x67 pfxs (addr size override) -- */ 13000 /* ---------------------------------------------------- */ 13001 13002 /* 67 E3 = JCXZ (for JECXZ see below) */ 13003 if (insn[0] == 0x67 && insn[1] == 0xE3 && sz == 4) { 13004 delta += 2; 13005 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta); 13006 delta ++; 13007 stmt( IRStmt_Exit( 13008 binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)), 13009 Ijk_Boring, 13010 IRConst_U32(d32), 13011 OFFB_EIP 13012 )); 13013 DIP("jcxz 0x%x\n", d32); 13014 goto decode_success; 13015 } 13016 13017 /* ---------------------------------------------------- */ 13018 /* --- start of the baseline insn decoder -- */ 13019 /* ---------------------------------------------------- */ 13020 13021 /* Get 
the primary opcode. */ 13022 opc = getIByte(delta); delta++; 13023 13024 /* We get here if the current insn isn't SSE, or this CPU doesn't 13025 support SSE. */ 13026 13027 switch (opc) { 13028 13029 /* ------------------------ Control flow --------------- */ 13030 13031 case 0xC2: /* RET imm16 */ 13032 d32 = getUDisp16(delta); 13033 delta += 2; 13034 dis_ret(&dres, d32); 13035 DIP("ret %u\n", d32); 13036 break; 13037 case 0xC3: /* RET */ 13038 dis_ret(&dres, 0); 13039 DIP("ret\n"); 13040 break; 13041 13042 case 0xCF: /* IRET */ 13043 /* Note, this is an extremely kludgey and limited implementation 13044 of iret. All it really does is: 13045 popl %EIP; popl %CS; popl %EFLAGS. 13046 %CS is set but ignored (as it is in (eg) popw %cs)". */ 13047 t1 = newTemp(Ity_I32); /* ESP */ 13048 t2 = newTemp(Ity_I32); /* new EIP */ 13049 t3 = newTemp(Ity_I32); /* new CS */ 13050 t4 = newTemp(Ity_I32); /* new EFLAGS */ 13051 assign(t1, getIReg(4,R_ESP)); 13052 assign(t2, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(0) ))); 13053 assign(t3, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(4) ))); 13054 assign(t4, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(8) ))); 13055 /* Get stuff off stack */ 13056 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(12))); 13057 /* set %CS (which is ignored anyway) */ 13058 putSReg( R_CS, unop(Iop_32to16, mkexpr(t3)) ); 13059 /* set %EFLAGS */ 13060 set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ ); 13061 /* goto new EIP value */ 13062 jmp_treg(&dres, Ijk_Ret, t2); 13063 vassert(dres.whatNext == Dis_StopHere); 13064 DIP("iret (very kludgey)\n"); 13065 break; 13066 13067 case 0xE8: /* CALL J4 */ 13068 d32 = getUDisp32(delta); delta += 4; 13069 d32 += (guest_EIP_bbstart+delta); 13070 /* (guest_eip_bbstart+delta) == return-to addr, d32 == call-to addr */ 13071 if (d32 == guest_EIP_bbstart+delta && getIByte(delta) >= 0x58 13072 && getIByte(delta) <= 0x5F) { 13073 /* Specially treat the position-independent-code idiom 13074 call X 13075 X: popl %reg 13076 as 13077 movl %eip, %reg. 13078 since this generates better code, but for no other reason. */ 13079 Int archReg = getIByte(delta) - 0x58; 13080 /* vex_printf("-- fPIC thingy\n"); */ 13081 putIReg(4, archReg, mkU32(guest_EIP_bbstart+delta)); 13082 delta++; /* Step over the POP */ 13083 DIP("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg)); 13084 } else { 13085 /* The normal sequence for a call. */ 13086 t1 = newTemp(Ity_I32); 13087 assign(t1, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); 13088 putIReg(4, R_ESP, mkexpr(t1)); 13089 storeLE( mkexpr(t1), mkU32(guest_EIP_bbstart+delta)); 13090 if (resteerOkFn( callback_opaque, (Addr32)d32 )) { 13091 /* follow into the call target. 
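            Resteering means decoding simply carries on at the
            callee, so the superblock can extend through the call.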
*/ 13092 dres.whatNext = Dis_ResteerU; 13093 dres.continueAt = (Addr32)d32; 13094 } else { 13095 jmp_lit(&dres, Ijk_Call, d32); 13096 vassert(dres.whatNext == Dis_StopHere); 13097 } 13098 DIP("call 0x%x\n",d32); 13099 } 13100 break; 13101 13102//-- case 0xC8: /* ENTER */ 13103//-- d32 = getUDisp16(eip); eip += 2; 13104//-- abyte = getIByte(delta); delta++; 13105//-- 13106//-- vg_assert(sz == 4); 13107//-- vg_assert(abyte == 0); 13108//-- 13109//-- t1 = newTemp(cb); t2 = newTemp(cb); 13110//-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1); 13111//-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); 13112//-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); 13113//-- uLiteral(cb, sz); 13114//-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); 13115//-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2); 13116//-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP); 13117//-- if (d32) { 13118//-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); 13119//-- uLiteral(cb, d32); 13120//-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); 13121//-- } 13122//-- DIP("enter 0x%x, 0x%x", d32, abyte); 13123//-- break; 13124 13125 case 0xC9: /* LEAVE */ 13126 vassert(sz == 4); 13127 t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32); 13128 assign(t1, getIReg(4,R_EBP)); 13129 /* First PUT ESP looks redundant, but need it because ESP must 13130 always be up-to-date for Memcheck to work... */ 13131 putIReg(4, R_ESP, mkexpr(t1)); 13132 assign(t2, loadLE(Ity_I32,mkexpr(t1))); 13133 putIReg(4, R_EBP, mkexpr(t2)); 13134 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(4)) ); 13135 DIP("leave\n"); 13136 break; 13137 13138 /* ---------------- Misc weird-ass insns --------------- */ 13139 13140 case 0x27: /* DAA */ 13141 case 0x2F: /* DAS */ 13142 case 0x37: /* AAA */ 13143 case 0x3F: /* AAS */ 13144 /* An ugly implementation for some ugly instructions. Oh 13145 well. */ 13146 if (sz != 4) goto decode_failure; 13147 t1 = newTemp(Ity_I32); 13148 t2 = newTemp(Ity_I32); 13149 /* Make up a 32-bit value (t1), with the old value of AX in the 13150 bottom 16 bits, and the old OSZACP bitmask in the upper 16 13151 bits. */ 13152 assign(t1, 13153 binop(Iop_16HLto32, 13154 unop(Iop_32to16, 13155 mk_x86g_calculate_eflags_all()), 13156 getIReg(2, R_EAX) 13157 )); 13158 /* Call the helper fn, to get a new AX and OSZACP value, and 13159 poke both back into the guest state. Also pass the helper 13160 the actual opcode so it knows which of the 4 instructions it 13161 is doing the computation for. */ 13162 vassert(opc == 0x27 || opc == 0x2F || opc == 0x37 || opc == 0x3F); 13163 assign(t2, 13164 mkIRExprCCall( 13165 Ity_I32, 0/*regparm*/, "x86g_calculate_daa_das_aaa_aas", 13166 &x86g_calculate_daa_das_aaa_aas, 13167 mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) ) 13168 )); 13169 putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) )); 13170 13171 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 13172 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 13173 stmt( IRStmt_Put( OFFB_CC_DEP1, 13174 binop(Iop_And32, 13175 binop(Iop_Shr32, mkexpr(t2), mkU8(16)), 13176 mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P 13177 | X86G_CC_MASK_A | X86G_CC_MASK_Z 13178 | X86G_CC_MASK_S| X86G_CC_MASK_O ) 13179 ) 13180 ) 13181 ); 13182 /* Set NDEP even though it isn't used. This makes redundant-PUT 13183 elimination of previous stores to this field work better. 
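            (Concretely: the IR optimiser's redundant-PUT elimination
            may delete an earlier Put to CC_NDEP only when it can see a
            later, unconditional Put to the same guest offset in the
            same superblock.  Always writing NDEP here, even though its
            value is irrelevant for OP=COPY, provides exactly that.)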
*/
13184       stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
13185       switch (opc) {
13186          case 0x27: DIP("daa\n"); break;
13187          case 0x2F: DIP("das\n"); break;
13188          case 0x37: DIP("aaa\n"); break;
13189          case 0x3F: DIP("aas\n"); break;
13190          default: vassert(0);
13191       }
13192       break;
13193
13194    case 0xD4: /* AAM */
13195    case 0xD5: /* AAD */
13196       d32 = getIByte(delta); delta++;
13197       if (sz != 4 || d32 != 10) goto decode_failure;
13198       t1 = newTemp(Ity_I32);
13199       t2 = newTemp(Ity_I32);
13200       /* Make up a 32-bit value (t1), with the old value of AX in the
13201          bottom 16 bits, and the old OSZACP bitmask in the upper 16
13202          bits. */
13203       assign(t1,
13204              binop(Iop_16HLto32,
13205                    unop(Iop_32to16,
13206                         mk_x86g_calculate_eflags_all()),
13207                    getIReg(2, R_EAX)
13208              ));
13209       /* Call the helper fn, to get a new AX and OSZACP value, and
13210          poke both back into the guest state.  Also pass the helper
13211          the actual opcode so it knows which of the 2 instructions it
13212          is doing the computation for. */
13213       assign(t2,
13214              mkIRExprCCall(
13215                 Ity_I32, 0/*regparm*/, "x86g_calculate_aad_aam",
13216                 &x86g_calculate_aad_aam,
13217                 mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
13218              ));
13219       putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));
13220
13221       stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
13222       stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
13223       stmt( IRStmt_Put( OFFB_CC_DEP1,
13224                         binop(Iop_And32,
13225                               binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
13226                               mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
13227                                      | X86G_CC_MASK_A | X86G_CC_MASK_Z
13228                                      | X86G_CC_MASK_S | X86G_CC_MASK_O )
13229                         )
13230            )
13231       );
13232       /* Set NDEP even though it isn't used.  This makes
13233          redundant-PUT elimination of previous stores to this field
13234          work better. */
13235       stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
13236
13237       DIP(opc == 0xD4 ? "aam\n" : "aad\n");
13238       break;
13239
13240    /* ------------------------ CWD/CDQ -------------------- */
13241
13242    case 0x98: /* CBW/CWDE */
13243       if (sz == 4) {
13244          putIReg(4, R_EAX, unop(Iop_16Sto32, getIReg(2, R_EAX)));
13245          DIP("cwde\n");
13246       } else {
13247          vassert(sz == 2);
13248          putIReg(2, R_EAX, unop(Iop_8Sto16, getIReg(1, R_EAX)));
13249          DIP("cbw\n");
13250       }
13251       break;
13252
13253    case 0x99: /* CWD/CDQ */
13254       ty = szToITy(sz);
13255       putIReg(sz, R_EDX,
13256               binop(mkSizedOp(ty,Iop_Sar8),
13257                     getIReg(sz, R_EAX),
13258                     mkU8(sz == 2 ? 15 : 31)) );
13259       DIP(sz == 2 ? "cwd\n" : "cdq\n");
13260       break;
13261
13262    /* ------------------------ FPU ops -------------------- */
13263
13264    case 0x9E: /* SAHF */
13265       codegen_SAHF();
13266       DIP("sahf\n");
13267       break;
13268
13269    case 0x9F: /* LAHF */
13270       codegen_LAHF();
13271       DIP("lahf\n");
13272       break;
13273
13274    case 0x9B: /* FWAIT */
13275       /* ignore? */
13276       DIP("fwait\n");
13277       break;
13278
13279    case 0xD8:
13280    case 0xD9:
13281    case 0xDA:
13282    case 0xDB:
13283    case 0xDC:
13284    case 0xDD:
13285    case 0xDE:
13286    case 0xDF: {
13287       Int  delta0    = delta;
13288       Bool decode_OK = False;
13289       delta = dis_FPU ( &decode_OK, sorb, delta );
13290       if (!decode_OK) {
13291          delta = delta0;
13292          goto decode_failure;
13293       }
13294       break;
13295    }
13296
13297    /* ------------------------ INC & DEC ------------------ */
13298
13299    case 0x40: /* INC eAX */
13300    case 0x41: /* INC eCX */
13301    case 0x42: /* INC eDX */
13302    case 0x43: /* INC eBX */
13303    case 0x44: /* INC eSP */
13304    case 0x45: /* INC eBP */
13305    case 0x46: /* INC eSI */
13306    case 0x47: /* INC eDI */
13307       vassert(sz == 2 || sz == 4);
13308       ty = szToITy(sz);
13309       t1 = newTemp(ty);
13310       assign( t1, binop(mkSizedOp(ty,Iop_Add8),
13311                         getIReg(sz, (UInt)(opc - 0x40)),
13312                         mkU(ty,1)) );
13313       setFlags_INC_DEC( True, t1, ty );
13314       putIReg(sz, (UInt)(opc - 0x40), mkexpr(t1));
13315       DIP("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40));
13316       break;
13317
13318    case 0x48: /* DEC eAX */
13319    case 0x49: /* DEC eCX */
13320    case 0x4A: /* DEC eDX */
13321    case 0x4B: /* DEC eBX */
13322    case 0x4C: /* DEC eSP */
13323    case 0x4D: /* DEC eBP */
13324    case 0x4E: /* DEC eSI */
13325    case 0x4F: /* DEC eDI */
13326       vassert(sz == 2 || sz == 4);
13327       ty = szToITy(sz);
13328       t1 = newTemp(ty);
13329       assign( t1, binop(mkSizedOp(ty,Iop_Sub8),
13330                         getIReg(sz, (UInt)(opc - 0x48)),
13331                         mkU(ty,1)) );
13332       setFlags_INC_DEC( False, t1, ty );
13333       putIReg(sz, (UInt)(opc - 0x48), mkexpr(t1));
13334       DIP("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48));
13335       break;
13336
13337    /* ------------------------ INT ------------------------ */
13338
13339    case 0xCC: /* INT 3 */
13340       jmp_lit(&dres, Ijk_SigTRAP, ((Addr32)guest_EIP_bbstart)+delta);
13341       vassert(dres.whatNext == Dis_StopHere);
13342       DIP("int $0x3\n");
13343       break;
13344
13345    case 0xCD: /* INT imm8 */
13346       d32 = getIByte(delta); delta++;
13347
13348       /* For any of the cases where we emit a jump (that is, for all
13349          currently handled cases), it's important that all ArchRegs
13350          carry their up-to-date value at this point.  So we declare an
13351          end-of-block here, which forces any TempRegs caching ArchRegs
13352          to be flushed. */
13353
13354       /* Handle int $0x3F .. $0x4F by synthesising a segfault and a
13355          restart of this instruction (hence the "-2" two lines below,
13356          to get the restart EIP to be this instruction).  This is
13357          probably Linux-specific and it would be more correct to only
13358          do this if the VexAbiInfo says that is what we should do.
13359          This used to handle just 0x40-0x43; Jikes RVM uses a larger
13360          range (0x3F-0x49), and this allows some slack as well. */
13361       if (d32 >= 0x3F && d32 <= 0x4F) {
13362          jmp_lit(&dres, Ijk_SigSEGV, ((Addr32)guest_EIP_bbstart)+delta-2);
13363          vassert(dres.whatNext == Dis_StopHere);
13364          DIP("int $0x%x\n", d32);
13365          break;
13366       }
13367
13368       /* Handle int $0x80 (linux syscalls), int $0x81 and $0x82
13369          (darwin syscalls), int $0x91 (Solaris syscalls) and int $0xD2
13370          (Solaris fasttrap syscalls).  As part of this, note where we
13371          are, so we can back up the guest to this point if the syscall
13372          needs to be restarted.
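          Illustratively (a sketch of the intended protocol, not code
          from this file): should the syscall need restarting, the
          run-time can simply do

             guest_EIP = guest_IP_AT_SYSCALL;

          which re-executes the int instruction itself.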
*/
13373       IRJumpKind jump_kind;
13374       switch (d32) {
13375          case 0x80:
13376             jump_kind = Ijk_Sys_int128;
13377             break;
13378          case 0x81:
13379             jump_kind = Ijk_Sys_int129;
13380             break;
13381          case 0x82:
13382             jump_kind = Ijk_Sys_int130;
13383             break;
13384          case 0x91:
13385             jump_kind = Ijk_Sys_int145;
13386             break;
13387          case 0xD2:
13388             jump_kind = Ijk_Sys_int210;
13389             break;
13390          default:
13391             /* none of the above */
13392             goto decode_failure;
13393       }
13394
13395       stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
13396                         mkU32(guest_EIP_curr_instr) ) );
13397       jmp_lit(&dres, jump_kind, ((Addr32)guest_EIP_bbstart)+delta);
13398       vassert(dres.whatNext == Dis_StopHere);
13399       DIP("int $0x%x\n", d32);
13400       break;
13401
13402    /* ------------------------ Jcond, byte offset --------- */
13403
13404    case 0xEB: /* Jb (jump, byte offset) */
13405       d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
13406       delta++;
13407       if (resteerOkFn( callback_opaque, (Addr32)d32) ) {
13408          dres.whatNext   = Dis_ResteerU;
13409          dres.continueAt = (Addr32)d32;
13410       } else {
13411          jmp_lit(&dres, Ijk_Boring, d32);
13412          vassert(dres.whatNext == Dis_StopHere);
13413       }
13414       DIP("jmp-8 0x%x\n", d32);
13415       break;
13416
13417    case 0xE9: /* Jv (jump, 16/32 offset) */
13418       vassert(sz == 4); /* JRS added 2004 July 11 */
13419       d32 = (((Addr32)guest_EIP_bbstart)+delta+sz) + getSDisp(sz,delta);
13420       delta += sz;
13421       if (resteerOkFn( callback_opaque, (Addr32)d32) ) {
13422          dres.whatNext   = Dis_ResteerU;
13423          dres.continueAt = (Addr32)d32;
13424       } else {
13425          jmp_lit(&dres, Ijk_Boring, d32);
13426          vassert(dres.whatNext == Dis_StopHere);
13427       }
13428       DIP("jmp 0x%x\n", d32);
13429       break;
13430
13431    case 0x70: /* JOb (jump overflow) */
13432    case 0x71: /* JNOb (jump no overflow) */
13433    case 0x72: /* JBb/JNAEb (jump below) */
13434    case 0x73: /* JNBb/JAEb (jump not below) */
13435    case 0x74: /* JZb/JEb (jump zero) */
13436    case 0x75: /* JNZb/JNEb (jump not zero) */
13437    case 0x76: /* JBEb/JNAb (jump below or equal) */
13438    case 0x77: /* JNBEb/JAb (jump not below or equal) */
13439    case 0x78: /* JSb (jump negative) */
13440    case 0x79: /* JNSb (jump not negative) */
13441    case 0x7A: /* JP (jump parity even) */
13442    case 0x7B: /* JNP/JPO (jump parity odd) */
13443    case 0x7C: /* JLb/JNGEb (jump less) */
13444    case 0x7D: /* JGEb/JNLb (jump greater or equal) */
13445    case 0x7E: /* JLEb/JNGb (jump less or equal) */
13446    case 0x7F: /* JGb/JNLEb (jump greater) */
13447     { Int jmpDelta;
13448       const HChar* comment = "";
13449       jmpDelta = (Int)getSDisp8(delta);
13450       vassert(-128 <= jmpDelta && jmpDelta < 128);
13451       d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + jmpDelta;
13452       delta++;
13453       if (resteerCisOk
13454           && vex_control.guest_chase_cond
13455           && (Addr32)d32 != (Addr32)guest_EIP_bbstart
13456           && jmpDelta < 0
13457           && resteerOkFn( callback_opaque, (Addr32)d32) ) {
13458          /* Speculation: assume this backward branch is taken.  So we
13459             need to emit a side-exit to the insn following this one,
13460             on the negation of the condition, and continue at the
13461             branch target address (d32).  If we wind up back at the
13462             first instruction of the trace, just stop; it's better to
13463             let the IR loop unroller handle that case.
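            Schematically, the IR emitted for this case is

               if (!cond) { PUT(EIP) = <addr of next insn>; exit; }
               <decoding continues at d32>

            so the assumed-hot taken path stays inside this superblock
            and only the cold path pays for a block exit.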
*/ 13464 stmt( IRStmt_Exit( 13465 mk_x86g_calculate_condition((X86Condcode)(1 ^ (opc - 0x70))), 13466 Ijk_Boring, 13467 IRConst_U32(guest_EIP_bbstart+delta), 13468 OFFB_EIP ) ); 13469 dres.whatNext = Dis_ResteerC; 13470 dres.continueAt = (Addr32)d32; 13471 comment = "(assumed taken)"; 13472 } 13473 else 13474 if (resteerCisOk 13475 && vex_control.guest_chase_cond 13476 && (Addr32)d32 != (Addr32)guest_EIP_bbstart 13477 && jmpDelta >= 0 13478 && resteerOkFn( callback_opaque, 13479 (Addr32)(guest_EIP_bbstart+delta)) ) { 13480 /* Speculation: assume this forward branch is not taken. So 13481 we need to emit a side-exit to d32 (the dest) and continue 13482 disassembling at the insn immediately following this 13483 one. */ 13484 stmt( IRStmt_Exit( 13485 mk_x86g_calculate_condition((X86Condcode)(opc - 0x70)), 13486 Ijk_Boring, 13487 IRConst_U32(d32), 13488 OFFB_EIP ) ); 13489 dres.whatNext = Dis_ResteerC; 13490 dres.continueAt = guest_EIP_bbstart + delta; 13491 comment = "(assumed not taken)"; 13492 } 13493 else { 13494 /* Conservative default translation - end the block at this 13495 point. */ 13496 jcc_01( &dres, (X86Condcode)(opc - 0x70), 13497 (Addr32)(guest_EIP_bbstart+delta), d32); 13498 vassert(dres.whatNext == Dis_StopHere); 13499 } 13500 DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc - 0x70), d32, comment); 13501 break; 13502 } 13503 13504 case 0xE3: /* JECXZ (for JCXZ see above) */ 13505 if (sz != 4) goto decode_failure; 13506 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta); 13507 delta ++; 13508 stmt( IRStmt_Exit( 13509 binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)), 13510 Ijk_Boring, 13511 IRConst_U32(d32), 13512 OFFB_EIP 13513 )); 13514 DIP("jecxz 0x%x\n", d32); 13515 break; 13516 13517 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */ 13518 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */ 13519 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */ 13520 { /* Again, the docs say this uses ECX/CX as a count depending on 13521 the address size override, not the operand one. Since we 13522 don't handle address size overrides, I guess that means 13523 ECX. 
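         For reference, the translation below implements,
         schematically,

            ECX = ECX - 1;
            if (ECX != 0 && <zcond>) goto d32;

         where <zcond> is absent for LOOP, is ZF==1 for LOOPE, and is
         ZF==0 for LOOPNE.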
*/
13524       IRExpr* zbit  = NULL;
13525       IRExpr* count = NULL;
13526       IRExpr* cond  = NULL;
13527       const HChar* xtra = NULL;
13528
13529       if (sz != 4) goto decode_failure;
13530       d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
13531       delta++;
13532       putIReg(4, R_ECX, binop(Iop_Sub32, getIReg(4,R_ECX), mkU32(1)));
13533
13534       count = getIReg(4,R_ECX);
13535       cond = binop(Iop_CmpNE32, count, mkU32(0));
13536       switch (opc) {
13537          case 0xE2:
13538             xtra = "";
13539             break;
13540          case 0xE1:
13541             xtra = "e";
13542             zbit = mk_x86g_calculate_condition( X86CondZ );
13543             cond = mkAnd1(cond, zbit);
13544             break;
13545          case 0xE0:
13546             xtra = "ne";
13547             zbit = mk_x86g_calculate_condition( X86CondNZ );
13548             cond = mkAnd1(cond, zbit);
13549             break;
13550          default:
13551             vassert(0);
13552       }
13553       stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32), OFFB_EIP) );
13554
13555       DIP("loop%s 0x%x\n", xtra, d32);
13556       break;
13557     }
13558
13559    /* ------------------------ IMUL ----------------------- */
13560
13561    case 0x69: /* IMUL Iv, Ev, Gv */
13562       delta = dis_imul_I_E_G ( sorb, sz, delta, sz );
13563       break;
13564    case 0x6B: /* IMUL Ib, Ev, Gv */
13565       delta = dis_imul_I_E_G ( sorb, sz, delta, 1 );
13566       break;
13567
13568    /* ------------------------ MOV ------------------------ */
13569
13570    case 0x88: /* MOV Gb,Eb */
13571       delta = dis_mov_G_E(sorb, 1, delta);
13572       break;
13573
13574    case 0x89: /* MOV Gv,Ev */
13575       delta = dis_mov_G_E(sorb, sz, delta);
13576       break;
13577
13578    case 0x8A: /* MOV Eb,Gb */
13579       delta = dis_mov_E_G(sorb, 1, delta);
13580       break;
13581
13582    case 0x8B: /* MOV Ev,Gv */
13583       delta = dis_mov_E_G(sorb, sz, delta);
13584       break;
13585
13586    case 0x8D: /* LEA M,Gv */
13587       if (sz != 4)
13588          goto decode_failure;
13589       modrm = getIByte(delta);
13590       if (epartIsReg(modrm))
13591          goto decode_failure;
13592       /* NOTE!  this is the one place where a segment override prefix
13593          has no effect on the address calculation.  Therefore we pass
13594          zero instead of sorb here. */
13595       addr = disAMode ( &alen, /*sorb*/ 0, delta, dis_buf );
13596       delta += alen;
13597       putIReg(sz, gregOfRM(modrm), mkexpr(addr));
13598       DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
13599                             nameIReg(sz,gregOfRM(modrm)));
13600       break;
13601
13602    case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */
13603       delta = dis_mov_Sw_Ew(sorb, sz, delta);
13604       break;
13605
13606    case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */
13607       delta = dis_mov_Ew_Sw(sorb, delta);
13608       break;
13609
13610    case 0xA0: /* MOV Ob,AL */
13611       sz = 1;
13612       /* Fall through ... */
13613    case 0xA1: /* MOV Ov,eAX */
13614       d32 = getUDisp32(delta); delta += 4;
13615       ty = szToITy(sz);
13616       addr = newTemp(Ity_I32);
13617       assign( addr, handleSegOverride(sorb, mkU32(d32)) );
13618       putIReg(sz, R_EAX, loadLE(ty, mkexpr(addr)));
13619       DIP("mov%c %s0x%x, %s\n", nameISize(sz), sorbTxt(sorb),
13620                                 d32, nameIReg(sz,R_EAX));
13621       break;
13622
13623    case 0xA2: /* MOV AL,Ob */
13624       sz = 1;
13625       /* Fall through ...
*/ 13626 case 0xA3: /* MOV eAX,Ov */ 13627 d32 = getUDisp32(delta); delta += 4; 13628 ty = szToITy(sz); 13629 addr = newTemp(Ity_I32); 13630 assign( addr, handleSegOverride(sorb, mkU32(d32)) ); 13631 storeLE( mkexpr(addr), getIReg(sz,R_EAX) ); 13632 DIP("mov%c %s, %s0x%x\n", nameISize(sz), nameIReg(sz,R_EAX), 13633 sorbTxt(sorb), d32); 13634 break; 13635 13636 case 0xB0: /* MOV imm,AL */ 13637 case 0xB1: /* MOV imm,CL */ 13638 case 0xB2: /* MOV imm,DL */ 13639 case 0xB3: /* MOV imm,BL */ 13640 case 0xB4: /* MOV imm,AH */ 13641 case 0xB5: /* MOV imm,CH */ 13642 case 0xB6: /* MOV imm,DH */ 13643 case 0xB7: /* MOV imm,BH */ 13644 d32 = getIByte(delta); delta += 1; 13645 putIReg(1, opc-0xB0, mkU8(d32)); 13646 DIP("movb $0x%x,%s\n", d32, nameIReg(1,opc-0xB0)); 13647 break; 13648 13649 case 0xB8: /* MOV imm,eAX */ 13650 case 0xB9: /* MOV imm,eCX */ 13651 case 0xBA: /* MOV imm,eDX */ 13652 case 0xBB: /* MOV imm,eBX */ 13653 case 0xBC: /* MOV imm,eSP */ 13654 case 0xBD: /* MOV imm,eBP */ 13655 case 0xBE: /* MOV imm,eSI */ 13656 case 0xBF: /* MOV imm,eDI */ 13657 d32 = getUDisp(sz,delta); delta += sz; 13658 putIReg(sz, opc-0xB8, mkU(szToITy(sz), d32)); 13659 DIP("mov%c $0x%x,%s\n", nameISize(sz), d32, nameIReg(sz,opc-0xB8)); 13660 break; 13661 13662 case 0xC6: /* C6 /0 = MOV Ib,Eb */ 13663 sz = 1; 13664 goto maybe_do_Mov_I_E; 13665 case 0xC7: /* C7 /0 = MOV Iv,Ev */ 13666 goto maybe_do_Mov_I_E; 13667 13668 maybe_do_Mov_I_E: 13669 modrm = getIByte(delta); 13670 if (gregOfRM(modrm) == 0) { 13671 if (epartIsReg(modrm)) { 13672 delta++; /* mod/rm byte */ 13673 d32 = getUDisp(sz,delta); delta += sz; 13674 putIReg(sz, eregOfRM(modrm), mkU(szToITy(sz), d32)); 13675 DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, 13676 nameIReg(sz,eregOfRM(modrm))); 13677 } else { 13678 addr = disAMode ( &alen, sorb, delta, dis_buf ); 13679 delta += alen; 13680 d32 = getUDisp(sz,delta); delta += sz; 13681 storeLE(mkexpr(addr), mkU(szToITy(sz), d32)); 13682 DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf); 13683 } 13684 break; 13685 } 13686 goto decode_failure; 13687 13688 /* ------------------------ opl imm, A ----------------- */ 13689 13690 case 0x04: /* ADD Ib, AL */ 13691 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" ); 13692 break; 13693 case 0x05: /* ADD Iv, eAX */ 13694 delta = dis_op_imm_A( sz, False, Iop_Add8, True, delta, "add" ); 13695 break; 13696 13697 case 0x0C: /* OR Ib, AL */ 13698 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" ); 13699 break; 13700 case 0x0D: /* OR Iv, eAX */ 13701 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" ); 13702 break; 13703 13704 case 0x14: /* ADC Ib, AL */ 13705 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" ); 13706 break; 13707 case 0x15: /* ADC Iv, eAX */ 13708 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" ); 13709 break; 13710 13711 case 0x1C: /* SBB Ib, AL */ 13712 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" ); 13713 break; 13714 case 0x1D: /* SBB Iv, eAX */ 13715 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" ); 13716 break; 13717 13718 case 0x24: /* AND Ib, AL */ 13719 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" ); 13720 break; 13721 case 0x25: /* AND Iv, eAX */ 13722 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" ); 13723 break; 13724 13725 case 0x2C: /* SUB Ib, AL */ 13726 delta = dis_op_imm_A( 1, False, Iop_Sub8, True, delta, "sub" ); 13727 break; 13728 case 0x2D: /* SUB Iv, eAX */ 13729 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, 
delta, "sub" ); 13730 break; 13731 13732 case 0x34: /* XOR Ib, AL */ 13733 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" ); 13734 break; 13735 case 0x35: /* XOR Iv, eAX */ 13736 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" ); 13737 break; 13738 13739 case 0x3C: /* CMP Ib, AL */ 13740 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" ); 13741 break; 13742 case 0x3D: /* CMP Iv, eAX */ 13743 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" ); 13744 break; 13745 13746 case 0xA8: /* TEST Ib, AL */ 13747 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" ); 13748 break; 13749 case 0xA9: /* TEST Iv, eAX */ 13750 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" ); 13751 break; 13752 13753 /* ------------------------ opl Ev, Gv ----------------- */ 13754 13755 case 0x02: /* ADD Eb,Gb */ 13756 delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, 1, delta, "add" ); 13757 break; 13758 case 0x03: /* ADD Ev,Gv */ 13759 delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, sz, delta, "add" ); 13760 break; 13761 13762 case 0x0A: /* OR Eb,Gb */ 13763 delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, 1, delta, "or" ); 13764 break; 13765 case 0x0B: /* OR Ev,Gv */ 13766 delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, sz, delta, "or" ); 13767 break; 13768 13769 case 0x12: /* ADC Eb,Gb */ 13770 delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, 1, delta, "adc" ); 13771 break; 13772 case 0x13: /* ADC Ev,Gv */ 13773 delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, sz, delta, "adc" ); 13774 break; 13775 13776 case 0x1A: /* SBB Eb,Gb */ 13777 delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, 1, delta, "sbb" ); 13778 break; 13779 case 0x1B: /* SBB Ev,Gv */ 13780 delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, sz, delta, "sbb" ); 13781 break; 13782 13783 case 0x22: /* AND Eb,Gb */ 13784 delta = dis_op2_E_G ( sorb, False, Iop_And8, True, 1, delta, "and" ); 13785 break; 13786 case 0x23: /* AND Ev,Gv */ 13787 delta = dis_op2_E_G ( sorb, False, Iop_And8, True, sz, delta, "and" ); 13788 break; 13789 13790 case 0x2A: /* SUB Eb,Gb */ 13791 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, 1, delta, "sub" ); 13792 break; 13793 case 0x2B: /* SUB Ev,Gv */ 13794 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, sz, delta, "sub" ); 13795 break; 13796 13797 case 0x32: /* XOR Eb,Gb */ 13798 delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, 1, delta, "xor" ); 13799 break; 13800 case 0x33: /* XOR Ev,Gv */ 13801 delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, sz, delta, "xor" ); 13802 break; 13803 13804 case 0x3A: /* CMP Eb,Gb */ 13805 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, 1, delta, "cmp" ); 13806 break; 13807 case 0x3B: /* CMP Ev,Gv */ 13808 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, sz, delta, "cmp" ); 13809 break; 13810 13811 case 0x84: /* TEST Eb,Gb */ 13812 delta = dis_op2_E_G ( sorb, False, Iop_And8, False, 1, delta, "test" ); 13813 break; 13814 case 0x85: /* TEST Ev,Gv */ 13815 delta = dis_op2_E_G ( sorb, False, Iop_And8, False, sz, delta, "test" ); 13816 break; 13817 13818 /* ------------------------ opl Gv, Ev ----------------- */ 13819 13820 case 0x00: /* ADD Gb,Eb */ 13821 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13822 Iop_Add8, True, 1, delta, "add" ); 13823 break; 13824 case 0x01: /* ADD Gv,Ev */ 13825 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13826 Iop_Add8, True, sz, delta, "add" ); 13827 break; 13828 13829 case 0x08: /* OR Gb,Eb */ 13830 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13831 Iop_Or8, 
True, 1, delta, "or" ); 13832 break; 13833 case 0x09: /* OR Gv,Ev */ 13834 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13835 Iop_Or8, True, sz, delta, "or" ); 13836 break; 13837 13838 case 0x10: /* ADC Gb,Eb */ 13839 delta = dis_op2_G_E ( sorb, pfx_lock, True, 13840 Iop_Add8, True, 1, delta, "adc" ); 13841 break; 13842 case 0x11: /* ADC Gv,Ev */ 13843 delta = dis_op2_G_E ( sorb, pfx_lock, True, 13844 Iop_Add8, True, sz, delta, "adc" ); 13845 break; 13846 13847 case 0x18: /* SBB Gb,Eb */ 13848 delta = dis_op2_G_E ( sorb, pfx_lock, True, 13849 Iop_Sub8, True, 1, delta, "sbb" ); 13850 break; 13851 case 0x19: /* SBB Gv,Ev */ 13852 delta = dis_op2_G_E ( sorb, pfx_lock, True, 13853 Iop_Sub8, True, sz, delta, "sbb" ); 13854 break; 13855 13856 case 0x20: /* AND Gb,Eb */ 13857 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13858 Iop_And8, True, 1, delta, "and" ); 13859 break; 13860 case 0x21: /* AND Gv,Ev */ 13861 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13862 Iop_And8, True, sz, delta, "and" ); 13863 break; 13864 13865 case 0x28: /* SUB Gb,Eb */ 13866 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13867 Iop_Sub8, True, 1, delta, "sub" ); 13868 break; 13869 case 0x29: /* SUB Gv,Ev */ 13870 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13871 Iop_Sub8, True, sz, delta, "sub" ); 13872 break; 13873 13874 case 0x30: /* XOR Gb,Eb */ 13875 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13876 Iop_Xor8, True, 1, delta, "xor" ); 13877 break; 13878 case 0x31: /* XOR Gv,Ev */ 13879 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13880 Iop_Xor8, True, sz, delta, "xor" ); 13881 break; 13882 13883 case 0x38: /* CMP Gb,Eb */ 13884 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13885 Iop_Sub8, False, 1, delta, "cmp" ); 13886 break; 13887 case 0x39: /* CMP Gv,Ev */ 13888 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13889 Iop_Sub8, False, sz, delta, "cmp" ); 13890 break; 13891 13892 /* ------------------------ POP ------------------------ */ 13893 13894 case 0x58: /* POP eAX */ 13895 case 0x59: /* POP eCX */ 13896 case 0x5A: /* POP eDX */ 13897 case 0x5B: /* POP eBX */ 13898 case 0x5D: /* POP eBP */ 13899 case 0x5E: /* POP eSI */ 13900 case 0x5F: /* POP eDI */ 13901 case 0x5C: /* POP eSP */ 13902 vassert(sz == 2 || sz == 4); 13903 t1 = newTemp(szToITy(sz)); t2 = newTemp(Ity_I32); 13904 assign(t2, getIReg(4, R_ESP)); 13905 assign(t1, loadLE(szToITy(sz),mkexpr(t2))); 13906 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz))); 13907 putIReg(sz, opc-0x58, mkexpr(t1)); 13908 DIP("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58)); 13909 break; 13910 13911 case 0x9D: /* POPF */ 13912 vassert(sz == 2 || sz == 4); 13913 t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32); 13914 assign(t2, getIReg(4, R_ESP)); 13915 assign(t1, widenUto32(loadLE(szToITy(sz),mkexpr(t2)))); 13916 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz))); 13917 13918 /* Generate IR to set %EFLAGS{O,S,Z,A,C,P,D,ID,AC} from the 13919 value in t1. */ 13920 set_EFLAGS_from_value( t1, True/*emit_AC_emwarn*/, 13921 ((Addr32)guest_EIP_bbstart)+delta ); 13922 13923 DIP("popf%c\n", nameISize(sz)); 13924 break; 13925 13926 case 0x61: /* POPA */ 13927 /* This is almost certainly wrong for sz==2. So ... */ 13928 if (sz != 4) goto decode_failure; 13929 13930 /* t5 is the old %ESP value. */ 13931 t5 = newTemp(Ity_I32); 13932 assign( t5, getIReg(4, R_ESP) ); 13933 13934 /* Reload all the registers, except %esp. 
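         The frame laid down by PUSHA, which the loads below walk
         (offsets are relative to the old %ESP, t5):

            +0  EDI   +4  ESI   +8  EBP   +12 ESP (ignored)
            +16 EBX   +20 EDX   +24 ECX   +28 EAX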
*/ 13935 putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) )); 13936 putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) )); 13937 putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) )); 13938 putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) )); 13939 /* ignore saved %ESP */ 13940 putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) )); 13941 putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) )); 13942 putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) )); 13943 13944 /* and move %ESP back up */ 13945 putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) ); 13946 13947 DIP("popa%c\n", nameISize(sz)); 13948 break; 13949 13950 case 0x8F: /* POPL/POPW m32 */ 13951 { Int len; 13952 UChar rm = getIByte(delta); 13953 13954 /* make sure this instruction is correct POP */ 13955 if (epartIsReg(rm) || gregOfRM(rm) != 0) 13956 goto decode_failure; 13957 /* and has correct size */ 13958 if (sz != 4 && sz != 2) 13959 goto decode_failure; 13960 ty = szToITy(sz); 13961 13962 t1 = newTemp(Ity_I32); /* stack address */ 13963 t3 = newTemp(ty); /* data */ 13964 /* set t1 to ESP: t1 = ESP */ 13965 assign( t1, getIReg(4, R_ESP) ); 13966 /* load M[ESP] to virtual register t3: t3 = M[t1] */ 13967 assign( t3, loadLE(ty, mkexpr(t1)) ); 13968 13969 /* increase ESP; must be done before the STORE. Intel manual says: 13970 If the ESP register is used as a base register for addressing 13971 a destination operand in memory, the POP instruction computes 13972 the effective address of the operand after it increments the 13973 ESP register. 13974 */ 13975 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(sz)) ); 13976 13977 /* resolve MODR/M */ 13978 addr = disAMode ( &len, sorb, delta, dis_buf); 13979 storeLE( mkexpr(addr), mkexpr(t3) ); 13980 13981 DIP("pop%c %s\n", sz==2 ? 'w' : 'l', dis_buf); 13982 13983 delta += len; 13984 break; 13985 } 13986 13987 case 0x1F: /* POP %DS */ 13988 dis_pop_segreg( R_DS, sz ); break; 13989 case 0x07: /* POP %ES */ 13990 dis_pop_segreg( R_ES, sz ); break; 13991 case 0x17: /* POP %SS */ 13992 dis_pop_segreg( R_SS, sz ); break; 13993 13994 /* ------------------------ PUSH ----------------------- */ 13995 13996 case 0x50: /* PUSH eAX */ 13997 case 0x51: /* PUSH eCX */ 13998 case 0x52: /* PUSH eDX */ 13999 case 0x53: /* PUSH eBX */ 14000 case 0x55: /* PUSH eBP */ 14001 case 0x56: /* PUSH eSI */ 14002 case 0x57: /* PUSH eDI */ 14003 case 0x54: /* PUSH eSP */ 14004 /* This is the Right Way, in that the value to be pushed is 14005 established before %esp is changed, so that pushl %esp 14006 correctly pushes the old value. */ 14007 vassert(sz == 2 || sz == 4); 14008 ty = sz==2 ? 
Ity_I16 : Ity_I32; 14009 t1 = newTemp(ty); t2 = newTemp(Ity_I32); 14010 assign(t1, getIReg(sz, opc-0x50)); 14011 assign(t2, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz))); 14012 putIReg(4, R_ESP, mkexpr(t2) ); 14013 storeLE(mkexpr(t2),mkexpr(t1)); 14014 DIP("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50)); 14015 break; 14016 14017 14018 case 0x68: /* PUSH Iv */ 14019 d32 = getUDisp(sz,delta); delta += sz; 14020 goto do_push_I; 14021 case 0x6A: /* PUSH Ib, sign-extended to sz */ 14022 d32 = getSDisp8(delta); delta += 1; 14023 goto do_push_I; 14024 do_push_I: 14025 ty = szToITy(sz); 14026 t1 = newTemp(Ity_I32); t2 = newTemp(ty); 14027 assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); 14028 putIReg(4, R_ESP, mkexpr(t1) ); 14029 /* stop mkU16 asserting if d32 is a negative 16-bit number 14030 (bug #132813) */ 14031 if (ty == Ity_I16) 14032 d32 &= 0xFFFF; 14033 storeLE( mkexpr(t1), mkU(ty,d32) ); 14034 DIP("push%c $0x%x\n", nameISize(sz), d32); 14035 break; 14036 14037 case 0x9C: /* PUSHF */ { 14038 vassert(sz == 2 || sz == 4); 14039 14040 t1 = newTemp(Ity_I32); 14041 assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); 14042 putIReg(4, R_ESP, mkexpr(t1) ); 14043 14044 /* Calculate OSZACP, and patch in fixed fields as per 14045 Intel docs. 14046 - bit 1 is always 1 14047 - bit 9 is Interrupt Enable (should always be 1 in user mode?) 14048 */ 14049 t2 = newTemp(Ity_I32); 14050 assign( t2, binop(Iop_Or32, 14051 mk_x86g_calculate_eflags_all(), 14052 mkU32( (1<<1)|(1<<9) ) )); 14053 14054 /* Patch in the D flag. This can simply be a copy of bit 10 of 14055 baseBlock[OFFB_DFLAG]. */ 14056 t3 = newTemp(Ity_I32); 14057 assign( t3, binop(Iop_Or32, 14058 mkexpr(t2), 14059 binop(Iop_And32, 14060 IRExpr_Get(OFFB_DFLAG,Ity_I32), 14061 mkU32(1<<10))) 14062 ); 14063 14064 /* And patch in the ID flag. */ 14065 t4 = newTemp(Ity_I32); 14066 assign( t4, binop(Iop_Or32, 14067 mkexpr(t3), 14068 binop(Iop_And32, 14069 binop(Iop_Shl32, IRExpr_Get(OFFB_IDFLAG,Ity_I32), 14070 mkU8(21)), 14071 mkU32(1<<21))) 14072 ); 14073 14074 /* And patch in the AC flag. */ 14075 t5 = newTemp(Ity_I32); 14076 assign( t5, binop(Iop_Or32, 14077 mkexpr(t4), 14078 binop(Iop_And32, 14079 binop(Iop_Shl32, IRExpr_Get(OFFB_ACFLAG,Ity_I32), 14080 mkU8(18)), 14081 mkU32(1<<18))) 14082 ); 14083 14084 /* if sz==2, the stored value needs to be narrowed. */ 14085 if (sz == 2) 14086 storeLE( mkexpr(t1), unop(Iop_32to16,mkexpr(t5)) ); 14087 else 14088 storeLE( mkexpr(t1), mkexpr(t5) ); 14089 14090 DIP("pushf%c\n", nameISize(sz)); 14091 break; 14092 } 14093 14094 case 0x60: /* PUSHA */ 14095 /* This is almost certainly wrong for sz==2. So ... */ 14096 if (sz != 4) goto decode_failure; 14097 14098 /* This is the Right Way, in that the value to be pushed is 14099 established before %esp is changed, so that pusha 14100 correctly pushes the old %esp value. New value of %esp is 14101 pushed at start. */ 14102 /* t0 is the %ESP value we're going to push. */ 14103 t0 = newTemp(Ity_I32); 14104 assign( t0, getIReg(4, R_ESP) ); 14105 14106 /* t5 will be the new %ESP value. */ 14107 t5 = newTemp(Ity_I32); 14108 assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) ); 14109 14110 /* Update guest state before prodding memory. */ 14111 putIReg(4, R_ESP, mkexpr(t5)); 14112 14113 /* Dump all the registers. 
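         Resulting frame (offsets relative to the new %ESP, t5,
         matching the stores below):

            +0  EDI   +4  ESI   +8  EBP   +12 old ESP
            +16 EBX   +20 EDX   +24 ECX   +28 EAX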
*/ 14114 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) ); 14115 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) ); 14116 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) ); 14117 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) ); 14118 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/); 14119 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) ); 14120 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) ); 14121 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) ); 14122 14123 DIP("pusha%c\n", nameISize(sz)); 14124 break; 14125 14126 case 0x0E: /* PUSH %CS */ 14127 dis_push_segreg( R_CS, sz ); break; 14128 case 0x1E: /* PUSH %DS */ 14129 dis_push_segreg( R_DS, sz ); break; 14130 case 0x06: /* PUSH %ES */ 14131 dis_push_segreg( R_ES, sz ); break; 14132 case 0x16: /* PUSH %SS */ 14133 dis_push_segreg( R_SS, sz ); break; 14134 14135 /* ------------------------ SCAS et al ----------------- */ 14136 14137 case 0xA4: /* MOVS, no REP prefix */ 14138 case 0xA5: 14139 if (sorb != 0) 14140 goto decode_failure; /* else dis_string_op asserts */ 14141 dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb ); 14142 break; 14143 14144 case 0xA6: /* CMPSb, no REP prefix */ 14145 case 0xA7: 14146 if (sorb != 0) 14147 goto decode_failure; /* else dis_string_op asserts */ 14148 dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb ); 14149 break; 14150 14151 case 0xAA: /* STOS, no REP prefix */ 14152 case 0xAB: 14153 if (sorb != 0) 14154 goto decode_failure; /* else dis_string_op asserts */ 14155 dis_string_op( dis_STOS, ( opc == 0xAA ? 1 : sz ), "stos", sorb ); 14156 break; 14157 14158 case 0xAC: /* LODS, no REP prefix */ 14159 case 0xAD: 14160 if (sorb != 0) 14161 goto decode_failure; /* else dis_string_op asserts */ 14162 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", sorb ); 14163 break; 14164 14165 case 0xAE: /* SCAS, no REP prefix */ 14166 case 0xAF: 14167 if (sorb != 0) 14168 goto decode_failure; /* else dis_string_op asserts */ 14169 dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb ); 14170 break; 14171 14172 14173 case 0xFC: /* CLD */ 14174 stmt( IRStmt_Put( OFFB_DFLAG, mkU32(1)) ); 14175 DIP("cld\n"); 14176 break; 14177 14178 case 0xFD: /* STD */ 14179 stmt( IRStmt_Put( OFFB_DFLAG, mkU32(0xFFFFFFFF)) ); 14180 DIP("std\n"); 14181 break; 14182 14183 case 0xF8: /* CLC */ 14184 case 0xF9: /* STC */ 14185 case 0xF5: /* CMC */ 14186 t0 = newTemp(Ity_I32); 14187 t1 = newTemp(Ity_I32); 14188 assign( t0, mk_x86g_calculate_eflags_all() ); 14189 switch (opc) { 14190 case 0xF8: 14191 assign( t1, binop(Iop_And32, mkexpr(t0), 14192 mkU32(~X86G_CC_MASK_C))); 14193 DIP("clc\n"); 14194 break; 14195 case 0xF9: 14196 assign( t1, binop(Iop_Or32, mkexpr(t0), 14197 mkU32(X86G_CC_MASK_C))); 14198 DIP("stc\n"); 14199 break; 14200 case 0xF5: 14201 assign( t1, binop(Iop_Xor32, mkexpr(t0), 14202 mkU32(X86G_CC_MASK_C))); 14203 DIP("cmc\n"); 14204 break; 14205 default: 14206 vpanic("disInstr(x86)(clc/stc/cmc)"); 14207 } 14208 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 14209 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 14210 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) )); 14211 /* Set NDEP even though it isn't used. This makes redundant-PUT 14212 elimination of previous stores to this field work better. 
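         (For reference, the effect of the OP=COPY scheme used here: a
         later x86g_calculate_eflags_all() returns CC_DEP1 verbatim, so
         e.g. CLC leaves DEP1 == old_eflags & ~X86G_CC_MASK_C and STC
         leaves DEP1 == old_eflags | X86G_CC_MASK_C.)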
*/ 14213 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 14214 break; 14215 14216 case 0xD6: /* SALC */ 14217 t0 = newTemp(Ity_I32); 14218 t1 = newTemp(Ity_I32); 14219 assign( t0, binop(Iop_And32, 14220 mk_x86g_calculate_eflags_c(), 14221 mkU32(1)) ); 14222 assign( t1, binop(Iop_Sar32, 14223 binop(Iop_Shl32, mkexpr(t0), mkU8(31)), 14224 mkU8(31)) ); 14225 putIReg(1, R_EAX, unop(Iop_32to8, mkexpr(t1)) ); 14226 DIP("salc\n"); 14227 break; 14228 14229 /* REPNE prefix insn */ 14230 case 0xF2: { 14231 Addr32 eip_orig = guest_EIP_bbstart + delta_start; 14232 if (sorb != 0) goto decode_failure; 14233 abyte = getIByte(delta); delta++; 14234 14235 if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; } 14236 14237 switch (abyte) { 14238 /* According to the Intel manual, "repne movs" should never occur, but 14239 * in practice it has happened, so allow for it here... */ 14240 case 0xA4: sz = 1; /* REPNE MOVS<sz> */ 14241 case 0xA5: 14242 dis_REP_op ( &dres, X86CondNZ, dis_MOVS, sz, eip_orig, 14243 guest_EIP_bbstart+delta, "repne movs" ); 14244 break; 14245 14246 case 0xA6: sz = 1; /* REPNE CMP<sz> */ 14247 case 0xA7: 14248 dis_REP_op ( &dres, X86CondNZ, dis_CMPS, sz, eip_orig, 14249 guest_EIP_bbstart+delta, "repne cmps" ); 14250 break; 14251 14252 case 0xAA: sz = 1; /* REPNE STOS<sz> */ 14253 case 0xAB: 14254 dis_REP_op ( &dres, X86CondNZ, dis_STOS, sz, eip_orig, 14255 guest_EIP_bbstart+delta, "repne stos" ); 14256 break; 14257 14258 case 0xAE: sz = 1; /* REPNE SCAS<sz> */ 14259 case 0xAF: 14260 dis_REP_op ( &dres, X86CondNZ, dis_SCAS, sz, eip_orig, 14261 guest_EIP_bbstart+delta, "repne scas" ); 14262 break; 14263 14264 default: 14265 goto decode_failure; 14266 } 14267 break; 14268 } 14269 14270 /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE, 14271 for the rest, it means REP) */ 14272 case 0xF3: { 14273 Addr32 eip_orig = guest_EIP_bbstart + delta_start; 14274 abyte = getIByte(delta); delta++; 14275 14276 if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; } 14277 14278 if (sorb != 0 && abyte != 0x0F) goto decode_failure; 14279 14280 switch (abyte) { 14281 case 0x0F: 14282 switch (getIByte(delta)) { 14283 /* On older CPUs, TZCNT behaves the same as BSF. */ 14284 case 0xBC: /* REP BSF Gv,Ev */ 14285 delta = dis_bs_E_G ( sorb, sz, delta + 1, True ); 14286 break; 14287 /* On older CPUs, LZCNT behaves the same as BSR. 
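               (They differ on newer ones: for src = 0x00000010, BSR
               writes 4, the index of the highest set bit, whereas
               LZCNT writes 27, the number of leading zeroes; and for
               src == 0, BSR leaves its destination undefined whereas
               LZCNT writes the operand width.)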
*/ 14288 case 0xBD: /* REP BSR Gv,Ev */ 14289 delta = dis_bs_E_G ( sorb, sz, delta + 1, False ); 14290 break; 14291 default: 14292 goto decode_failure; 14293 } 14294 break; 14295 14296 case 0xA4: sz = 1; /* REP MOVS<sz> */ 14297 case 0xA5: 14298 dis_REP_op ( &dres, X86CondAlways, dis_MOVS, sz, eip_orig, 14299 guest_EIP_bbstart+delta, "rep movs" ); 14300 break; 14301 14302 case 0xA6: sz = 1; /* REPE CMP<sz> */ 14303 case 0xA7: 14304 dis_REP_op ( &dres, X86CondZ, dis_CMPS, sz, eip_orig, 14305 guest_EIP_bbstart+delta, "repe cmps" ); 14306 break; 14307 14308 case 0xAA: sz = 1; /* REP STOS<sz> */ 14309 case 0xAB: 14310 dis_REP_op ( &dres, X86CondAlways, dis_STOS, sz, eip_orig, 14311 guest_EIP_bbstart+delta, "rep stos" ); 14312 break; 14313 14314 case 0xAC: sz = 1; /* REP LODS<sz> */ 14315 case 0xAD: 14316 dis_REP_op ( &dres, X86CondAlways, dis_LODS, sz, eip_orig, 14317 guest_EIP_bbstart+delta, "rep lods" ); 14318 break; 14319 14320 case 0xAE: sz = 1; /* REPE SCAS<sz> */ 14321 case 0xAF: 14322 dis_REP_op ( &dres, X86CondZ, dis_SCAS, sz, eip_orig, 14323 guest_EIP_bbstart+delta, "repe scas" ); 14324 break; 14325 14326 case 0x90: /* REP NOP (PAUSE) */ 14327 /* a hint to the P4 re spin-wait loop */ 14328 DIP("rep nop (P4 pause)\n"); 14329 /* "observe" the hint. The Vex client needs to be careful not 14330 to cause very long delays as a result, though. */ 14331 jmp_lit(&dres, Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta); 14332 vassert(dres.whatNext == Dis_StopHere); 14333 break; 14334 14335 case 0xC3: /* REP RET -- same as normal ret? */ 14336 dis_ret(&dres, 0); 14337 DIP("rep ret\n"); 14338 break; 14339 14340 default: 14341 goto decode_failure; 14342 } 14343 break; 14344 } 14345 14346 /* ------------------------ XCHG ----------------------- */ 14347 14348 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK 14349 prefix; hence it must be translated with an IRCAS (at least, the 14350 memory variant). */ 14351 case 0x86: /* XCHG Gb,Eb */ 14352 sz = 1; 14353 /* Fall through ... 
*/ 14354 case 0x87: /* XCHG Gv,Ev */ 14355 modrm = getIByte(delta); 14356 ty = szToITy(sz); 14357 t1 = newTemp(ty); t2 = newTemp(ty); 14358 if (epartIsReg(modrm)) { 14359 assign(t1, getIReg(sz, eregOfRM(modrm))); 14360 assign(t2, getIReg(sz, gregOfRM(modrm))); 14361 putIReg(sz, gregOfRM(modrm), mkexpr(t1)); 14362 putIReg(sz, eregOfRM(modrm), mkexpr(t2)); 14363 delta++; 14364 DIP("xchg%c %s, %s\n", 14365 nameISize(sz), nameIReg(sz,gregOfRM(modrm)), 14366 nameIReg(sz,eregOfRM(modrm))); 14367 } else { 14368 *expect_CAS = True; 14369 addr = disAMode ( &alen, sorb, delta, dis_buf ); 14370 assign( t1, loadLE(ty,mkexpr(addr)) ); 14371 assign( t2, getIReg(sz,gregOfRM(modrm)) ); 14372 casLE( mkexpr(addr), 14373 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr ); 14374 putIReg( sz, gregOfRM(modrm), mkexpr(t1) ); 14375 delta += alen; 14376 DIP("xchg%c %s, %s\n", nameISize(sz), 14377 nameIReg(sz,gregOfRM(modrm)), dis_buf); 14378 } 14379 break; 14380 14381 case 0x90: /* XCHG eAX,eAX */ 14382 DIP("nop\n"); 14383 break; 14384 case 0x91: /* XCHG eAX,eCX */ 14385 case 0x92: /* XCHG eAX,eDX */ 14386 case 0x93: /* XCHG eAX,eBX */ 14387 case 0x94: /* XCHG eAX,eSP */ 14388 case 0x95: /* XCHG eAX,eBP */ 14389 case 0x96: /* XCHG eAX,eSI */ 14390 case 0x97: /* XCHG eAX,eDI */ 14391 codegen_xchg_eAX_Reg ( sz, opc - 0x90 ); 14392 break; 14393 14394 /* ------------------------ XLAT ----------------------- */ 14395 14396 case 0xD7: /* XLAT */ 14397 if (sz != 4) goto decode_failure; /* sz == 2 is also allowed (0x66) */ 14398 putIReg( 14399 1, 14400 R_EAX/*AL*/, 14401 loadLE(Ity_I8, 14402 handleSegOverride( 14403 sorb, 14404 binop(Iop_Add32, 14405 getIReg(4, R_EBX), 14406 unop(Iop_8Uto32, getIReg(1, R_EAX/*AL*/)))))); 14407 14408 DIP("xlat%c [ebx]\n", nameISize(sz)); 14409 break; 14410 14411 /* ------------------------ IN / OUT ----------------------- */ 14412 14413 case 0xE4: /* IN imm8, AL */ 14414 sz = 1; 14415 t1 = newTemp(Ity_I32); 14416 abyte = getIByte(delta); delta++; 14417 assign(t1, mkU32( abyte & 0xFF )); 14418 DIP("in%c $%d,%s\n", nameISize(sz), abyte, nameIReg(sz,R_EAX)); 14419 goto do_IN; 14420 case 0xE5: /* IN imm8, eAX */ 14421 vassert(sz == 2 || sz == 4); 14422 t1 = newTemp(Ity_I32); 14423 abyte = getIByte(delta); delta++; 14424 assign(t1, mkU32( abyte & 0xFF )); 14425 DIP("in%c $%d,%s\n", nameISize(sz), abyte, nameIReg(sz,R_EAX)); 14426 goto do_IN; 14427 case 0xEC: /* IN %DX, AL */ 14428 sz = 1; 14429 t1 = newTemp(Ity_I32); 14430 assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX))); 14431 DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX), 14432 nameIReg(sz,R_EAX)); 14433 goto do_IN; 14434 case 0xED: /* IN %DX, eAX */ 14435 vassert(sz == 2 || sz == 4); 14436 t1 = newTemp(Ity_I32); 14437 assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX))); 14438 DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX), 14439 nameIReg(sz,R_EAX)); 14440 goto do_IN; 14441 do_IN: { 14442 /* At this point, sz indicates the width, and t1 is a 32-bit 14443 value giving port number. */ 14444 IRDirty* d; 14445 vassert(sz == 1 || sz == 2 || sz == 4); 14446 ty = szToITy(sz); 14447 t2 = newTemp(Ity_I32); 14448 d = unsafeIRDirty_1_N( 14449 t2, 14450 0/*regparms*/, 14451 "x86g_dirtyhelper_IN", 14452 &x86g_dirtyhelper_IN, 14453 mkIRExprVec_2( mkexpr(t1), mkU32(sz) ) 14454 ); 14455 /* do the call, dumping the result in t2. 
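         The IR here implies a helper with a C signature along the
         lines of

            UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz );

         the port read itself must happen at run time in the helper,
         since IR has no primitive for port I/O.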
*/ 14456 stmt( IRStmt_Dirty(d) ); 14457 putIReg(sz, R_EAX, narrowTo( ty, mkexpr(t2) ) ); 14458 break; 14459 } 14460 14461 case 0xE6: /* OUT AL, imm8 */ 14462 sz = 1; 14463 t1 = newTemp(Ity_I32); 14464 abyte = getIByte(delta); delta++; 14465 assign( t1, mkU32( abyte & 0xFF ) ); 14466 DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), abyte); 14467 goto do_OUT; 14468 case 0xE7: /* OUT eAX, imm8 */ 14469 vassert(sz == 2 || sz == 4); 14470 t1 = newTemp(Ity_I32); 14471 abyte = getIByte(delta); delta++; 14472 assign( t1, mkU32( abyte & 0xFF ) ); 14473 DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), abyte); 14474 goto do_OUT; 14475 case 0xEE: /* OUT AL, %DX */ 14476 sz = 1; 14477 t1 = newTemp(Ity_I32); 14478 assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) ); 14479 DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX), 14480 nameIReg(2,R_EDX)); 14481 goto do_OUT; 14482 case 0xEF: /* OUT eAX, %DX */ 14483 vassert(sz == 2 || sz == 4); 14484 t1 = newTemp(Ity_I32); 14485 assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) ); 14486 DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX), 14487 nameIReg(2,R_EDX)); 14488 goto do_OUT; 14489 do_OUT: { 14490 /* At this point, sz indicates the width, and t1 is a 32-bit 14491 value giving port number. */ 14492 IRDirty* d; 14493 vassert(sz == 1 || sz == 2 || sz == 4); 14494 ty = szToITy(sz); 14495 d = unsafeIRDirty_0_N( 14496 0/*regparms*/, 14497 "x86g_dirtyhelper_OUT", 14498 &x86g_dirtyhelper_OUT, 14499 mkIRExprVec_3( mkexpr(t1), 14500 widenUto32( getIReg(sz, R_EAX) ), 14501 mkU32(sz) ) 14502 ); 14503 stmt( IRStmt_Dirty(d) ); 14504 break; 14505 } 14506 14507 /* ------------------------ (Grp1 extensions) ---------- */ 14508 14509 case 0x82: /* Grp1 Ib,Eb too. Apparently this is the same as 14510 case 0x80, but only in 32-bit mode. 
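                  (0x82 is an undocumented alias: 82 /r ib decodes
                  exactly as 80 /r ib, e.g. the bytes 82 C3 05 mean the
                  same as 80 C3 05, viz. addb $5, %bl.  In 64-bit mode
                  the opcode is #UD, hence "only in 32-bit mode".)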
*/ 14511 /* fallthru */ 14512 case 0x80: /* Grp1 Ib,Eb */ 14513 modrm = getIByte(delta); 14514 am_sz = lengthAMode(delta); 14515 sz = 1; 14516 d_sz = 1; 14517 d32 = getUChar(delta + am_sz); 14518 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 ); 14519 break; 14520 14521 case 0x81: /* Grp1 Iv,Ev */ 14522 modrm = getIByte(delta); 14523 am_sz = lengthAMode(delta); 14524 d_sz = sz; 14525 d32 = getUDisp(d_sz, delta + am_sz); 14526 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 ); 14527 break; 14528 14529 case 0x83: /* Grp1 Ib,Ev */ 14530 modrm = getIByte(delta); 14531 am_sz = lengthAMode(delta); 14532 d_sz = 1; 14533 d32 = getSDisp8(delta + am_sz); 14534 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 ); 14535 break; 14536 14537 /* ------------------------ (Grp2 extensions) ---------- */ 14538 14539 case 0xC0: { /* Grp2 Ib,Eb */ 14540 Bool decode_OK = True; 14541 modrm = getIByte(delta); 14542 am_sz = lengthAMode(delta); 14543 d_sz = 1; 14544 d32 = getUChar(delta + am_sz); 14545 sz = 1; 14546 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14547 mkU8(d32 & 0xFF), NULL, &decode_OK ); 14548 if (!decode_OK) 14549 goto decode_failure; 14550 break; 14551 } 14552 case 0xC1: { /* Grp2 Ib,Ev */ 14553 Bool decode_OK = True; 14554 modrm = getIByte(delta); 14555 am_sz = lengthAMode(delta); 14556 d_sz = 1; 14557 d32 = getUChar(delta + am_sz); 14558 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14559 mkU8(d32 & 0xFF), NULL, &decode_OK ); 14560 if (!decode_OK) 14561 goto decode_failure; 14562 break; 14563 } 14564 case 0xD0: { /* Grp2 1,Eb */ 14565 Bool decode_OK = True; 14566 modrm = getIByte(delta); 14567 am_sz = lengthAMode(delta); 14568 d_sz = 0; 14569 d32 = 1; 14570 sz = 1; 14571 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14572 mkU8(d32), NULL, &decode_OK ); 14573 if (!decode_OK) 14574 goto decode_failure; 14575 break; 14576 } 14577 case 0xD1: { /* Grp2 1,Ev */ 14578 Bool decode_OK = True; 14579 modrm = getUChar(delta); 14580 am_sz = lengthAMode(delta); 14581 d_sz = 0; 14582 d32 = 1; 14583 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14584 mkU8(d32), NULL, &decode_OK ); 14585 if (!decode_OK) 14586 goto decode_failure; 14587 break; 14588 } 14589 case 0xD2: { /* Grp2 CL,Eb */ 14590 Bool decode_OK = True; 14591 modrm = getUChar(delta); 14592 am_sz = lengthAMode(delta); 14593 d_sz = 0; 14594 sz = 1; 14595 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14596 getIReg(1,R_ECX), "%cl", &decode_OK ); 14597 if (!decode_OK) 14598 goto decode_failure; 14599 break; 14600 } 14601 case 0xD3: { /* Grp2 CL,Ev */ 14602 Bool decode_OK = True; 14603 modrm = getIByte(delta); 14604 am_sz = lengthAMode(delta); 14605 d_sz = 0; 14606 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14607 getIReg(1,R_ECX), "%cl", &decode_OK ); 14608 if (!decode_OK) 14609 goto decode_failure; 14610 break; 14611 } 14612 14613 /* ------------------------ (Grp3 extensions) ---------- */ 14614 14615 case 0xF6: { /* Grp3 Eb */ 14616 Bool decode_OK = True; 14617 delta = dis_Grp3 ( sorb, pfx_lock, 1, delta, &decode_OK ); 14618 if (!decode_OK) 14619 goto decode_failure; 14620 break; 14621 } 14622 case 0xF7: { /* Grp3 Ev */ 14623 Bool decode_OK = True; 14624 delta = dis_Grp3 ( sorb, pfx_lock, sz, delta, &decode_OK ); 14625 if (!decode_OK) 14626 goto decode_failure; 14627 break; 14628 } 14629 14630 /* ------------------------ (Grp4 extensions) ---------- */ 14631 14632 case 0xFE: { /* Grp4 Eb */ 14633 Bool decode_OK = True; 14634 
delta = dis_Grp4 ( sorb, pfx_lock, delta, &decode_OK );
14635       if (!decode_OK)
14636          goto decode_failure;
14637       break;
14638    }
14639
14640    /* ------------------------ (Grp5 extensions) ---------- */
14641
14642    case 0xFF: { /* Grp5 Ev */
14643       Bool decode_OK = True;
14644       delta = dis_Grp5 ( sorb, pfx_lock, sz, delta, &dres, &decode_OK );
14645       if (!decode_OK)
14646          goto decode_failure;
14647       break;
14648    }
14649
14650    /* ------------------------ Escapes to 2-byte opcodes -- */
14651
14652    case 0x0F: {
14653       opc = getIByte(delta); delta++;
14654       switch (opc) {
14655
14656       /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */
14657
14658       case 0xBA: { /* Grp8 Ib,Ev */
14659          Bool decode_OK = False;
14660          modrm = getUChar(delta);
14661          am_sz = lengthAMode(delta);
14662          d32   = getSDisp8(delta + am_sz);
14663          delta = dis_Grp8_Imm ( sorb, pfx_lock, delta, modrm,
14664                                 am_sz, sz, d32, &decode_OK );
14665          if (!decode_OK)
14666             goto decode_failure;
14667          break;
14668       }
14669
14670       /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */
14671
14672       case 0xBC: /* BSF Gv,Ev */
14673          delta = dis_bs_E_G ( sorb, sz, delta, True );
14674          break;
14675       case 0xBD: /* BSR Gv,Ev */
14676          delta = dis_bs_E_G ( sorb, sz, delta, False );
14677          break;
14678
14679       /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */
14680
14681       case 0xC8: /* BSWAP %eax */
14682       case 0xC9:
14683       case 0xCA:
14684       case 0xCB:
14685       case 0xCC:
14686       case 0xCD:
14687       case 0xCE:
14688       case 0xCF: /* BSWAP %edi */
14689          /* AFAICS from the Intel docs, this only exists at size 4. */
14690          if (sz != 4) goto decode_failure;
14691
14692          t1 = newTemp(Ity_I32);
14693          assign( t1, getIReg(4, opc-0xC8) );
14694          t2 = math_BSWAP(t1, Ity_I32);
14695
14696          putIReg(4, opc-0xC8, mkexpr(t2));
14697          DIP("bswapl %s\n", nameIReg(4, opc-0xC8));
14698          break;
14699
14700       /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
14701
14702       case 0xA3: /* BT Gv,Ev */
14703          delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpNone );
14704          break;
14705       case 0xB3: /* BTR Gv,Ev */
14706          delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpReset );
14707          break;
14708       case 0xAB: /* BTS Gv,Ev */
14709          delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpSet );
14710          break;
14711       case 0xBB: /* BTC Gv,Ev */
14712          delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpComp );
14713          break;
14714
14715       /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
14716
14717       case 0x40: /* CMOVOb (cmov overflow) */
14718       case 0x41: /* CMOVNOb (cmov no overflow) */
14719       case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
14720       case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
14721       case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
14722       case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
14723       case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
14724       case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
14725       case 0x48: /* CMOVSb (cmov negative) */
14726       case 0x49: /* CMOVNSb (cmov not negative) */
14727       case 0x4A: /* CMOVP (cmov parity even) */
14728       case 0x4B: /* CMOVNP (cmov parity odd) */
14729       case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
14730       case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
14731       case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
14732       case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
14733          delta = dis_cmov_E_G(sorb, sz, (X86Condcode)(opc - 0x40), delta);
14734          break;
14735
14736       /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
14737
14738       case 0xB0: /* CMPXCHG Gb,Eb */
14739          delta = dis_cmpxchg_G_E ( sorb, pfx_lock, 1, delta );
14740          break;
14741       case 0xB1: /* CMPXCHG Gv,Ev */
14742          delta = dis_cmpxchg_G_E (
sorb, pfx_lock, sz, delta ); 14743 break; 14744 14745 case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */ 14746 IRTemp expdHi = newTemp(Ity_I32); 14747 IRTemp expdLo = newTemp(Ity_I32); 14748 IRTemp dataHi = newTemp(Ity_I32); 14749 IRTemp dataLo = newTemp(Ity_I32); 14750 IRTemp oldHi = newTemp(Ity_I32); 14751 IRTemp oldLo = newTemp(Ity_I32); 14752 IRTemp flags_old = newTemp(Ity_I32); 14753 IRTemp flags_new = newTemp(Ity_I32); 14754 IRTemp success = newTemp(Ity_I1); 14755 14756 /* Translate this using a DCAS, even if there is no LOCK 14757 prefix. Life is too short to bother with generating two 14758 different translations for the with/without-LOCK-prefix 14759 cases. */ 14760 *expect_CAS = True; 14761 14762 /* Decode, and generate address. */ 14763 if (sz != 4) goto decode_failure; 14764 modrm = getIByte(delta); 14765 if (epartIsReg(modrm)) goto decode_failure; 14766 if (gregOfRM(modrm) != 1) goto decode_failure; 14767 addr = disAMode ( &alen, sorb, delta, dis_buf ); 14768 delta += alen; 14769 14770 /* Get the expected and new values. */ 14771 assign( expdHi, getIReg(4,R_EDX) ); 14772 assign( expdLo, getIReg(4,R_EAX) ); 14773 assign( dataHi, getIReg(4,R_ECX) ); 14774 assign( dataLo, getIReg(4,R_EBX) ); 14775 14776 /* Do the DCAS */ 14777 stmt( IRStmt_CAS( 14778 mkIRCAS( oldHi, oldLo, 14779 Iend_LE, mkexpr(addr), 14780 mkexpr(expdHi), mkexpr(expdLo), 14781 mkexpr(dataHi), mkexpr(dataLo) 14782 ))); 14783 14784 /* success when oldHi:oldLo == expdHi:expdLo */ 14785 assign( success, 14786 binop(Iop_CasCmpEQ32, 14787 binop(Iop_Or32, 14788 binop(Iop_Xor32, mkexpr(oldHi), mkexpr(expdHi)), 14789 binop(Iop_Xor32, mkexpr(oldLo), mkexpr(expdLo)) 14790 ), 14791 mkU32(0) 14792 )); 14793 14794 /* If the DCAS is successful, that is to say oldHi:oldLo == 14795 expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX, 14796 which is where they came from originally. Both the actual 14797 contents of these two regs, and any shadow values, are 14798 unchanged. If the DCAS fails then we're putting into 14799 EDX:EAX the value seen in memory. */ 14800 putIReg(4, R_EDX, 14801 IRExpr_ITE( mkexpr(success), 14802 mkexpr(expdHi), mkexpr(oldHi) 14803 )); 14804 putIReg(4, R_EAX, 14805 IRExpr_ITE( mkexpr(success), 14806 mkexpr(expdLo), mkexpr(oldLo) 14807 )); 14808 14809 /* Copy the success bit into the Z flag and leave the others 14810 unchanged */ 14811 assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all())); 14812 assign( 14813 flags_new, 14814 binop(Iop_Or32, 14815 binop(Iop_And32, mkexpr(flags_old), 14816 mkU32(~X86G_CC_MASK_Z)), 14817 binop(Iop_Shl32, 14818 binop(Iop_And32, 14819 unop(Iop_1Uto32, mkexpr(success)), mkU32(1)), 14820 mkU8(X86G_CC_SHIFT_Z)) )); 14821 14822 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 14823 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) )); 14824 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 14825 /* Set NDEP even though it isn't used. This makes 14826 redundant-PUT elimination of previous stores to this field 14827 work better. */ 14828 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 14829 14830 /* Sheesh. Aren't you glad it was me and not you that had to 14831 write and validate all this grunge? 

         /* If the DCAS is successful, that is to say oldHi:oldLo ==
            expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX,
            which is where they came from originally.  Both the actual
            contents of these two regs, and any shadow values, are
            unchanged.  If the DCAS fails then we're putting into
            EDX:EAX the value seen in memory. */
         putIReg(4, R_EDX,
                    IRExpr_ITE( mkexpr(success),
                                mkexpr(expdHi), mkexpr(oldHi)
                ));
         putIReg(4, R_EAX,
                    IRExpr_ITE( mkexpr(success),
                                mkexpr(expdLo), mkexpr(oldLo)
                ));

         /* Copy the success bit into the Z flag and leave the others
            unchanged */
         assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all()));
         assign(
            flags_new,
            binop(Iop_Or32,
                  binop(Iop_And32, mkexpr(flags_old),
                                   mkU32(~X86G_CC_MASK_Z)),
                  binop(Iop_Shl32,
                        binop(Iop_And32,
                              unop(Iop_1Uto32, mkexpr(success)), mkU32(1)),
                        mkU8(X86G_CC_SHIFT_Z)) ));

         stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
         /* Set NDEP even though it isn't used.  This makes
            redundant-PUT elimination of previous stores to this field
            work better. */
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

         /* Sheesh.  Aren't you glad it was me and not you that had to
            write and validate all this grunge? */

         DIP("cmpxchg8b %s\n", dis_buf);
         break;
      }

      /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */

      case 0xA2: { /* CPUID */
         /* Uses dirty helper:
               void dirtyhelper_CPUID_sse[012] ( VexGuestX86State* )
            declared to mod eax, wr ebx, ecx, edx
         */
         IRDirty* d     = NULL;
         void*    fAddr = NULL;
         const HChar* fName = NULL;
         if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3) {
            fName = "x86g_dirtyhelper_CPUID_sse3";
            fAddr = &x86g_dirtyhelper_CPUID_sse3;
         }
         else
         if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2) {
            fName = "x86g_dirtyhelper_CPUID_sse2";
            fAddr = &x86g_dirtyhelper_CPUID_sse2;
         }
         else
         if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE1) {
            fName = "x86g_dirtyhelper_CPUID_sse1";
            fAddr = &x86g_dirtyhelper_CPUID_sse1;
         }
         else
         if (archinfo->hwcaps & VEX_HWCAPS_X86_MMXEXT) {
            fName = "x86g_dirtyhelper_CPUID_mmxext";
            fAddr = &x86g_dirtyhelper_CPUID_mmxext;
         }
         else
         if (archinfo->hwcaps == 0/*no SSE*/) {
            fName = "x86g_dirtyhelper_CPUID_sse0";
            fAddr = &x86g_dirtyhelper_CPUID_sse0;
         } else
            vpanic("disInstr(x86)(cpuid)");

         vassert(fName); vassert(fAddr);
         d = unsafeIRDirty_0_N ( 0/*regparms*/,
                                 fName, fAddr, mkIRExprVec_1(IRExpr_BBPTR()) );
         /* declare guest state effects */
         d->nFxState = 4;
         vex_bzero(&d->fxState, sizeof(d->fxState));
         d->fxState[0].fx     = Ifx_Modify;
         d->fxState[0].offset = OFFB_EAX;
         d->fxState[0].size   = 4;
         d->fxState[1].fx     = Ifx_Write;
         d->fxState[1].offset = OFFB_EBX;
         d->fxState[1].size   = 4;
         d->fxState[2].fx     = Ifx_Modify;
         d->fxState[2].offset = OFFB_ECX;
         d->fxState[2].size   = 4;
         d->fxState[3].fx     = Ifx_Write;
         d->fxState[3].offset = OFFB_EDX;
         d->fxState[3].size   = 4;
         /* execute the dirty call, side-effecting guest state */
         stmt( IRStmt_Dirty(d) );
         /* CPUID is a serialising insn.  So, just in case someone is
            using it as a memory fence ... */
         stmt( IRStmt_MBE(Imbe_Fence) );
         DIP("cpuid\n");
         break;
      }
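
      /* Aside (added sketch; the body shown is hypothetical): the
         fxState annotations above must agree with what the helper
         actually touches.  A helper honouring that contract has
         roughly this shape:

            void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
            {
               switch (st->guest_EAX) {      // EAX: read (Ifx_Modify)
                  case 0:
                     st->guest_EAX = ...;    // ... and written
                     st->guest_EBX = ...;    // EBX: Ifx_Write only
                     ...
               }
            }

         Mis-declaring fx/offset/size does not crash the simulation,
         but it misleads Memcheck's definedness tracking. */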

//--      if (!VG_(cpu_has_feature)(VG_X86_FEAT_CPUID))
//--         goto decode_failure;
//--
//--      t1 = newTemp(cb);
//--      t2 = newTemp(cb);
//--      t3 = newTemp(cb);
//--      t4 = newTemp(cb);
//--      uInstr0(cb, CALLM_S, 0);
//--
//--      uInstr2(cb, GET,   4, ArchReg, R_EAX, TempReg, t1);
//--      uInstr1(cb, PUSH,  4, TempReg, t1);
//--
//--      uInstr2(cb, MOV,   4, Literal, 0, TempReg, t2);
//--      uLiteral(cb, 0);
//--      uInstr1(cb, PUSH,  4, TempReg, t2);
//--
//--      uInstr2(cb, MOV,   4, Literal, 0, TempReg, t3);
//--      uLiteral(cb, 0);
//--      uInstr1(cb, PUSH,  4, TempReg, t3);
//--
//--      uInstr2(cb, MOV,   4, Literal, 0, TempReg, t4);
//--      uLiteral(cb, 0);
//--      uInstr1(cb, PUSH,  4, TempReg, t4);
//--
//--      uInstr1(cb, CALLM, 0, Lit16,   VGOFF_(helper_CPUID));
//--      uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty);
//--
//--      uInstr1(cb, POP,   4, TempReg, t4);
//--      uInstr2(cb, PUT,   4, TempReg, t4, ArchReg, R_EDX);
//--
//--      uInstr1(cb, POP,   4, TempReg, t3);
//--      uInstr2(cb, PUT,   4, TempReg, t3, ArchReg, R_ECX);
//--
//--      uInstr1(cb, POP,   4, TempReg, t2);
//--      uInstr2(cb, PUT,   4, TempReg, t2, ArchReg, R_EBX);
//--
//--      uInstr1(cb, POP,   4, TempReg, t1);
//--      uInstr2(cb, PUT,   4, TempReg, t1, ArchReg, R_EAX);
//--
//--      uInstr0(cb, CALLM_E, 0);
//--      DIP("cpuid\n");
//--      break;
//--
      /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */

      case 0xB6: /* MOVZXb Eb,Gv */
         if (sz != 2 && sz != 4)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 1, sz, False );
         break;

      case 0xB7: /* MOVZXw Ew,Gv */
         if (sz != 4)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 2, 4, False );
         break;

      case 0xBE: /* MOVSXb Eb,Gv */
         if (sz != 2 && sz != 4)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 1, sz, True );
         break;

      case 0xBF: /* MOVSXw Ew,Gv */
         if (sz != 4 && /* accept movsww, sigh, see #250799 */ sz != 2)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 2, sz, True );
         break;

//--      /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
//--
//--      case 0xC3: /* MOVNTI Gv,Ev */
//--         vg_assert(sz == 4);
//--         modrm = getUChar(eip);
//--         vg_assert(!epartIsReg(modrm));
//--         t1 = newTemp(cb);
//--         uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
//--         pair = disAMode ( cb, sorb, eip, dis_buf );
//--         t2 = LOW24(pair);
//--         eip += HI8(pair);
//--         uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
//--         DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
//--         break;

      /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */

      case 0xAF: /* IMUL Ev, Gv */
         delta = dis_mul_E_G ( sorb, sz, delta );
         break;

      /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */

      case 0x1F:
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) goto decode_failure;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         DIP("nop%c %s\n", nameISize(sz), dis_buf);
         break;
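
      /* Aside (added note): 0F 1F /0 is Intel's recommended
         multi-byte NOP, which is why a memory-style ModRM is
         insisted on above.  E.g. the canonical 6-byte form
            66 0F 1F 44 00 00      nopw 0x0(%eax,%eax,1)
         decodes an address expression that is then simply ignored. */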

      /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */

      case 0x80:
      case 0x81:
      case 0x82: /* JBb/JNAEb (jump below) */
      case 0x83: /* JNBb/JAEb (jump not below) */
      case 0x84: /* JZb/JEb (jump zero) */
      case 0x85: /* JNZb/JNEb (jump not zero) */
      case 0x86: /* JBEb/JNAb (jump below or equal) */
      case 0x87: /* JNBEb/JAb (jump not below or equal) */
      case 0x88: /* JSb (jump negative) */
      case 0x89: /* JNSb (jump not negative) */
      case 0x8A: /* JP (jump parity even) */
      case 0x8B: /* JNP/JPO (jump parity odd) */
      case 0x8C: /* JLb/JNGEb (jump less) */
      case 0x8D: /* JGEb/JNLb (jump greater or equal) */
      case 0x8E: /* JLEb/JNGb (jump less or equal) */
      case 0x8F: /* JGb/JNLEb (jump greater) */
      { Int     jmpDelta;
        const HChar* comment = "";
        jmpDelta = (Int)getUDisp32(delta);
        d32 = (((Addr32)guest_EIP_bbstart)+delta+4) + jmpDelta;
        delta += 4;
        if (resteerCisOk
            && vex_control.guest_chase_cond
            && (Addr32)d32 != (Addr32)guest_EIP_bbstart
            && jmpDelta < 0
            && resteerOkFn( callback_opaque, (Addr32)d32) ) {
           /* Speculation: assume this backward branch is taken.  So
              we need to emit a side-exit to the insn following this
              one, on the negation of the condition, and continue at
              the branch target address (d32).  If we wind up back at
              the first instruction of the trace, just stop; it's
              better to let the IR loop unroller handle that case. */
           stmt( IRStmt_Exit(
                    mk_x86g_calculate_condition((X86Condcode)
                                                (1 ^ (opc - 0x80))),
                    Ijk_Boring,
                    IRConst_U32(guest_EIP_bbstart+delta),
                    OFFB_EIP ) );
           dres.whatNext   = Dis_ResteerC;
           dres.continueAt = (Addr32)d32;
           comment = "(assumed taken)";
        }
        else
        if (resteerCisOk
            && vex_control.guest_chase_cond
            && (Addr32)d32 != (Addr32)guest_EIP_bbstart
            && jmpDelta >= 0
            && resteerOkFn( callback_opaque,
                            (Addr32)(guest_EIP_bbstart+delta)) ) {
           /* Speculation: assume this forward branch is not taken.
              So we need to emit a side-exit to d32 (the dest) and
              continue disassembling at the insn immediately
              following this one. */
           stmt( IRStmt_Exit(
                    mk_x86g_calculate_condition((X86Condcode)(opc - 0x80)),
                    Ijk_Boring,
                    IRConst_U32(d32),
                    OFFB_EIP ) );
           dres.whatNext   = Dis_ResteerC;
           dres.continueAt = guest_EIP_bbstart + delta;
           comment = "(assumed not taken)";
        }
        else {
           /* Conservative default translation - end the block at
              this point. */
           jcc_01( &dres, (X86Condcode)(opc - 0x80),
                   (Addr32)(guest_EIP_bbstart+delta), d32);
           vassert(dres.whatNext == Dis_StopHere);
        }
        DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc - 0x80), d32, comment);
        break;
      }
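
      /* Aside (added note): the `1 ^ (opc - 0x80)` trick above relies
         on complementary conditions sitting in adjacent even/odd
         slots of the X86Condcode enumeration, so flipping bit 0
         negates the test, e.g. jz (cc 4) inverts to cc 5 (jnz).
         Hence "assumed taken": side-exit on the negated condition to
         the fall-through address, then keep disassembling at d32. */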

      /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */

      case 0x31: { /* RDTSC */
         IRTemp   val  = newTemp(Ity_I64);
         IRExpr** args = mkIRExprVec_0();
         IRDirty* d    = unsafeIRDirty_1_N (
                            val,
                            0/*regparms*/,
                            "x86g_dirtyhelper_RDTSC",
                            &x86g_dirtyhelper_RDTSC,
                            args
                         );
         /* execute the dirty call, dumping the result in val. */
         stmt( IRStmt_Dirty(d) );
         putIReg(4, R_EDX, unop(Iop_64HIto32, mkexpr(val)));
         putIReg(4, R_EAX, unop(Iop_64to32, mkexpr(val)));
         DIP("rdtsc\n");
         break;
      }

      /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */

      case 0xA1: /* POP %FS */
         dis_pop_segreg( R_FS, sz ); break;
      case 0xA9: /* POP %GS */
         dis_pop_segreg( R_GS, sz ); break;

      case 0xA0: /* PUSH %FS */
         dis_push_segreg( R_FS, sz ); break;
      case 0xA8: /* PUSH %GS */
         dis_push_segreg( R_GS, sz ); break;

      /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */

      case 0x90:
      case 0x91:
      case 0x92: /* set-Bb/set-NAEb (set if below) */
      case 0x93: /* set-NBb/set-AEb (set if not below) */
      case 0x94: /* set-Zb/set-Eb (set if zero) */
      case 0x95: /* set-NZb/set-NEb (set if not zero) */
      case 0x96: /* set-BEb/set-NAb (set if below or equal) */
      case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
      case 0x98: /* set-Sb (set if negative) */
      case 0x99: /* set-NSb (set if not negative) */
      case 0x9A: /* set-P (set if parity even) */
      case 0x9B: /* set-NP (set if parity odd) */
      case 0x9C: /* set-Lb/set-NGEb (set if less) */
      case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
      case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
      case 0x9F: /* set-Gb/set-NLEb (set if greater) */
         t1 = newTemp(Ity_I8);
         assign( t1, unop(Iop_1Uto8,mk_x86g_calculate_condition(opc-0x90)) );
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putIReg(1, eregOfRM(modrm), mkexpr(t1));
            DIP("set%s %s\n", name_X86Condcode(opc-0x90),
                              nameIReg(1,eregOfRM(modrm)));
         } else {
            addr = disAMode ( &alen, sorb, delta, dis_buf );
            delta += alen;
            storeLE( mkexpr(addr), mkexpr(t1) );
            DIP("set%s %s\n", name_X86Condcode(opc-0x90), dis_buf);
         }
         break;

      /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */

      case 0xA4: /* SHLDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, True );
         break;
      case 0xA5: /* SHLDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", True );
         break;

      case 0xAC: /* SHRDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, False );
         break;
      case 0xAD: /* SHRDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", False );
         break;
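
      /* Aside (added note): for 32-bit operands the double shifts
         behave, for shift counts 1 <= c <= 31, as
            shld:  dst = (dst << c) | (src >> (32-c))
            shrd:  dst = (dst >> c) | (src << (32-c))
         with c == 0 a no-op and c >= 32 architecturally undefined.
         E.g. c = 8, dst = 0xAABBCCDD, src = 0x11223344 gives
         0xBBCCDD11 for shld and 0x44AABBCC for shrd. */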

      /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */

      case 0x34:
         /* Simple implementation needing a long explanation.

            sysenter is a kind of syscall entry.  The key thing here
            is that the return address is not known -- that is
            something that is beyond Vex's knowledge.  So this IR
            forces a return to the scheduler, which can do what it
            likes to simulate the sysenter, but it MUST set this
            thread's guest_EIP field with the continuation address
            before resuming execution.  If that doesn't happen, the
            thread will jump to address zero, which is probably
            fatal.
         */

         /* Note where we are, so we can back up the guest to this
            point if the syscall needs to be restarted. */
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(&dres, Ijk_Sys_sysenter, 0/*bogus next EIP value*/);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("sysenter\n");
         break;

      /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */

      case 0xC0: { /* XADD Gb,Eb */
         Bool decodeOK;
         delta = dis_xadd_G_E ( sorb, pfx_lock, 1, delta, &decodeOK );
         if (!decodeOK) goto decode_failure;
         break;
      }
      case 0xC1: { /* XADD Gv,Ev */
         Bool decodeOK;
         delta = dis_xadd_G_E ( sorb, pfx_lock, sz, delta, &decodeOK );
         if (!decodeOK) goto decode_failure;
         break;
      }

      /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */

      case 0x71:
      case 0x72:
      case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */

      case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
      case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
      case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
      case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xFC:
      case 0xFD:
      case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xEC:
      case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xDC:
      case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF8:
      case 0xF9:
      case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xE8:
      case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xD8:
      case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x74:
      case 0x75:
      case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x64:
      case 0x65:
      case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x68:
      case 0x69:
      case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x60:
      case 0x61:
      case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xF2:
      case 0xF3:

      case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD2:
      case 0xD3:

      case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE2:
      {
         Int  delta0    = delta-1;
         Bool decode_OK = False;

         /* If sz==2 this is SSE, and we assume sse idec has
            already spotted those cases by now. */
         if (sz != 4)
            goto decode_failure;

         delta = dis_MMX ( &decode_OK, sorb, sz, delta-1 );
         if (!decode_OK) {
            delta = delta0;
            goto decode_failure;
         }
         break;
      }
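
      /* Aside (added note): these opcode bytes are shared between MMX
         and SSE2 -- a 66 prefix (hence sz == 2) selects the XMM form,
         e.g. 0F 6F is MMX movq whereas 66 0F 6F is SSE2 movdqa.  The
         prefixed forms should already have been claimed by the SSE
         decoder, which is why the sz != 4 check above bails out
         instead of attempting an MMX decode. */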

      case 0x0E: /* FEMMS */
      case 0x77: /* EMMS */
         if (sz != 4)
            goto decode_failure;
         do_EMMS_preamble();
         DIP("{f}emms\n");
         break;

      /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */

      case 0x01: /* 0F 01 /0 -- SGDT */
                 /* 0F 01 /1 -- SIDT */
      {
         /* This is really revolting, but ... since each processor
            (core) only has one IDT and one GDT, just let the guest
            see it (pass-through semantics).  I can't see any way to
            construct a faked-up value, so don't bother to try. */
         modrm = getUChar(delta);
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         if (epartIsReg(modrm)) goto decode_failure;
         if (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)
            goto decode_failure;
         switch (gregOfRM(modrm)) {
            case 0: DIP("sgdt %s\n", dis_buf); break;
            case 1: DIP("sidt %s\n", dis_buf); break;
            default: vassert(0); /*NOTREACHED*/
         }

         IRDirty* d = unsafeIRDirty_0_N (
                         0/*regparms*/,
                         "x86g_dirtyhelper_SxDT",
                         &x86g_dirtyhelper_SxDT,
                         mkIRExprVec_2( mkexpr(addr),
                                        mkU32(gregOfRM(modrm)) )
                      );
         /* declare we're writing memory */
         d->mFx   = Ifx_Write;
         d->mAddr = mkexpr(addr);
         d->mSize = 6;
         stmt( IRStmt_Dirty(d) );
         break;
      }

      case 0x05: /* AMD's syscall */
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(&dres, Ijk_Sys_syscall, ((Addr32)guest_EIP_bbstart)+delta);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("syscall\n");
         break;

      /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */

      default:
         goto decode_failure;
      } /* switch (opc) for the 2-byte opcodes */
      goto decode_success;
   } /* case 0x0F: of primary opcode */

   /* ------------------------ ??? ------------------------ */

   default:
   decode_failure:
   /* All decode failures end up here. */
   if (sigill_diag) {
      vex_printf("vex x86->IR: unhandled instruction bytes: "
                 "0x%x 0x%x 0x%x 0x%x\n",
                 getIByte(delta_start+0),
                 getIByte(delta_start+1),
                 getIByte(delta_start+2),
                 getIByte(delta_start+3));
   }

   /* Tell the dispatcher that this insn cannot be decoded, and so
      has not been executed, and (is currently) the next to be
      executed.  EIP should be up-to-date since it is made so at the
      start of each insn, but nevertheless be paranoid and update it
      again right now. */
   stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
   jmp_lit(&dres, Ijk_NoDecode, guest_EIP_curr_instr);
   vassert(dres.whatNext == Dis_StopHere);
   dres.len = 0;
   /* We also need to say that a CAS is not expected now, regardless
      of what it might have been set to at the start of the function,
      since the IR that we've emitted just above (to synthesise a
      SIGILL) does not involve any CAS, and presumably no other IR has
      been emitted for this (non-decoded) insn. */
   *expect_CAS = False;
   return dres;

   } /* switch (opc) for the main (primary) opcode switch. */
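
   /* Aside (added note): only translations that fall through need
      EIP written back below.  The Dis_StopHere cases have already
      recorded the next EIP via jmp_lit/jcc_01, so the switch under
      decode_success deliberately writes nothing for them. */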

   decode_success:
   /* All decode successes end up here. */
   switch (dres.whatNext) {
      case Dis_Continue:
         stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) );
         break;
      case Dis_ResteerU:
      case Dis_ResteerC:
         stmt( IRStmt_Put( OFFB_EIP, mkU32(dres.continueAt) ) );
         break;
      case Dis_StopHere:
         break;
      default:
         vassert(0);
   }

   DIP("\n");
   dres.len = delta - delta_start;
   return dres;
}

#undef DIP
#undef DIS


/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_X86 ( IRSB*        irsb_IN,
                         Bool         (*resteerOkFn) ( void*, Addr ),
                         Bool         resteerCisOk,
                         void*        callback_opaque,
                         const UChar* guest_code_IN,
                         Long         delta,
                         Addr         guest_IP,
                         VexArch      guest_arch,
                         const VexArchInfo* archinfo,
                         const VexAbiInfo*  abiinfo,
                         VexEndness   host_endness_IN,
                         Bool         sigill_diag_IN )
{
   Int       i, x1, x2;
   Bool      expect_CAS, has_CAS;
   DisResult dres;

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchX86);
   guest_code           = guest_code_IN;
   irsb                 = irsb_IN;
   host_endness         = host_endness_IN;
   guest_EIP_curr_instr = (Addr32)guest_IP;
   guest_EIP_bbstart    = (Addr32)toUInt(guest_IP - delta);

   x1 = irsb_IN->stmts_used;
   expect_CAS = False;
   dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn,
                             resteerCisOk,
                             callback_opaque,
                             delta, archinfo, abiinfo, sigill_diag_IN );
   x2 = irsb_IN->stmts_used;
   vassert(x2 >= x1);

   /* See comment at the top of disInstr_X86_WRK for meaning of
      expect_CAS.  Here, we (sanity-)check for the presence/absence of
      IRCAS as directed by the returned expect_CAS value. */
   has_CAS = False;
   for (i = x1; i < x2; i++) {
      if (irsb_IN->stmts[i]->tag == Ist_CAS)
         has_CAS = True;
   }

   if (expect_CAS != has_CAS) {
      /* inconsistency detected.  re-disassemble the instruction so as
         to generate a useful error message; then assert. */
      vex_traceflags |= VEX_TRACE_FE;
      dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn,
                                resteerCisOk,
                                callback_opaque,
                                delta, archinfo, abiinfo, sigill_diag_IN );
      for (i = x1; i < x2; i++) {
         vex_printf("\t\t");
         ppIRStmt(irsb_IN->stmts[i]);
         vex_printf("\n");
      }
      /* Failure of this assertion is serious and denotes a bug in
         disInstr. */
      vpanic("disInstr_X86: inconsistency in LOCK prefix handling");
   }

   return dres;
}


/*--------------------------------------------------------------------*/
/*--- end                                         guest_x86_toIR.c ---*/
/*--------------------------------------------------------------------*/