/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_x86_defs.h"

/* TODO 21 Apr 2005:

   -- (Really an assembler issue) don't emit CMov32 as a cmov
      insn, since that's expensive on P4 and conditional branch
      is cheaper if (as we expect) the condition is highly predictable

   -- preserve xmm registers across function calls (by declaring them
      as trashed by call insns)

   -- preserve x87 ST stack discipline across function calls.  Sigh.

   -- Check doHelperCall: if a call is conditional, we cannot safely
      compute any regparm args directly to registers.  Hence, the
      fast-regparm marshalling should be restricted to unconditional
      calls only.
*/

/*---------------------------------------------------------*/
/*--- x87 control word stuff                            ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged at exit.
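
   For reference only, a rough decode of those two values (not used by
   the code below; field layout as per the usual x86 documentation):

      0x027F (x87 CW):  IM|DM|ZM|OM|UM|PM exception masks set
                        (bits 0..5), PC = 10b (53-bit mantissa),
                        RC = 00b (round to nearest)
      0x1F80 (%mxcsr):  all six exception mask bits (bits 7..12) set,
                        RC = 00b (round to nearest), FZ = DAZ = 0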
78*/ 79 80#define DEFAULT_FPUCW 0x027F 81 82/* debugging only, do not use */ 83/* define DEFAULT_FPUCW 0x037F */ 84 85 86/*---------------------------------------------------------*/ 87/*--- misc helpers ---*/ 88/*---------------------------------------------------------*/ 89 90/* These are duplicated in guest-x86/toIR.c */ 91static IRExpr* unop ( IROp op, IRExpr* a ) 92{ 93 return IRExpr_Unop(op, a); 94} 95 96static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 97{ 98 return IRExpr_Binop(op, a1, a2); 99} 100 101static IRExpr* bind ( Int binder ) 102{ 103 return IRExpr_Binder(binder); 104} 105 106static Bool isZeroU8 ( IRExpr* e ) 107{ 108 return e->tag == Iex_Const 109 && e->Iex.Const.con->tag == Ico_U8 110 && e->Iex.Const.con->Ico.U8 == 0; 111} 112 113static Bool isZeroU32 ( IRExpr* e ) 114{ 115 return e->tag == Iex_Const 116 && e->Iex.Const.con->tag == Ico_U32 117 && e->Iex.Const.con->Ico.U32 == 0; 118} 119 120//static Bool isZeroU64 ( IRExpr* e ) 121//{ 122// return e->tag == Iex_Const 123// && e->Iex.Const.con->tag == Ico_U64 124// && e->Iex.Const.con->Ico.U64 == 0ULL; 125//} 126 127 128/*---------------------------------------------------------*/ 129/*--- ISelEnv ---*/ 130/*---------------------------------------------------------*/ 131 132/* This carries around: 133 134 - A mapping from IRTemp to IRType, giving the type of any IRTemp we 135 might encounter. This is computed before insn selection starts, 136 and does not change. 137 138 - A mapping from IRTemp to HReg. This tells the insn selector 139 which virtual register(s) are associated with each IRTemp 140 temporary. This is computed before insn selection starts, and 141 does not change. We expect this mapping to map precisely the 142 same set of IRTemps as the type mapping does. 143 144 - vregmap holds the primary register for the IRTemp. 145 - vregmapHI is only used for 64-bit integer-typed 146 IRTemps. It holds the identity of a second 147 32-bit virtual HReg, which holds the high half 148 of the value. 149 150 - The code array, that is, the insns selected so far. 151 152 - A counter, for generating new virtual registers. 153 154 - The host subarchitecture we are selecting insns for. 155 This is set at the start and does not change. 156 157 - A Bool for indicating whether we may generate chain-me 158 instructions for control flow transfers, or whether we must use 159 XAssisted. 160 161 - The maximum guest address of any guest insn in this block. 162 Actually, the address of the highest-addressed byte from any insn 163 in this block. Is set at the start and does not change. This is 164 used for detecting jumps which are definitely forward-edges from 165 this block, and therefore can be made (chained) to the fast entry 166 point of the destination, thereby avoiding the destination's 167 event check. 168 169 Note, this is all (well, mostly) host-independent. 170*/ 171 172typedef 173 struct { 174 /* Constant -- are set at the start and do not change. */ 175 IRTypeEnv* type_env; 176 177 HReg* vregmap; 178 HReg* vregmapHI; 179 Int n_vregmap; 180 181 UInt hwcaps; 182 183 Bool chainingAllowed; 184 Addr32 max_ga; 185 186 /* These are modified as we go along. 
*/ 187 HInstrArray* code; 188 Int vreg_ctr; 189 } 190 ISelEnv; 191 192 193static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp ) 194{ 195 vassert(tmp >= 0); 196 vassert(tmp < env->n_vregmap); 197 return env->vregmap[tmp]; 198} 199 200static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp ) 201{ 202 vassert(tmp >= 0); 203 vassert(tmp < env->n_vregmap); 204 vassert(! hregIsInvalid(env->vregmapHI[tmp])); 205 *vrLO = env->vregmap[tmp]; 206 *vrHI = env->vregmapHI[tmp]; 207} 208 209static void addInstr ( ISelEnv* env, X86Instr* instr ) 210{ 211 addHInstr(env->code, instr); 212 if (vex_traceflags & VEX_TRACE_VCODE) { 213 ppX86Instr(instr, False); 214 vex_printf("\n"); 215 } 216} 217 218static HReg newVRegI ( ISelEnv* env ) 219{ 220 HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr); 221 env->vreg_ctr++; 222 return reg; 223} 224 225static HReg newVRegF ( ISelEnv* env ) 226{ 227 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr); 228 env->vreg_ctr++; 229 return reg; 230} 231 232static HReg newVRegV ( ISelEnv* env ) 233{ 234 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr); 235 env->vreg_ctr++; 236 return reg; 237} 238 239 240/*---------------------------------------------------------*/ 241/*--- ISEL: Forward declarations ---*/ 242/*---------------------------------------------------------*/ 243 244/* These are organised as iselXXX and iselXXX_wrk pairs. The 245 iselXXX_wrk do the real work, but are not to be called directly. 246 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then 247 checks that all returned registers are virtual. You should not 248 call the _wrk version directly. 249*/ 250static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e ); 251static X86RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e ); 252 253static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e ); 254static X86RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e ); 255 256static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e ); 257static X86RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e ); 258 259static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e ); 260static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e ); 261 262static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e ); 263static X86AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e ); 264 265static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, 266 ISelEnv* env, const IRExpr* e ); 267static void iselInt64Expr ( HReg* rHi, HReg* rLo, 268 ISelEnv* env, const IRExpr* e ); 269 270static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e ); 271static X86CondCode iselCondCode ( ISelEnv* env, const IRExpr* e ); 272 273static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e ); 274static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e ); 275 276static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e ); 277static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e ); 278 279static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e ); 280static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e ); 281 282 283/*---------------------------------------------------------*/ 284/*--- ISEL: Misc helpers ---*/ 285/*---------------------------------------------------------*/ 286 287/* Make a int reg-reg move. 
*/ 288 289static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst ) 290{ 291 vassert(hregClass(src) == HRcInt32); 292 vassert(hregClass(dst) == HRcInt32); 293 return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst); 294} 295 296 297/* Make a vector reg-reg move. */ 298 299static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst ) 300{ 301 vassert(hregClass(src) == HRcVec128); 302 vassert(hregClass(dst) == HRcVec128); 303 return X86Instr_SseReRg(Xsse_MOV, src, dst); 304} 305 306/* Advance/retreat %esp by n. */ 307 308static void add_to_esp ( ISelEnv* env, Int n ) 309{ 310 vassert(n > 0 && n < 256 && (n%4) == 0); 311 addInstr(env, 312 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP())); 313} 314 315static void sub_from_esp ( ISelEnv* env, Int n ) 316{ 317 vassert(n > 0 && n < 256 && (n%4) == 0); 318 addInstr(env, 319 X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP())); 320} 321 322 323/* Given an amode, return one which references 4 bytes further 324 along. */ 325 326static X86AMode* advance4 ( X86AMode* am ) 327{ 328 X86AMode* am4 = dopyX86AMode(am); 329 switch (am4->tag) { 330 case Xam_IRRS: 331 am4->Xam.IRRS.imm += 4; break; 332 case Xam_IR: 333 am4->Xam.IR.imm += 4; break; 334 default: 335 vpanic("advance4(x86,host)"); 336 } 337 return am4; 338} 339 340 341/* Push an arg onto the host stack, in preparation for a call to a 342 helper function of some kind. Returns the number of 32-bit words 343 pushed. If we encounter an IRExpr_VECRET() then we expect that 344 r_vecRetAddr will be a valid register, that holds the relevant 345 address. 346*/ 347static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr ) 348{ 349 if (UNLIKELY(arg->tag == Iex_VECRET)) { 350 vassert(0); //ATC 351 vassert(!hregIsInvalid(r_vecRetAddr)); 352 addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr))); 353 return 1; 354 } 355 if (UNLIKELY(arg->tag == Iex_GSPTR)) { 356 addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP()))); 357 return 1; 358 } 359 /* Else it's a "normal" expression. */ 360 IRType arg_ty = typeOfIRExpr(env->type_env, arg); 361 if (arg_ty == Ity_I32) { 362 addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg))); 363 return 1; 364 } else 365 if (arg_ty == Ity_I64) { 366 HReg rHi, rLo; 367 iselInt64Expr(&rHi, &rLo, env, arg); 368 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 369 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 370 return 2; 371 } 372 ppIRExpr(arg); 373 vpanic("pushArg(x86): can't handle arg of this type"); 374} 375 376 377/* Complete the call to a helper function, by calling the 378 helper and clearing the args off the stack. */ 379 380static 381void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc, 382 IRCallee* cee, Int n_arg_ws, 383 RetLoc rloc ) 384{ 385 /* Complication. Need to decide which reg to use as the fn address 386 pointer, in a way that doesn't trash regparm-passed 387 parameters. */ 388 vassert(sizeof(void*) == 4); 389 390 addInstr(env, X86Instr_Call( cc, (Addr)cee->addr, 391 cee->regparms, rloc)); 392 if (n_arg_ws > 0) 393 add_to_esp(env, 4*n_arg_ws); 394} 395 396 397/* Used only in doHelperCall. See big comment in doHelperCall re 398 handling of regparm args. This function figures out whether 399 evaluation of an expression might require use of a fixed register. 400 If in doubt return True (safe but suboptimal). 401*/ 402static 403Bool mightRequireFixedRegs ( IRExpr* e ) 404{ 405 if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) { 406 // These are always "safe" -- either a copy of %esp in some 407 // arbitrary vreg, or a copy of %ebp, respectively. 
      return False;
   }
   /* Else it's a "normal" expression. */
   switch (e->tag) {
      case Iex_RdTmp: case Iex_Const: case Iex_Get:
         return False;
      default:
         return True;
   }
}


/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done. */

static
void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   X86CondCode cc;
   HReg        argregs[3];
   HReg        tmpregs[3];
   Bool        danger;
   Int         not_done_yet, n_args, n_arg_ws, stack_limit,
               i, argreg, argregX;

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of Iex_VECRET and Iex_GSPTR are observed. */
   UInt nVECRETs = 0;
   UInt nGSPTRs  = 0;

   /* Marshal args for a call, do the call, and clear the stack.
      Complexities to consider:

      * The return type can be I{64,32,16,8} or V128.  In the V128
        case, it is expected that |args| will contain the special
        node IRExpr_VECRET(), in which case this routine generates
        code to allocate space on the stack for the vector return
        value.  Since we are not passing any scalars on the stack, it
        is enough to preallocate the return space before marshalling
        any arguments, in this case.

        |args| may also contain IRExpr_GSPTR(), in which case the
        value in %ebp is passed as the corresponding argument.

      * If the callee claims regparmness of 1, 2 or 3, we must pass the
        first 1, 2 or 3 args in registers (EAX, EDX, and ECX
        respectively).  To keep things relatively simple, only args of
        type I32 may be passed as regparms -- just bomb out if anything
        else turns up.  Clearly this depends on the front ends not
        trying to pass any other types as regparms.
   */

   /* 16 Nov 2004: the regparm handling is complicated by the
      following problem.

      Consider a call to a function with two regparm parameters:
      f(e1,e2).  We need to compute e1 into %eax and e2 into %edx.
      Suppose code is first generated to compute e1 into %eax.  Then,
      code is generated to compute e2 into %edx.  Unfortunately, if
      the latter code sequence uses %eax, it will trash the value of
      e1 computed by the former sequence.  This could happen if (for
      example) e2 itself involved a function call.  In the code below,
      args are evaluated right-to-left, not left-to-right, but the
      principle and the problem are the same.

      One solution is to compute all regparm-bound args into vregs
      first, and once they are all done, move them to the relevant
      real regs.  This always gives correct code, but it also gives
      a bunch of vreg-to-rreg moves which are usually redundant but
      are hard for the register allocator to get rid of.

      A compromise is to first examine all regparm'd argument
      expressions.  If they are all so simple that it is clear
      they will be evaluated without use of any fixed registers,
      use the old compute-directly-to-fixed-target scheme.  If not,
      be safe and use the via-vregs scheme.
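
      As a concrete (purely illustrative) example: for a regparm-2
      call f(e1,e2) where one of the argument expressions itself
      contains a helper call, the code for that inner call clobbers
      %eax, %ecx and %edx (caller-saved), destroying any argument
      already moved into its fixed register.  The via-vregs scheme
      instead computes every regparm arg into a fresh vreg and only
      then copies them into %eax/%edx/%ecx.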
496 497 Note this requires being able to examine an expression and 498 determine whether or not evaluation of it might use a fixed 499 register. That requires knowledge of how the rest of this 500 insn selector works. Currently just the following 3 are 501 regarded as safe -- hopefully they cover the majority of 502 arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get. 503 */ 504 vassert(cee->regparms >= 0 && cee->regparms <= 3); 505 506 /* Count the number of args and also the VECRETs */ 507 n_args = n_arg_ws = 0; 508 while (args[n_args]) { 509 IRExpr* arg = args[n_args]; 510 n_args++; 511 if (UNLIKELY(arg->tag == Iex_VECRET)) { 512 nVECRETs++; 513 } else if (UNLIKELY(arg->tag == Iex_GSPTR)) { 514 nGSPTRs++; 515 } 516 } 517 518 /* If this fails, the IR is ill-formed */ 519 vassert(nGSPTRs == 0 || nGSPTRs == 1); 520 521 /* If we have a VECRET, allocate space on the stack for the return 522 value, and record the stack pointer after that. */ 523 HReg r_vecRetAddr = INVALID_HREG; 524 if (nVECRETs == 1) { 525 vassert(retTy == Ity_V128 || retTy == Ity_V256); 526 vassert(retTy != Ity_V256); // we don't handle that yet (if ever) 527 r_vecRetAddr = newVRegI(env); 528 sub_from_esp(env, 16); 529 addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr )); 530 } else { 531 // If either of these fail, the IR is ill-formed 532 vassert(retTy != Ity_V128 && retTy != Ity_V256); 533 vassert(nVECRETs == 0); 534 } 535 536 not_done_yet = n_args; 537 538 stack_limit = cee->regparms; 539 540 /* ------ BEGIN marshall all arguments ------ */ 541 542 /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */ 543 for (i = n_args-1; i >= stack_limit; i--) { 544 n_arg_ws += pushArg(env, args[i], r_vecRetAddr); 545 not_done_yet--; 546 } 547 548 /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in 549 registers. */ 550 551 if (cee->regparms > 0) { 552 553 /* ------ BEGIN deal with regparms ------ */ 554 555 /* deal with regparms, not forgetting %ebp if needed. */ 556 argregs[0] = hregX86_EAX(); 557 argregs[1] = hregX86_EDX(); 558 argregs[2] = hregX86_ECX(); 559 tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG; 560 561 argreg = cee->regparms; 562 563 /* In keeping with big comment above, detect potential danger 564 and use the via-vregs scheme if needed. */ 565 danger = False; 566 for (i = stack_limit-1; i >= 0; i--) { 567 if (mightRequireFixedRegs(args[i])) { 568 danger = True; 569 break; 570 } 571 } 572 573 if (danger) { 574 575 /* Move via temporaries */ 576 argregX = argreg; 577 for (i = stack_limit-1; i >= 0; i--) { 578 579 if (0) { 580 vex_printf("x86 host: register param is complex: "); 581 ppIRExpr(args[i]); 582 vex_printf("\n"); 583 } 584 585 IRExpr* arg = args[i]; 586 argreg--; 587 vassert(argreg >= 0); 588 if (UNLIKELY(arg->tag == Iex_VECRET)) { 589 vassert(0); //ATC 590 } 591 else if (UNLIKELY(arg->tag == Iex_GSPTR)) { 592 vassert(0); //ATC 593 } else { 594 vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32); 595 tmpregs[argreg] = iselIntExpr_R(env, arg); 596 } 597 not_done_yet--; 598 } 599 for (i = stack_limit-1; i >= 0; i--) { 600 argregX--; 601 vassert(argregX >= 0); 602 addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) ); 603 } 604 605 } else { 606 /* It's safe to compute all regparm args directly into their 607 target registers. 
   */
         for (i = stack_limit-1; i >= 0; i--) {
            IRExpr* arg = args[i];
            argreg--;
            vassert(argreg >= 0);
            if (UNLIKELY(arg->tag == Iex_VECRET)) {
               vassert(!hregIsInvalid(r_vecRetAddr));
               addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                             X86RMI_Reg(r_vecRetAddr),
                                             argregs[argreg]));
            }
            else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
               vassert(0); //ATC
            } else {
               vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
               addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                             iselIntExpr_RMI(env, arg),
                                             argregs[argreg]));
            }
            not_done_yet--;
         }

      }

      /* ------ END deal with regparms ------ */

   }

   vassert(not_done_yet == 0);

   /* ------ END marshall all arguments ------ */

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Xcc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Do final checks, set the return values, and generate the call
      instruction proper. */
   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64:
         *retloc = mk_RetLoc_simple(RLPri_2Int);
         break;
      case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         vassert(0); // ATC
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */
   callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc );
}


/* Given a guest-state array descriptor, an index expression and a
   bias, generate an X86AMode holding the relevant guest state
   offset. */

static
X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;
   Int  shift  = 0;

   /* throw out any cases not generated by an x86 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-x86-guest on x86 host. */

   if (nElems != 8)
      vpanic("genGuestArrayOffset(x86 host)(1)");

   switch (elemSz) {
      case 1:  shift = 0; break;
      case 4:  shift = 2; break;
      case 8:  shift = 3; break;
      default: vpanic("genGuestArrayOffset(x86 host)(2)");
   }

   /* Compute off into a reg, %off.  Then return:

         movl %off, %tmp
         addl $bias, %tmp    (if bias != 0)
         andl $7, %tmp
         ... base(%ebp, %tmp, shift) ...
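
      (The 'andl $7' wraps the index into the 8-entry circular
      guest-state array -- presumably the x87 register/tag arrays --
      so a biased or out-of-range index folds back into range,
      matching the front end's treatment of FP-stack indices.)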
723 */ 724 tmp = newVRegI(env); 725 roff = iselIntExpr_R(env, off); 726 addInstr(env, mk_iMOVsd_RR(roff, tmp)); 727 if (bias != 0) { 728 addInstr(env, 729 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp)); 730 } 731 addInstr(env, 732 X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp)); 733 return 734 X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift ); 735} 736 737 738/* Mess with the FPU's rounding mode: set to the default rounding mode 739 (DEFAULT_FPUCW). */ 740static 741void set_FPU_rounding_default ( ISelEnv* env ) 742{ 743 /* pushl $DEFAULT_FPUCW 744 fldcw 0(%esp) 745 addl $4, %esp 746 */ 747 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 748 addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW))); 749 addInstr(env, X86Instr_FpLdCW(zero_esp)); 750 add_to_esp(env, 4); 751} 752 753 754/* Mess with the FPU's rounding mode: 'mode' is an I32-typed 755 expression denoting a value in the range 0 .. 3, indicating a round 756 mode encoded as per type IRRoundingMode. Set the x87 FPU to have 757 the same rounding. 758*/ 759static 760void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode ) 761{ 762 HReg rrm = iselIntExpr_R(env, mode); 763 HReg rrm2 = newVRegI(env); 764 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 765 766 /* movl %rrm, %rrm2 767 andl $3, %rrm2 -- shouldn't be needed; paranoia 768 shll $10, %rrm2 769 orl $DEFAULT_FPUCW, %rrm2 770 pushl %rrm2 771 fldcw 0(%esp) 772 addl $4, %esp 773 */ 774 addInstr(env, mk_iMOVsd_RR(rrm, rrm2)); 775 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2)); 776 addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2)); 777 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2)); 778 addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2))); 779 addInstr(env, X86Instr_FpLdCW(zero_esp)); 780 add_to_esp(env, 4); 781} 782 783 784/* Generate !src into a new vector register, and be sure that the code 785 is SSE1 compatible. Amazing that Intel doesn't offer a less crappy 786 way to do this. 787*/ 788static HReg do_sse_Not128 ( ISelEnv* env, HReg src ) 789{ 790 HReg dst = newVRegV(env); 791 /* Set dst to zero. If dst contains a NaN then all hell might 792 break loose after the comparison. So, first zero it. */ 793 addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst)); 794 /* And now make it all 1s ... */ 795 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst)); 796 /* Finally, xor 'src' into it. */ 797 addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst)); 798 /* Doesn't that just totally suck? */ 799 return dst; 800} 801 802 803/* Round an x87 FPU value to 53-bit-mantissa precision, to be used 804 after most non-simple FPU operations (simple = +, -, *, / and 805 sqrt). 806 807 This could be done a lot more efficiently if needed, by loading 808 zero and adding it to the value to be rounded (fldz ; faddp?). 809*/ 810static void roundToF64 ( ISelEnv* env, HReg reg ) 811{ 812 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 813 sub_from_esp(env, 8); 814 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp)); 815 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp)); 816 add_to_esp(env, 8); 817} 818 819 820/*---------------------------------------------------------*/ 821/*--- ISEL: Integer expressions (32/16/8 bit) ---*/ 822/*---------------------------------------------------------*/ 823 824/* Select insns for an integer-typed expression, and add them to the 825 code list. Return a reg holding the result. This reg will be a 826 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. 
   If you want to modify it, ask for a new vreg, copy it in there, and
   modify the copy.  The register allocator will do its best to map
   both vregs to the same real register, so the copies will often
   disappear later in the game.

   This should handle expressions of 32, 16 and 8-bit type.  All
   results are returned in a 32-bit register.  For 16- and 8-bit
   expressions, the upper 16/24 bits are arbitrary, so you should mask
   or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt32);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
{
   MatchInfo mi;

   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   switch (e->tag) {

      /* --------- TEMP --------- */
      case Iex_RdTmp: {
         return lookupIRTemp(env, e->Iex.RdTmp.tmp);
      }

      /* --------- LOAD --------- */
      case Iex_Load: {
         HReg dst = newVRegI(env);
         X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

         /* We can't handle big-endian loads, nor load-linked. */
         if (e->Iex.Load.end != Iend_LE)
            goto irreducible;

         if (ty == Ity_I32) {
            addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                          X86RMI_Mem(amode), dst) );
            return dst;
         }
         if (ty == Ity_I16) {
            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
            return dst;
         }
         if (ty == Ity_I8) {
            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
            return dst;
         }
         break;
      }

      /* --------- TERNARY OP --------- */
      case Iex_Triop: {
         IRTriop *triop = e->Iex.Triop.details;
         /* C3210 flags following FPU partial remainder (fprem), both
            IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
         if (triop->op == Iop_PRemC3210F64
             || triop->op == Iop_PRem1C3210F64) {
            HReg junk = newVRegF(env);
            HReg dst  = newVRegI(env);
            HReg srcL = iselDblExpr(env, triop->arg2);
            HReg srcR = iselDblExpr(env, triop->arg3);
            /* XXXROUNDINGFIXME */
            /* set roundingmode here */
            addInstr(env, X86Instr_FpBinary(
                             triop->op==Iop_PRemC3210F64
                                ? Xfp_PREM : Xfp_PREM1,
                             srcL,srcR,junk
                   ));
            /* The previous pseudo-insn will have left the FPU's C3210
               flags set correctly.  So bag them. */
            addInstr(env, X86Instr_FpStSW_AX());
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
            addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
            return dst;
         }

         break;
      }

      /* --------- BINARY OP --------- */
      case Iex_Binop: {
         X86AluOp   aluOp;
         X86ShiftOp shOp;

         /* Pattern: Sub32(0,x) */
         if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
            HReg dst = newVRegI(env);
            HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(reg,dst));
            addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
            return dst;
         }

         /* Is it an addition or logical style op?
*/ 934 switch (e->Iex.Binop.op) { 935 case Iop_Add8: case Iop_Add16: case Iop_Add32: 936 aluOp = Xalu_ADD; break; 937 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: 938 aluOp = Xalu_SUB; break; 939 case Iop_And8: case Iop_And16: case Iop_And32: 940 aluOp = Xalu_AND; break; 941 case Iop_Or8: case Iop_Or16: case Iop_Or32: 942 aluOp = Xalu_OR; break; 943 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: 944 aluOp = Xalu_XOR; break; 945 case Iop_Mul16: case Iop_Mul32: 946 aluOp = Xalu_MUL; break; 947 default: 948 aluOp = Xalu_INVALID; break; 949 } 950 /* For commutative ops we assume any literal 951 values are on the second operand. */ 952 if (aluOp != Xalu_INVALID) { 953 HReg dst = newVRegI(env); 954 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1); 955 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 956 addInstr(env, mk_iMOVsd_RR(reg,dst)); 957 addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst)); 958 return dst; 959 } 960 /* Could do better here; forcing the first arg into a reg 961 isn't always clever. 962 -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)), 963 LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32( 964 t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32))) 965 movl 0xFFFFFFA0(%vr41),%vr107 966 movl 0xFFFFFFA4(%vr41),%vr108 967 movl %vr107,%vr106 968 xorl %vr108,%vr106 969 movl 0xFFFFFFA8(%vr41),%vr109 970 movl %vr106,%vr105 971 andl %vr109,%vr105 972 movl 0xFFFFFFA0(%vr41),%vr110 973 movl %vr105,%vr104 974 xorl %vr110,%vr104 975 movl %vr104,%vr70 976 */ 977 978 /* Perhaps a shift op? */ 979 switch (e->Iex.Binop.op) { 980 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8: 981 shOp = Xsh_SHL; break; 982 case Iop_Shr32: case Iop_Shr16: case Iop_Shr8: 983 shOp = Xsh_SHR; break; 984 case Iop_Sar32: case Iop_Sar16: case Iop_Sar8: 985 shOp = Xsh_SAR; break; 986 default: 987 shOp = Xsh_INVALID; break; 988 } 989 if (shOp != Xsh_INVALID) { 990 HReg dst = newVRegI(env); 991 992 /* regL = the value to be shifted */ 993 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1); 994 addInstr(env, mk_iMOVsd_RR(regL,dst)); 995 996 /* Do any necessary widening for 16/8 bit operands */ 997 switch (e->Iex.Binop.op) { 998 case Iop_Shr8: 999 addInstr(env, X86Instr_Alu32R( 1000 Xalu_AND, X86RMI_Imm(0xFF), dst)); 1001 break; 1002 case Iop_Shr16: 1003 addInstr(env, X86Instr_Alu32R( 1004 Xalu_AND, X86RMI_Imm(0xFFFF), dst)); 1005 break; 1006 case Iop_Sar8: 1007 addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst)); 1008 addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst)); 1009 break; 1010 case Iop_Sar16: 1011 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst)); 1012 addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst)); 1013 break; 1014 default: break; 1015 } 1016 1017 /* Now consider the shift amount. If it's a literal, we 1018 can do a much better job than the general case. */ 1019 if (e->Iex.Binop.arg2->tag == Iex_Const) { 1020 /* assert that the IR is well-typed */ 1021 Int nshift; 1022 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1023 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1024 vassert(nshift >= 0); 1025 if (nshift > 0) 1026 /* Can't allow nshift==0 since that means %cl */ 1027 addInstr(env, X86Instr_Sh32( shOp, nshift, dst )); 1028 } else { 1029 /* General case; we have to force the amount into %cl. */ 1030 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1031 addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX())); 1032 addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst)); 1033 } 1034 return dst; 1035 } 1036 1037 /* Handle misc other ops. 
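            Max32U, for instance, is generated just below as roughly
            the following sketch (%src1/%src2/%dst are stand-ins for
            the vregs involved):

               movl  %src1, %dst
               cmpl  %src2, %dst
               cmovb %src2, %dst     -- dst := unsigned max(src1,src2)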
*/ 1038 1039 if (e->Iex.Binop.op == Iop_Max32U) { 1040 HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1041 HReg dst = newVRegI(env); 1042 HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); 1043 addInstr(env, mk_iMOVsd_RR(src1,dst)); 1044 addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst)); 1045 addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst)); 1046 return dst; 1047 } 1048 1049 if (e->Iex.Binop.op == Iop_8HLto16) { 1050 HReg hi8 = newVRegI(env); 1051 HReg lo8 = newVRegI(env); 1052 HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1); 1053 HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2); 1054 addInstr(env, mk_iMOVsd_RR(hi8s, hi8)); 1055 addInstr(env, mk_iMOVsd_RR(lo8s, lo8)); 1056 addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8)); 1057 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8)); 1058 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8)); 1059 return hi8; 1060 } 1061 1062 if (e->Iex.Binop.op == Iop_16HLto32) { 1063 HReg hi16 = newVRegI(env); 1064 HReg lo16 = newVRegI(env); 1065 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1); 1066 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2); 1067 addInstr(env, mk_iMOVsd_RR(hi16s, hi16)); 1068 addInstr(env, mk_iMOVsd_RR(lo16s, lo16)); 1069 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16)); 1070 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16)); 1071 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16)); 1072 return hi16; 1073 } 1074 1075 if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8 1076 || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) { 1077 HReg a16 = newVRegI(env); 1078 HReg b16 = newVRegI(env); 1079 HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1); 1080 HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2); 1081 Int shift = (e->Iex.Binop.op == Iop_MullS8 1082 || e->Iex.Binop.op == Iop_MullU8) 1083 ? 24 : 16; 1084 X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8 1085 || e->Iex.Binop.op == Iop_MullS16) 1086 ? Xsh_SAR : Xsh_SHR; 1087 1088 addInstr(env, mk_iMOVsd_RR(a16s, a16)); 1089 addInstr(env, mk_iMOVsd_RR(b16s, b16)); 1090 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16)); 1091 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16)); 1092 addInstr(env, X86Instr_Sh32(shr_op, shift, a16)); 1093 addInstr(env, X86Instr_Sh32(shr_op, shift, b16)); 1094 addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16)); 1095 return b16; 1096 } 1097 1098 if (e->Iex.Binop.op == Iop_CmpF64) { 1099 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1); 1100 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2); 1101 HReg dst = newVRegI(env); 1102 addInstr(env, X86Instr_FpCmp(fL,fR,dst)); 1103 /* shift this right 8 bits so as to conform to CmpF64 1104 definition. */ 1105 addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst)); 1106 return dst; 1107 } 1108 1109 if (e->Iex.Binop.op == Iop_F64toI32S 1110 || e->Iex.Binop.op == Iop_F64toI16S) { 1111 Int sz = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4; 1112 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2); 1113 HReg dst = newVRegI(env); 1114 1115 /* Used several times ... */ 1116 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 1117 1118 /* rf now holds the value to be converted, and rrm holds the 1119 rounding mode value, encoded as per the IRRoundingMode 1120 enum. The first thing to do is set the FPU's rounding 1121 mode accordingly. */ 1122 1123 /* Create a space for the format conversion. 
*/ 1124 /* subl $4, %esp */ 1125 sub_from_esp(env, 4); 1126 1127 /* Set host rounding mode */ 1128 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 1129 1130 /* gistw/l %rf, 0(%esp) */ 1131 addInstr(env, X86Instr_FpLdStI(False/*store*/, 1132 toUChar(sz), rf, zero_esp)); 1133 1134 if (sz == 2) { 1135 /* movzwl 0(%esp), %dst */ 1136 addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst)); 1137 } else { 1138 /* movl 0(%esp), %dst */ 1139 vassert(sz == 4); 1140 addInstr(env, X86Instr_Alu32R( 1141 Xalu_MOV, X86RMI_Mem(zero_esp), dst)); 1142 } 1143 1144 /* Restore default FPU rounding. */ 1145 set_FPU_rounding_default( env ); 1146 1147 /* addl $4, %esp */ 1148 add_to_esp(env, 4); 1149 return dst; 1150 } 1151 1152 break; 1153 } 1154 1155 /* --------- UNARY OP --------- */ 1156 case Iex_Unop: { 1157 1158 /* 1Uto8(32to1(expr32)) */ 1159 if (e->Iex.Unop.op == Iop_1Uto8) { 1160 DECLARE_PATTERN(p_32to1_then_1Uto8); 1161 DEFINE_PATTERN(p_32to1_then_1Uto8, 1162 unop(Iop_1Uto8,unop(Iop_32to1,bind(0)))); 1163 if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) { 1164 const IRExpr* expr32 = mi.bindee[0]; 1165 HReg dst = newVRegI(env); 1166 HReg src = iselIntExpr_R(env, expr32); 1167 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1168 addInstr(env, X86Instr_Alu32R(Xalu_AND, 1169 X86RMI_Imm(1), dst)); 1170 return dst; 1171 } 1172 } 1173 1174 /* 8Uto32(LDle(expr32)) */ 1175 if (e->Iex.Unop.op == Iop_8Uto32) { 1176 DECLARE_PATTERN(p_LDle8_then_8Uto32); 1177 DEFINE_PATTERN(p_LDle8_then_8Uto32, 1178 unop(Iop_8Uto32, 1179 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) ); 1180 if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) { 1181 HReg dst = newVRegI(env); 1182 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1183 addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); 1184 return dst; 1185 } 1186 } 1187 1188 /* 8Sto32(LDle(expr32)) */ 1189 if (e->Iex.Unop.op == Iop_8Sto32) { 1190 DECLARE_PATTERN(p_LDle8_then_8Sto32); 1191 DEFINE_PATTERN(p_LDle8_then_8Sto32, 1192 unop(Iop_8Sto32, 1193 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) ); 1194 if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) { 1195 HReg dst = newVRegI(env); 1196 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1197 addInstr(env, X86Instr_LoadEX(1,True,amode,dst)); 1198 return dst; 1199 } 1200 } 1201 1202 /* 16Uto32(LDle(expr32)) */ 1203 if (e->Iex.Unop.op == Iop_16Uto32) { 1204 DECLARE_PATTERN(p_LDle16_then_16Uto32); 1205 DEFINE_PATTERN(p_LDle16_then_16Uto32, 1206 unop(Iop_16Uto32, 1207 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) ); 1208 if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) { 1209 HReg dst = newVRegI(env); 1210 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1211 addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); 1212 return dst; 1213 } 1214 } 1215 1216 /* 8Uto32(GET:I8) */ 1217 if (e->Iex.Unop.op == Iop_8Uto32) { 1218 if (e->Iex.Unop.arg->tag == Iex_Get) { 1219 HReg dst; 1220 X86AMode* amode; 1221 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8); 1222 dst = newVRegI(env); 1223 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1224 hregX86_EBP()); 1225 addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); 1226 return dst; 1227 } 1228 } 1229 1230 /* 16to32(GET:I16) */ 1231 if (e->Iex.Unop.op == Iop_16Uto32) { 1232 if (e->Iex.Unop.arg->tag == Iex_Get) { 1233 HReg dst; 1234 X86AMode* amode; 1235 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16); 1236 dst = newVRegI(env); 1237 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1238 hregX86_EBP()); 1239 addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); 1240 return dst; 1241 } 1242 } 1243 1244 switch (e->Iex.Unop.op) { 1245 case 
Iop_8Uto16: 1246 case Iop_8Uto32: 1247 case Iop_16Uto32: { 1248 HReg dst = newVRegI(env); 1249 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1250 UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF; 1251 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1252 addInstr(env, X86Instr_Alu32R(Xalu_AND, 1253 X86RMI_Imm(mask), dst)); 1254 return dst; 1255 } 1256 case Iop_8Sto16: 1257 case Iop_8Sto32: 1258 case Iop_16Sto32: { 1259 HReg dst = newVRegI(env); 1260 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1261 UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24; 1262 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1263 addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst)); 1264 addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst)); 1265 return dst; 1266 } 1267 case Iop_Not8: 1268 case Iop_Not16: 1269 case Iop_Not32: { 1270 HReg dst = newVRegI(env); 1271 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1272 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1273 addInstr(env, X86Instr_Unary32(Xun_NOT,dst)); 1274 return dst; 1275 } 1276 case Iop_64HIto32: { 1277 HReg rHi, rLo; 1278 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1279 return rHi; /* and abandon rLo .. poor wee thing :-) */ 1280 } 1281 case Iop_64to32: { 1282 HReg rHi, rLo; 1283 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1284 return rLo; /* similar stupid comment to the above ... */ 1285 } 1286 case Iop_16HIto8: 1287 case Iop_32HIto16: { 1288 HReg dst = newVRegI(env); 1289 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1290 Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16; 1291 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1292 addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst)); 1293 return dst; 1294 } 1295 case Iop_1Uto32: 1296 case Iop_1Uto8: { 1297 HReg dst = newVRegI(env); 1298 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 1299 addInstr(env, X86Instr_Set32(cond,dst)); 1300 return dst; 1301 } 1302 case Iop_1Sto8: 1303 case Iop_1Sto16: 1304 case Iop_1Sto32: { 1305 /* could do better than this, but for now ... */ 1306 HReg dst = newVRegI(env); 1307 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 1308 addInstr(env, X86Instr_Set32(cond,dst)); 1309 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst)); 1310 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst)); 1311 return dst; 1312 } 1313 case Iop_Ctz32: { 1314 /* Count trailing zeroes, implemented by x86 'bsfl' */ 1315 HReg dst = newVRegI(env); 1316 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1317 addInstr(env, X86Instr_Bsfr32(True,src,dst)); 1318 return dst; 1319 } 1320 case Iop_Clz32: { 1321 /* Count leading zeroes. Do 'bsrl' to establish the index 1322 of the highest set bit, and subtract that value from 1323 31. 
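
            A sketch of the sequence emitted below (%src/%tmp/%dst are
            stand-ins for the vregs involved):

               bsrl %src, %tmp
               movl $31, %dst
               subl %tmp, %dst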
*/ 1324 HReg tmp = newVRegI(env); 1325 HReg dst = newVRegI(env); 1326 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1327 addInstr(env, X86Instr_Bsfr32(False,src,tmp)); 1328 addInstr(env, X86Instr_Alu32R(Xalu_MOV, 1329 X86RMI_Imm(31), dst)); 1330 addInstr(env, X86Instr_Alu32R(Xalu_SUB, 1331 X86RMI_Reg(tmp), dst)); 1332 return dst; 1333 } 1334 1335 case Iop_CmpwNEZ32: { 1336 HReg dst = newVRegI(env); 1337 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1338 addInstr(env, mk_iMOVsd_RR(src,dst)); 1339 addInstr(env, X86Instr_Unary32(Xun_NEG,dst)); 1340 addInstr(env, X86Instr_Alu32R(Xalu_OR, 1341 X86RMI_Reg(src), dst)); 1342 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst)); 1343 return dst; 1344 } 1345 case Iop_Left8: 1346 case Iop_Left16: 1347 case Iop_Left32: { 1348 HReg dst = newVRegI(env); 1349 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1350 addInstr(env, mk_iMOVsd_RR(src, dst)); 1351 addInstr(env, X86Instr_Unary32(Xun_NEG, dst)); 1352 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst)); 1353 return dst; 1354 } 1355 1356 case Iop_V128to32: { 1357 HReg dst = newVRegI(env); 1358 HReg vec = iselVecExpr(env, e->Iex.Unop.arg); 1359 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 1360 sub_from_esp(env, 16); 1361 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0)); 1362 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst )); 1363 add_to_esp(env, 16); 1364 return dst; 1365 } 1366 1367 /* ReinterpF32asI32(e) */ 1368 /* Given an IEEE754 single, produce an I32 with the same bit 1369 pattern. Keep stack 8-aligned even though only using 4 1370 bytes. */ 1371 case Iop_ReinterpF32asI32: { 1372 HReg rf = iselFltExpr(env, e->Iex.Unop.arg); 1373 HReg dst = newVRegI(env); 1374 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 1375 /* paranoia */ 1376 set_FPU_rounding_default(env); 1377 /* subl $8, %esp */ 1378 sub_from_esp(env, 8); 1379 /* gstF %rf, 0(%esp) */ 1380 addInstr(env, 1381 X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp)); 1382 /* movl 0(%esp), %dst */ 1383 addInstr(env, 1384 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst)); 1385 /* addl $8, %esp */ 1386 add_to_esp(env, 8); 1387 return dst; 1388 } 1389 1390 case Iop_16to8: 1391 case Iop_32to8: 1392 case Iop_32to16: 1393 /* These are no-ops. */ 1394 return iselIntExpr_R(env, e->Iex.Unop.arg); 1395 1396 case Iop_GetMSBs8x8: { 1397 /* Note: the following assumes the helper is of 1398 signature 1399 UInt fn ( ULong ), and is not a regparm fn. 1400 */ 1401 HReg xLo, xHi; 1402 HReg dst = newVRegI(env); 1403 Addr fn = (Addr)h_generic_calc_GetMSBs8x8; 1404 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg); 1405 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 1406 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 1407 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn, 1408 0, mk_RetLoc_simple(RLPri_Int) )); 1409 add_to_esp(env, 2*4); 1410 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst)); 1411 return dst; 1412 } 1413 1414 default: 1415 break; 1416 } 1417 break; 1418 } 1419 1420 /* --------- GET --------- */ 1421 case Iex_Get: { 1422 if (ty == Ity_I32) { 1423 HReg dst = newVRegI(env); 1424 addInstr(env, X86Instr_Alu32R( 1425 Xalu_MOV, 1426 X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset, 1427 hregX86_EBP())), 1428 dst)); 1429 return dst; 1430 } 1431 if (ty == Ity_I8 || ty == Ity_I16) { 1432 HReg dst = newVRegI(env); 1433 addInstr(env, X86Instr_LoadEX( 1434 toUChar(ty==Ity_I8 ? 
1 : 2), 1435 False, 1436 X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()), 1437 dst)); 1438 return dst; 1439 } 1440 break; 1441 } 1442 1443 case Iex_GetI: { 1444 X86AMode* am 1445 = genGuestArrayOffset( 1446 env, e->Iex.GetI.descr, 1447 e->Iex.GetI.ix, e->Iex.GetI.bias ); 1448 HReg dst = newVRegI(env); 1449 if (ty == Ity_I8) { 1450 addInstr(env, X86Instr_LoadEX( 1, False, am, dst )); 1451 return dst; 1452 } 1453 if (ty == Ity_I32) { 1454 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst)); 1455 return dst; 1456 } 1457 break; 1458 } 1459 1460 /* --------- CCALL --------- */ 1461 case Iex_CCall: { 1462 HReg dst = newVRegI(env); 1463 vassert(ty == e->Iex.CCall.retty); 1464 1465 /* be very restrictive for now. Only 32/64-bit ints allowed for 1466 args, and 32 bits for return type. Don't forget to change 1467 the RetLoc if more return types are allowed in future. */ 1468 if (e->Iex.CCall.retty != Ity_I32) 1469 goto irreducible; 1470 1471 /* Marshal args, do the call, clear stack. */ 1472 UInt addToSp = 0; 1473 RetLoc rloc = mk_RetLoc_INVALID(); 1474 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, 1475 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args ); 1476 vassert(is_sane_RetLoc(rloc)); 1477 vassert(rloc.pri == RLPri_Int); 1478 vassert(addToSp == 0); 1479 1480 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst)); 1481 return dst; 1482 } 1483 1484 /* --------- LITERAL --------- */ 1485 /* 32/16/8-bit literals */ 1486 case Iex_Const: { 1487 X86RMI* rmi = iselIntExpr_RMI ( env, e ); 1488 HReg r = newVRegI(env); 1489 addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r)); 1490 return r; 1491 } 1492 1493 /* --------- MULTIPLEX --------- */ 1494 case Iex_ITE: { // VFD 1495 if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) 1496 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) { 1497 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue); 1498 X86RM* r0 = iselIntExpr_RM(env, e->Iex.ITE.iffalse); 1499 HReg dst = newVRegI(env); 1500 addInstr(env, mk_iMOVsd_RR(r1,dst)); 1501 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond); 1502 addInstr(env, X86Instr_CMov32(cc ^ 1, r0, dst)); 1503 return dst; 1504 } 1505 break; 1506 } 1507 1508 default: 1509 break; 1510 } /* switch (e->tag) */ 1511 1512 /* We get here if no pattern matched. */ 1513 irreducible: 1514 ppIRExpr(e); 1515 vpanic("iselIntExpr_R: cannot reduce tree"); 1516} 1517 1518 1519/*---------------------------------------------------------*/ 1520/*--- ISEL: Integer expression auxiliaries ---*/ 1521/*---------------------------------------------------------*/ 1522 1523/* --------------------- AMODEs --------------------- */ 1524 1525/* Return an AMode which computes the value of the specified 1526 expression, possibly also adding insns to the code list as a 1527 result. The expression may only be a 32-bit one. 
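
   For example (purely illustrative), the tree

      Add32(Add32(t1, Shl32(t2, 0x2:I8)), 0x30:I32)

   can be absorbed entirely into the amode 0x30(%t1,%t2,4); the only
   code emitted is whatever is needed to get t1 and t2 into registers.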
1528*/ 1529 1530static Bool sane_AMode ( X86AMode* am ) 1531{ 1532 switch (am->tag) { 1533 case Xam_IR: 1534 return 1535 toBool( hregClass(am->Xam.IR.reg) == HRcInt32 1536 && (hregIsVirtual(am->Xam.IR.reg) 1537 || sameHReg(am->Xam.IR.reg, hregX86_EBP())) ); 1538 case Xam_IRRS: 1539 return 1540 toBool( hregClass(am->Xam.IRRS.base) == HRcInt32 1541 && hregIsVirtual(am->Xam.IRRS.base) 1542 && hregClass(am->Xam.IRRS.index) == HRcInt32 1543 && hregIsVirtual(am->Xam.IRRS.index) ); 1544 default: 1545 vpanic("sane_AMode: unknown x86 amode tag"); 1546 } 1547} 1548 1549static X86AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e ) 1550{ 1551 X86AMode* am = iselIntExpr_AMode_wrk(env, e); 1552 vassert(sane_AMode(am)); 1553 return am; 1554} 1555 1556/* DO NOT CALL THIS DIRECTLY ! */ 1557static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e ) 1558{ 1559 IRType ty = typeOfIRExpr(env->type_env,e); 1560 vassert(ty == Ity_I32); 1561 1562 /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */ 1563 if (e->tag == Iex_Binop 1564 && e->Iex.Binop.op == Iop_Add32 1565 && e->Iex.Binop.arg2->tag == Iex_Const 1566 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32 1567 && e->Iex.Binop.arg1->tag == Iex_Binop 1568 && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32 1569 && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop 1570 && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32 1571 && e->Iex.Binop.arg1 1572 ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const 1573 && e->Iex.Binop.arg1 1574 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) { 1575 UInt shift = e->Iex.Binop.arg1 1576 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1577 UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 1578 if (shift == 1 || shift == 2 || shift == 3) { 1579 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1); 1580 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1 1581 ->Iex.Binop.arg2->Iex.Binop.arg1 ); 1582 return X86AMode_IRRS(imm32, r1, r2, shift); 1583 } 1584 } 1585 1586 /* Add32(expr1, Shl32(expr2, imm)) */ 1587 if (e->tag == Iex_Binop 1588 && e->Iex.Binop.op == Iop_Add32 1589 && e->Iex.Binop.arg2->tag == Iex_Binop 1590 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32 1591 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const 1592 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) { 1593 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1594 if (shift == 1 || shift == 2 || shift == 3) { 1595 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1596 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 ); 1597 return X86AMode_IRRS(0, r1, r2, shift); 1598 } 1599 } 1600 1601 /* Add32(expr,i) */ 1602 if (e->tag == Iex_Binop 1603 && e->Iex.Binop.op == Iop_Add32 1604 && e->Iex.Binop.arg2->tag == Iex_Const 1605 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) { 1606 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1607 return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1); 1608 } 1609 1610 /* Doesn't match anything in particular. Generate it into 1611 a register and use that. */ 1612 { 1613 HReg r1 = iselIntExpr_R(env, e); 1614 return X86AMode_IR(0, r1); 1615 } 1616} 1617 1618 1619/* --------------------- RMIs --------------------- */ 1620 1621/* Similarly, calculate an expression into an X86RMI operand. As with 1622 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. 
*/ 1623 1624static X86RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e ) 1625{ 1626 X86RMI* rmi = iselIntExpr_RMI_wrk(env, e); 1627 /* sanity checks ... */ 1628 switch (rmi->tag) { 1629 case Xrmi_Imm: 1630 return rmi; 1631 case Xrmi_Reg: 1632 vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32); 1633 vassert(hregIsVirtual(rmi->Xrmi.Reg.reg)); 1634 return rmi; 1635 case Xrmi_Mem: 1636 vassert(sane_AMode(rmi->Xrmi.Mem.am)); 1637 return rmi; 1638 default: 1639 vpanic("iselIntExpr_RMI: unknown x86 RMI tag"); 1640 } 1641} 1642 1643/* DO NOT CALL THIS DIRECTLY ! */ 1644static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e ) 1645{ 1646 IRType ty = typeOfIRExpr(env->type_env,e); 1647 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 1648 1649 /* special case: immediate */ 1650 if (e->tag == Iex_Const) { 1651 UInt u; 1652 switch (e->Iex.Const.con->tag) { 1653 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; 1654 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break; 1655 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break; 1656 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)"); 1657 } 1658 return X86RMI_Imm(u); 1659 } 1660 1661 /* special case: 32-bit GET */ 1662 if (e->tag == Iex_Get && ty == Ity_I32) { 1663 return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset, 1664 hregX86_EBP())); 1665 } 1666 1667 /* special case: 32-bit load from memory */ 1668 if (e->tag == Iex_Load && ty == Ity_I32 1669 && e->Iex.Load.end == Iend_LE) { 1670 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 1671 return X86RMI_Mem(am); 1672 } 1673 1674 /* default case: calculate into a register and return that */ 1675 { 1676 HReg r = iselIntExpr_R ( env, e ); 1677 return X86RMI_Reg(r); 1678 } 1679} 1680 1681 1682/* --------------------- RIs --------------------- */ 1683 1684/* Calculate an expression into an X86RI operand. As with 1685 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */ 1686 1687static X86RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e ) 1688{ 1689 X86RI* ri = iselIntExpr_RI_wrk(env, e); 1690 /* sanity checks ... */ 1691 switch (ri->tag) { 1692 case Xri_Imm: 1693 return ri; 1694 case Xri_Reg: 1695 vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32); 1696 vassert(hregIsVirtual(ri->Xri.Reg.reg)); 1697 return ri; 1698 default: 1699 vpanic("iselIntExpr_RI: unknown x86 RI tag"); 1700 } 1701} 1702 1703/* DO NOT CALL THIS DIRECTLY ! */ 1704static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e ) 1705{ 1706 IRType ty = typeOfIRExpr(env->type_env,e); 1707 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 1708 1709 /* special case: immediate */ 1710 if (e->tag == Iex_Const) { 1711 UInt u; 1712 switch (e->Iex.Const.con->tag) { 1713 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; 1714 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break; 1715 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break; 1716 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)"); 1717 } 1718 return X86RI_Imm(u); 1719 } 1720 1721 /* default case: calculate into a register and return that */ 1722 { 1723 HReg r = iselIntExpr_R ( env, e ); 1724 return X86RI_Reg(r); 1725 } 1726} 1727 1728 1729/* --------------------- RMs --------------------- */ 1730 1731/* Similarly, calculate an expression into an X86RM operand. As with 1732 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */ 1733 1734static X86RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e ) 1735{ 1736 X86RM* rm = iselIntExpr_RM_wrk(env, e); 1737 /* sanity checks ... 
   */
   switch (rm->tag) {
      case Xrm_Reg:
         vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(rm->Xrm.Reg.reg));
         return rm;
      case Xrm_Mem:
         vassert(sane_AMode(rm->Xrm.Mem.am));
         return rm;
      default:
         vpanic("iselIntExpr_RM: unknown x86 RM tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: 32-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I32) {
      return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
                                   hregX86_EBP()));
   }

   /* special case: load from memory */

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RM_Reg(r);
   }
}


/* --------------------- CONDCODE --------------------- */

/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */

static X86CondCode iselCondCode ( ISelEnv* env, const IRExpr* e )
{
   /* Uh, there's nothing we can sanity check here, unfortunately. */
   return iselCondCode_wrk(env,e);
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e )
{
   MatchInfo mi;

   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      /* Test32 doesn't modify r32; so this is OK. */
      addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
      return Xcc_NZ;
   }

   /* Constant 1:Bit */
   if (e->tag == Iex_Const) {
      HReg r;
      vassert(e->Iex.Const.con->tag == Ico_U1);
      vassert(e->Iex.Const.con->Ico.U1 == True
              || e->Iex.Const.con->Ico.U1 == False);
      r = newVRegI(env);
      addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
      return e->Iex.Const.con->Ico.U1 ?
Xcc_Z : Xcc_NZ; 1811 } 1812 1813 /* Not1(e) */ 1814 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) { 1815 /* Generate code for the arg, and negate the test condition */ 1816 return 1 ^ iselCondCode(env, e->Iex.Unop.arg); 1817 } 1818 1819 /* --- patterns rooted at: 32to1 --- */ 1820 1821 if (e->tag == Iex_Unop 1822 && e->Iex.Unop.op == Iop_32to1) { 1823 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg); 1824 addInstr(env, X86Instr_Test32(1,rm)); 1825 return Xcc_NZ; 1826 } 1827 1828 /* --- patterns rooted at: CmpNEZ8 --- */ 1829 1830 /* CmpNEZ8(x) */ 1831 if (e->tag == Iex_Unop 1832 && e->Iex.Unop.op == Iop_CmpNEZ8) { 1833 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg); 1834 addInstr(env, X86Instr_Test32(0xFF,rm)); 1835 return Xcc_NZ; 1836 } 1837 1838 /* --- patterns rooted at: CmpNEZ16 --- */ 1839 1840 /* CmpNEZ16(x) */ 1841 if (e->tag == Iex_Unop 1842 && e->Iex.Unop.op == Iop_CmpNEZ16) { 1843 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg); 1844 addInstr(env, X86Instr_Test32(0xFFFF,rm)); 1845 return Xcc_NZ; 1846 } 1847 1848 /* --- patterns rooted at: CmpNEZ32 --- */ 1849 1850 /* CmpNEZ32(And32(x,y)) */ 1851 { 1852 DECLARE_PATTERN(p_CmpNEZ32_And32); 1853 DEFINE_PATTERN(p_CmpNEZ32_And32, 1854 unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1)))); 1855 if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) { 1856 HReg r0 = iselIntExpr_R(env, mi.bindee[0]); 1857 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]); 1858 HReg tmp = newVRegI(env); 1859 addInstr(env, mk_iMOVsd_RR(r0, tmp)); 1860 addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp)); 1861 return Xcc_NZ; 1862 } 1863 } 1864 1865 /* CmpNEZ32(Or32(x,y)) */ 1866 { 1867 DECLARE_PATTERN(p_CmpNEZ32_Or32); 1868 DEFINE_PATTERN(p_CmpNEZ32_Or32, 1869 unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1)))); 1870 if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) { 1871 HReg r0 = iselIntExpr_R(env, mi.bindee[0]); 1872 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]); 1873 HReg tmp = newVRegI(env); 1874 addInstr(env, mk_iMOVsd_RR(r0, tmp)); 1875 addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp)); 1876 return Xcc_NZ; 1877 } 1878 } 1879 1880 /* CmpNEZ32(GET(..):I32) */ 1881 if (e->tag == Iex_Unop 1882 && e->Iex.Unop.op == Iop_CmpNEZ32 1883 && e->Iex.Unop.arg->tag == Iex_Get) { 1884 X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1885 hregX86_EBP()); 1886 addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am)); 1887 return Xcc_NZ; 1888 } 1889 1890 /* CmpNEZ32(x) */ 1891 if (e->tag == Iex_Unop 1892 && e->Iex.Unop.op == Iop_CmpNEZ32) { 1893 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 1894 X86RMI* rmi2 = X86RMI_Imm(0); 1895 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1)); 1896 return Xcc_NZ; 1897 } 1898 1899 /* --- patterns rooted at: CmpNEZ64 --- */ 1900 1901 /* CmpNEZ64(Or64(x,y)) */ 1902 { 1903 DECLARE_PATTERN(p_CmpNEZ64_Or64); 1904 DEFINE_PATTERN(p_CmpNEZ64_Or64, 1905 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1)))); 1906 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) { 1907 HReg hi1, lo1, hi2, lo2; 1908 HReg tmp = newVRegI(env); 1909 iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] ); 1910 addInstr(env, mk_iMOVsd_RR(hi1, tmp)); 1911 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp)); 1912 iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] ); 1913 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp)); 1914 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp)); 1915 return Xcc_NZ; 1916 } 1917 } 1918 1919 /* CmpNEZ64(x) */ 1920 if (e->tag == Iex_Unop 1921 && e->Iex.Unop.op == Iop_CmpNEZ64) { 1922 HReg hi, lo; 1923 HReg tmp = 
newVRegI(env); 1924 iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg ); 1925 addInstr(env, mk_iMOVsd_RR(hi, tmp)); 1926 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp)); 1927 return Xcc_NZ; 1928 } 1929 1930 /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */ 1931 1932 /* CmpEQ8 / CmpNE8 */ 1933 if (e->tag == Iex_Binop 1934 && (e->Iex.Binop.op == Iop_CmpEQ8 1935 || e->Iex.Binop.op == Iop_CmpNE8 1936 || e->Iex.Binop.op == Iop_CasCmpEQ8 1937 || e->Iex.Binop.op == Iop_CasCmpNE8)) { 1938 if (isZeroU8(e->Iex.Binop.arg2)) { 1939 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1940 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1))); 1941 switch (e->Iex.Binop.op) { 1942 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z; 1943 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ; 1944 default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)"); 1945 } 1946 } else { 1947 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1948 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 1949 HReg r = newVRegI(env); 1950 addInstr(env, mk_iMOVsd_RR(r1,r)); 1951 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r)); 1952 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r))); 1953 switch (e->Iex.Binop.op) { 1954 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z; 1955 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ; 1956 default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)"); 1957 } 1958 } 1959 } 1960 1961 /* CmpEQ16 / CmpNE16 */ 1962 if (e->tag == Iex_Binop 1963 && (e->Iex.Binop.op == Iop_CmpEQ16 1964 || e->Iex.Binop.op == Iop_CmpNE16 1965 || e->Iex.Binop.op == Iop_CasCmpEQ16 1966 || e->Iex.Binop.op == Iop_CasCmpNE16 1967 || e->Iex.Binop.op == Iop_ExpCmpNE16)) { 1968 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1969 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 1970 HReg r = newVRegI(env); 1971 addInstr(env, mk_iMOVsd_RR(r1,r)); 1972 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r)); 1973 addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r))); 1974 switch (e->Iex.Binop.op) { 1975 case Iop_CmpEQ16: case Iop_CasCmpEQ16: 1976 return Xcc_Z; 1977 case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16: 1978 return Xcc_NZ; 1979 default: 1980 vpanic("iselCondCode(x86): CmpXX16"); 1981 } 1982 } 1983 1984 /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation). 1985 Saves a "movl %eax, %tmp" compared to the default route. */ 1986 if (e->tag == Iex_Binop 1987 && e->Iex.Binop.op == Iop_CmpNE32 1988 && e->Iex.Binop.arg1->tag == Iex_CCall 1989 && e->Iex.Binop.arg2->tag == Iex_Const) { 1990 IRExpr* cal = e->Iex.Binop.arg1; 1991 IRExpr* con = e->Iex.Binop.arg2; 1992 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */ 1993 vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */ 1994 vassert(con->Iex.Const.con->tag == Ico_U32); 1995 /* Marshal args, do the call. 
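         The helper's I32 result comes back in %eax (hence the RLPri_Int
         check below), so the 32-bit constant can be compared directly
         against %eax and Xcc_NZ handed back, with no intervening move.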
*/ 1996 UInt addToSp = 0; 1997 RetLoc rloc = mk_RetLoc_INVALID(); 1998 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, 1999 cal->Iex.CCall.cee, 2000 cal->Iex.CCall.retty, cal->Iex.CCall.args ); 2001 vassert(is_sane_RetLoc(rloc)); 2002 vassert(rloc.pri == RLPri_Int); 2003 vassert(addToSp == 0); 2004 /* */ 2005 addInstr(env, X86Instr_Alu32R(Xalu_CMP, 2006 X86RMI_Imm(con->Iex.Const.con->Ico.U32), 2007 hregX86_EAX())); 2008 return Xcc_NZ; 2009 } 2010 2011 /* Cmp*32*(x,y) */ 2012 if (e->tag == Iex_Binop 2013 && (e->Iex.Binop.op == Iop_CmpEQ32 2014 || e->Iex.Binop.op == Iop_CmpNE32 2015 || e->Iex.Binop.op == Iop_CmpLT32S 2016 || e->Iex.Binop.op == Iop_CmpLT32U 2017 || e->Iex.Binop.op == Iop_CmpLE32S 2018 || e->Iex.Binop.op == Iop_CmpLE32U 2019 || e->Iex.Binop.op == Iop_CasCmpEQ32 2020 || e->Iex.Binop.op == Iop_CasCmpNE32 2021 || e->Iex.Binop.op == Iop_ExpCmpNE32)) { 2022 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2023 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2024 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1)); 2025 switch (e->Iex.Binop.op) { 2026 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z; 2027 case Iop_CmpNE32: 2028 case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ; 2029 case Iop_CmpLT32S: return Xcc_L; 2030 case Iop_CmpLT32U: return Xcc_B; 2031 case Iop_CmpLE32S: return Xcc_LE; 2032 case Iop_CmpLE32U: return Xcc_BE; 2033 default: vpanic("iselCondCode(x86): CmpXX32"); 2034 } 2035 } 2036 2037 /* CmpNE64 */ 2038 if (e->tag == Iex_Binop 2039 && (e->Iex.Binop.op == Iop_CmpNE64 2040 || e->Iex.Binop.op == Iop_CmpEQ64)) { 2041 HReg hi1, hi2, lo1, lo2; 2042 HReg tHi = newVRegI(env); 2043 HReg tLo = newVRegI(env); 2044 iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 ); 2045 iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 ); 2046 addInstr(env, mk_iMOVsd_RR(hi1, tHi)); 2047 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi)); 2048 addInstr(env, mk_iMOVsd_RR(lo1, tLo)); 2049 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo)); 2050 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo)); 2051 switch (e->Iex.Binop.op) { 2052 case Iop_CmpNE64: return Xcc_NZ; 2053 case Iop_CmpEQ64: return Xcc_Z; 2054 default: vpanic("iselCondCode(x86): CmpXX64"); 2055 } 2056 } 2057 2058 ppIRExpr(e); 2059 vpanic("iselCondCode"); 2060} 2061 2062 2063/*---------------------------------------------------------*/ 2064/*--- ISEL: Integer expressions (64 bit) ---*/ 2065/*---------------------------------------------------------*/ 2066 2067/* Compute a 64-bit value into a register pair, which is returned as 2068 the first two parameters. As with iselIntExpr_R, these may be 2069 either real or virtual regs; in any case they must not be changed 2070 by subsequent code emitted by the caller. */ 2071 2072static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, 2073 const IRExpr* e ) 2074{ 2075 iselInt64Expr_wrk(rHi, rLo, env, e); 2076# if 0 2077 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2078# endif 2079 vassert(hregClass(*rHi) == HRcInt32); 2080 vassert(hregIsVirtual(*rHi)); 2081 vassert(hregClass(*rLo) == HRcInt32); 2082 vassert(hregIsVirtual(*rLo)); 2083} 2084 2085/* DO NOT CALL THIS DIRECTLY ! 
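   Use iselInt64Expr instead; it additionally checks that both halves
   of the result come back as Int32-class virtual registers.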
*/ 2086static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, 2087 const IRExpr* e ) 2088{ 2089 MatchInfo mi; 2090 HWord fn = 0; /* helper fn for most SIMD64 stuff */ 2091 vassert(e); 2092 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64); 2093 2094 /* 64-bit literal */ 2095 if (e->tag == Iex_Const) { 2096 ULong w64 = e->Iex.Const.con->Ico.U64; 2097 UInt wHi = toUInt(w64 >> 32); 2098 UInt wLo = toUInt(w64); 2099 HReg tLo = newVRegI(env); 2100 HReg tHi = newVRegI(env); 2101 vassert(e->Iex.Const.con->tag == Ico_U64); 2102 if (wLo == wHi) { 2103 /* Save a precious Int register in this special case. */ 2104 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo)); 2105 *rHi = tLo; 2106 *rLo = tLo; 2107 } else { 2108 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi)); 2109 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo)); 2110 *rHi = tHi; 2111 *rLo = tLo; 2112 } 2113 return; 2114 } 2115 2116 /* read 64-bit IRTemp */ 2117 if (e->tag == Iex_RdTmp) { 2118 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp); 2119 return; 2120 } 2121 2122 /* 64-bit load */ 2123 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2124 HReg tLo, tHi; 2125 X86AMode *am0, *am4; 2126 vassert(e->Iex.Load.ty == Ity_I64); 2127 tLo = newVRegI(env); 2128 tHi = newVRegI(env); 2129 am0 = iselIntExpr_AMode(env, e->Iex.Load.addr); 2130 am4 = advance4(am0); 2131 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo )); 2132 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 2133 *rHi = tHi; 2134 *rLo = tLo; 2135 return; 2136 } 2137 2138 /* 64-bit GET */ 2139 if (e->tag == Iex_Get) { 2140 X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP()); 2141 X86AMode* am4 = advance4(am); 2142 HReg tLo = newVRegI(env); 2143 HReg tHi = newVRegI(env); 2144 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo )); 2145 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 2146 *rHi = tHi; 2147 *rLo = tLo; 2148 return; 2149 } 2150 2151 /* 64-bit GETI */ 2152 if (e->tag == Iex_GetI) { 2153 X86AMode* am 2154 = genGuestArrayOffset( env, e->Iex.GetI.descr, 2155 e->Iex.GetI.ix, e->Iex.GetI.bias ); 2156 X86AMode* am4 = advance4(am); 2157 HReg tLo = newVRegI(env); 2158 HReg tHi = newVRegI(env); 2159 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo )); 2160 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 2161 *rHi = tHi; 2162 *rLo = tLo; 2163 return; 2164 } 2165 2166 /* 64-bit ITE: ITE(g, expr, expr) */ // VFD 2167 if (e->tag == Iex_ITE) { 2168 HReg e0Lo, e0Hi, e1Lo, e1Hi; 2169 HReg tLo = newVRegI(env); 2170 HReg tHi = newVRegI(env); 2171 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse); 2172 iselInt64Expr(&e1Hi, &e1Lo, env, e->Iex.ITE.iftrue); 2173 addInstr(env, mk_iMOVsd_RR(e1Hi, tHi)); 2174 addInstr(env, mk_iMOVsd_RR(e1Lo, tLo)); 2175 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond); 2176 /* This assumes the first cmov32 doesn't trash the condition 2177 codes, so they are still available for the second cmov32 */ 2178 addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Hi), tHi)); 2179 addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Lo), tLo)); 2180 *rHi = tHi; 2181 *rLo = tLo; 2182 return; 2183 } 2184 2185 /* --------- BINARY ops --------- */ 2186 if (e->tag == Iex_Binop) { 2187 switch (e->Iex.Binop.op) { 2188 /* 32 x 32 -> 64 multiply */ 2189 case Iop_MullU32: 2190 case Iop_MullS32: { 2191 /* get one operand into %eax, and the other into a R/M. 2192 Need to make an educated guess about which is better in 2193 which. 
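            In other words, decide which operand should go in %eax and
            which should be presented as the R/M.  Below, arg2 is taken
            into a register and moved to %eax, while arg1 becomes the R/M,
            so the generated sequence is roughly

               movl <arg2>, %eax
               mull/imull <arg1>     -- one-operand form; result in %edx:%eax
               movl %edx, tHi  ;  movl %eax, tLo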
*/ 2194 HReg tLo = newVRegI(env); 2195 HReg tHi = newVRegI(env); 2196 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32); 2197 X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1); 2198 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2); 2199 addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX())); 2200 addInstr(env, X86Instr_MulL(syned, rmLeft)); 2201 /* Result is now in EDX:EAX. Tell the caller. */ 2202 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2203 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2204 *rHi = tHi; 2205 *rLo = tLo; 2206 return; 2207 } 2208 2209 /* 64 x 32 -> (32(rem),32(div)) division */ 2210 case Iop_DivModU64to32: 2211 case Iop_DivModS64to32: { 2212 /* Get the 64-bit operand into edx:eax, and the other into 2213 any old R/M. */ 2214 HReg sHi, sLo; 2215 HReg tLo = newVRegI(env); 2216 HReg tHi = newVRegI(env); 2217 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32); 2218 X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2); 2219 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2220 addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX())); 2221 addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX())); 2222 addInstr(env, X86Instr_Div(syned, rmRight)); 2223 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2224 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2225 *rHi = tHi; 2226 *rLo = tLo; 2227 return; 2228 } 2229 2230 /* Or64/And64/Xor64 */ 2231 case Iop_Or64: 2232 case Iop_And64: 2233 case Iop_Xor64: { 2234 HReg xLo, xHi, yLo, yHi; 2235 HReg tLo = newVRegI(env); 2236 HReg tHi = newVRegI(env); 2237 X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR 2238 : e->Iex.Binop.op==Iop_And64 ? Xalu_AND 2239 : Xalu_XOR; 2240 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2241 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2242 addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2243 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi)); 2244 addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2245 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo)); 2246 *rHi = tHi; 2247 *rLo = tLo; 2248 return; 2249 } 2250 2251 /* Add64/Sub64 */ 2252 case Iop_Add64: 2253 if (e->Iex.Binop.arg2->tag == Iex_Const) { 2254 /* special case Add64(e, const) */ 2255 ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; 2256 UInt wHi = toUInt(w64 >> 32); 2257 UInt wLo = toUInt(w64); 2258 HReg tLo = newVRegI(env); 2259 HReg tHi = newVRegI(env); 2260 HReg xLo, xHi; 2261 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64); 2262 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2263 addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2264 addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2265 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo)); 2266 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi)); 2267 *rHi = tHi; 2268 *rLo = tLo; 2269 return; 2270 } 2271 /* else fall through to the generic case */ 2272 case Iop_Sub64: { 2273 HReg xLo, xHi, yLo, yHi; 2274 HReg tLo = newVRegI(env); 2275 HReg tHi = newVRegI(env); 2276 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2277 addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2278 addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2279 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2280 if (e->Iex.Binop.op==Iop_Add64) { 2281 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo)); 2282 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi)); 2283 } else { 2284 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo)); 2285 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi)); 2286 } 2287 *rHi = tHi; 2288 *rLo = tLo; 2289 return; 2290 } 2291 2292 /* 
32HLto64(e1,e2) */ 2293 case Iop_32HLto64: 2294 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2295 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2296 return; 2297 2298 /* 64-bit shifts */ 2299 case Iop_Shl64: { 2300 /* We use the same ingenious scheme as gcc. Put the value 2301 to be shifted into %hi:%lo, and the shift amount into 2302 %cl. Then (dsts on right, a la ATT syntax): 2303 2304 shldl %cl, %lo, %hi -- make %hi be right for the 2305 -- shift amt %cl % 32 2306 shll %cl, %lo -- make %lo be right for the 2307 -- shift amt %cl % 32 2308 2309 Now, if (shift amount % 64) is in the range 32 .. 63, 2310 we have to do a fixup, which puts the result low half 2311 into the result high half, and zeroes the low half: 2312 2313 testl $32, %ecx 2314 2315 cmovnz %lo, %hi 2316 movl $0, %tmp -- sigh; need yet another reg 2317 cmovnz %tmp, %lo 2318 */ 2319 HReg rAmt, sHi, sLo, tHi, tLo, tTemp; 2320 tLo = newVRegI(env); 2321 tHi = newVRegI(env); 2322 tTemp = newVRegI(env); 2323 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2); 2324 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2325 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX())); 2326 addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2327 addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2328 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo 2329 and those regs are legitimately modifiable. */ 2330 addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi)); 2331 addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo)); 2332 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX()))); 2333 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi)); 2334 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp)); 2335 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo)); 2336 *rHi = tHi; 2337 *rLo = tLo; 2338 return; 2339 } 2340 2341 case Iop_Shr64: { 2342 /* We use the same ingenious scheme as gcc. Put the value 2343 to be shifted into %hi:%lo, and the shift amount into 2344 %cl. Then: 2345 2346 shrdl %cl, %hi, %lo -- make %lo be right for the 2347 -- shift amt %cl % 32 2348 shrl %cl, %hi -- make %hi be right for the 2349 -- shift amt %cl % 32 2350 2351 Now, if (shift amount % 64) is in the range 32 .. 63, 2352 we have to do a fixup, which puts the result high half 2353 into the result low half, and zeroes the high half: 2354 2355 testl $32, %ecx 2356 2357 cmovnz %hi, %lo 2358 movl $0, %tmp -- sigh; need yet another reg 2359 cmovnz %tmp, %hi 2360 */ 2361 HReg rAmt, sHi, sLo, tHi, tLo, tTemp; 2362 tLo = newVRegI(env); 2363 tHi = newVRegI(env); 2364 tTemp = newVRegI(env); 2365 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2); 2366 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2367 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX())); 2368 addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2369 addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2370 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo 2371 and those regs are legitimately modifiable. */ 2372 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo)); 2373 addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi)); 2374 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX()))); 2375 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo)); 2376 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp)); 2377 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi)); 2378 *rHi = tHi; 2379 *rLo = tLo; 2380 return; 2381 } 2382 2383 /* F64 -> I64 */ 2384 /* Sigh, this is an almost exact copy of the F64 -> I32/I16 2385 case. Unfortunately I see no easy way to avoid the 2386 duplication. 
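         In outline: reserve 8 bytes of stack, set the x87 rounding mode
         from arg1, do a 64-bit integer store of the value (the
         X86Instr_FpLdStI below), pull the two 32-bit halves back into
         tLo/tHi, then restore the default rounding mode and pop the
         stack again.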
*/ 2387 case Iop_F64toI64S: { 2388 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2); 2389 HReg tLo = newVRegI(env); 2390 HReg tHi = newVRegI(env); 2391 2392 /* Used several times ... */ 2393 /* Careful ... this sharing is only safe because 2394 zero_esp/four_esp do not hold any registers which the 2395 register allocator could attempt to swizzle later. */ 2396 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 2397 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP()); 2398 2399 /* rf now holds the value to be converted, and rrm holds 2400 the rounding mode value, encoded as per the 2401 IRRoundingMode enum. The first thing to do is set the 2402 FPU's rounding mode accordingly. */ 2403 2404 /* Create a space for the format conversion. */ 2405 /* subl $8, %esp */ 2406 sub_from_esp(env, 8); 2407 2408 /* Set host rounding mode */ 2409 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2410 2411 /* gistll %rf, 0(%esp) */ 2412 addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp)); 2413 2414 /* movl 0(%esp), %dstLo */ 2415 /* movl 4(%esp), %dstHi */ 2416 addInstr(env, X86Instr_Alu32R( 2417 Xalu_MOV, X86RMI_Mem(zero_esp), tLo)); 2418 addInstr(env, X86Instr_Alu32R( 2419 Xalu_MOV, X86RMI_Mem(four_esp), tHi)); 2420 2421 /* Restore default FPU rounding. */ 2422 set_FPU_rounding_default( env ); 2423 2424 /* addl $8, %esp */ 2425 add_to_esp(env, 8); 2426 2427 *rHi = tHi; 2428 *rLo = tLo; 2429 return; 2430 } 2431 2432 case Iop_Add8x8: 2433 fn = (HWord)h_generic_calc_Add8x8; goto binnish; 2434 case Iop_Add16x4: 2435 fn = (HWord)h_generic_calc_Add16x4; goto binnish; 2436 case Iop_Add32x2: 2437 fn = (HWord)h_generic_calc_Add32x2; goto binnish; 2438 2439 case Iop_Avg8Ux8: 2440 fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish; 2441 case Iop_Avg16Ux4: 2442 fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish; 2443 2444 case Iop_CmpEQ8x8: 2445 fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish; 2446 case Iop_CmpEQ16x4: 2447 fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish; 2448 case Iop_CmpEQ32x2: 2449 fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish; 2450 2451 case Iop_CmpGT8Sx8: 2452 fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish; 2453 case Iop_CmpGT16Sx4: 2454 fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish; 2455 case Iop_CmpGT32Sx2: 2456 fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish; 2457 2458 case Iop_InterleaveHI8x8: 2459 fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish; 2460 case Iop_InterleaveLO8x8: 2461 fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish; 2462 case Iop_InterleaveHI16x4: 2463 fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish; 2464 case Iop_InterleaveLO16x4: 2465 fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish; 2466 case Iop_InterleaveHI32x2: 2467 fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish; 2468 case Iop_InterleaveLO32x2: 2469 fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish; 2470 case Iop_CatOddLanes16x4: 2471 fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish; 2472 case Iop_CatEvenLanes16x4: 2473 fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish; 2474 case Iop_Perm8x8: 2475 fn = (HWord)h_generic_calc_Perm8x8; goto binnish; 2476 2477 case Iop_Max8Ux8: 2478 fn = (HWord)h_generic_calc_Max8Ux8; goto binnish; 2479 case Iop_Max16Sx4: 2480 fn = (HWord)h_generic_calc_Max16Sx4; goto binnish; 2481 case Iop_Min8Ux8: 2482 fn = (HWord)h_generic_calc_Min8Ux8; goto binnish; 2483 case Iop_Min16Sx4: 2484 fn = (HWord)h_generic_calc_Min16Sx4; goto binnish; 2485 2486 case Iop_Mul16x4: 2487 fn = 
(HWord)h_generic_calc_Mul16x4; goto binnish; 2488 case Iop_Mul32x2: 2489 fn = (HWord)h_generic_calc_Mul32x2; goto binnish; 2490 case Iop_MulHi16Sx4: 2491 fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish; 2492 case Iop_MulHi16Ux4: 2493 fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish; 2494 2495 case Iop_QAdd8Sx8: 2496 fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish; 2497 case Iop_QAdd16Sx4: 2498 fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish; 2499 case Iop_QAdd8Ux8: 2500 fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish; 2501 case Iop_QAdd16Ux4: 2502 fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish; 2503 2504 case Iop_QNarrowBin32Sto16Sx4: 2505 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish; 2506 case Iop_QNarrowBin16Sto8Sx8: 2507 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish; 2508 case Iop_QNarrowBin16Sto8Ux8: 2509 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish; 2510 case Iop_NarrowBin16to8x8: 2511 fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish; 2512 case Iop_NarrowBin32to16x4: 2513 fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish; 2514 2515 case Iop_QSub8Sx8: 2516 fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish; 2517 case Iop_QSub16Sx4: 2518 fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish; 2519 case Iop_QSub8Ux8: 2520 fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish; 2521 case Iop_QSub16Ux4: 2522 fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish; 2523 2524 case Iop_Sub8x8: 2525 fn = (HWord)h_generic_calc_Sub8x8; goto binnish; 2526 case Iop_Sub16x4: 2527 fn = (HWord)h_generic_calc_Sub16x4; goto binnish; 2528 case Iop_Sub32x2: 2529 fn = (HWord)h_generic_calc_Sub32x2; goto binnish; 2530 2531 binnish: { 2532 /* Note: the following assumes all helpers are of 2533 signature 2534 ULong fn ( ULong, ULong ), and they are 2535 not marked as regparm functions. 2536 */ 2537 HReg xLo, xHi, yLo, yHi; 2538 HReg tLo = newVRegI(env); 2539 HReg tHi = newVRegI(env); 2540 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2541 addInstr(env, X86Instr_Push(X86RMI_Reg(yHi))); 2542 addInstr(env, X86Instr_Push(X86RMI_Reg(yLo))); 2543 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2544 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 2545 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 2546 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn, 2547 0, mk_RetLoc_simple(RLPri_2Int) )); 2548 add_to_esp(env, 4*4); 2549 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2550 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2551 *rHi = tHi; 2552 *rLo = tLo; 2553 return; 2554 } 2555 2556 case Iop_ShlN32x2: 2557 fn = (HWord)h_generic_calc_ShlN32x2; goto shifty; 2558 case Iop_ShlN16x4: 2559 fn = (HWord)h_generic_calc_ShlN16x4; goto shifty; 2560 case Iop_ShlN8x8: 2561 fn = (HWord)h_generic_calc_ShlN8x8; goto shifty; 2562 case Iop_ShrN32x2: 2563 fn = (HWord)h_generic_calc_ShrN32x2; goto shifty; 2564 case Iop_ShrN16x4: 2565 fn = (HWord)h_generic_calc_ShrN16x4; goto shifty; 2566 case Iop_SarN32x2: 2567 fn = (HWord)h_generic_calc_SarN32x2; goto shifty; 2568 case Iop_SarN16x4: 2569 fn = (HWord)h_generic_calc_SarN16x4; goto shifty; 2570 case Iop_SarN8x8: 2571 fn = (HWord)h_generic_calc_SarN8x8; goto shifty; 2572 shifty: { 2573 /* Note: the following assumes all helpers are of 2574 signature 2575 ULong fn ( ULong, UInt ), and they are 2576 not marked as regparm functions. 
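         Consequently the arguments go entirely on the stack: push the
         32-bit shift amount, then the high and low halves of the value,
         call the helper, and take the ULong result from %edx:%eax; the
         12 bytes of arguments are popped again afterwards.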
2577 */ 2578 HReg xLo, xHi; 2579 HReg tLo = newVRegI(env); 2580 HReg tHi = newVRegI(env); 2581 X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2582 addInstr(env, X86Instr_Push(y)); 2583 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2584 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 2585 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 2586 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn, 2587 0, mk_RetLoc_simple(RLPri_2Int) )); 2588 add_to_esp(env, 3*4); 2589 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2590 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2591 *rHi = tHi; 2592 *rLo = tLo; 2593 return; 2594 } 2595 2596 default: 2597 break; 2598 } 2599 } /* if (e->tag == Iex_Binop) */ 2600 2601 2602 /* --------- UNARY ops --------- */ 2603 if (e->tag == Iex_Unop) { 2604 switch (e->Iex.Unop.op) { 2605 2606 /* 32Sto64(e) */ 2607 case Iop_32Sto64: { 2608 HReg tLo = newVRegI(env); 2609 HReg tHi = newVRegI(env); 2610 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2611 addInstr(env, mk_iMOVsd_RR(src,tHi)); 2612 addInstr(env, mk_iMOVsd_RR(src,tLo)); 2613 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi)); 2614 *rHi = tHi; 2615 *rLo = tLo; 2616 return; 2617 } 2618 2619 /* 32Uto64(e) */ 2620 case Iop_32Uto64: { 2621 HReg tLo = newVRegI(env); 2622 HReg tHi = newVRegI(env); 2623 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2624 addInstr(env, mk_iMOVsd_RR(src,tLo)); 2625 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2626 *rHi = tHi; 2627 *rLo = tLo; 2628 return; 2629 } 2630 2631 /* 16Uto64(e) */ 2632 case Iop_16Uto64: { 2633 HReg tLo = newVRegI(env); 2634 HReg tHi = newVRegI(env); 2635 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2636 addInstr(env, mk_iMOVsd_RR(src,tLo)); 2637 addInstr(env, X86Instr_Alu32R(Xalu_AND, 2638 X86RMI_Imm(0xFFFF), tLo)); 2639 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2640 *rHi = tHi; 2641 *rLo = tLo; 2642 return; 2643 } 2644 2645 /* V128{HI}to64 */ 2646 case Iop_V128HIto64: 2647 case Iop_V128to64: { 2648 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0; 2649 HReg tLo = newVRegI(env); 2650 HReg tHi = newVRegI(env); 2651 HReg vec = iselVecExpr(env, e->Iex.Unop.arg); 2652 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 2653 X86AMode* espLO = X86AMode_IR(off, hregX86_ESP()); 2654 X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP()); 2655 sub_from_esp(env, 16); 2656 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0)); 2657 addInstr(env, X86Instr_Alu32R( Xalu_MOV, 2658 X86RMI_Mem(espLO), tLo )); 2659 addInstr(env, X86Instr_Alu32R( Xalu_MOV, 2660 X86RMI_Mem(espHI), tHi )); 2661 add_to_esp(env, 16); 2662 *rHi = tHi; 2663 *rLo = tLo; 2664 return; 2665 } 2666 2667 /* could do better than this, but for now ... 
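         the scheme below materialises the condition with Set32, shifts
         it left 31 and arithmetically right 31 to smear bit 0 across the
         whole word, and then copies that word into the high half, giving
         all zeroes or all ones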
*/ 2668 case Iop_1Sto64: { 2669 HReg tLo = newVRegI(env); 2670 HReg tHi = newVRegI(env); 2671 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 2672 addInstr(env, X86Instr_Set32(cond,tLo)); 2673 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo)); 2674 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo)); 2675 addInstr(env, mk_iMOVsd_RR(tLo, tHi)); 2676 *rHi = tHi; 2677 *rLo = tLo; 2678 return; 2679 } 2680 2681 /* Not64(e) */ 2682 case Iop_Not64: { 2683 HReg tLo = newVRegI(env); 2684 HReg tHi = newVRegI(env); 2685 HReg sHi, sLo; 2686 iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg); 2687 addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2688 addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2689 addInstr(env, X86Instr_Unary32(Xun_NOT,tHi)); 2690 addInstr(env, X86Instr_Unary32(Xun_NOT,tLo)); 2691 *rHi = tHi; 2692 *rLo = tLo; 2693 return; 2694 } 2695 2696 /* Left64(e) */ 2697 case Iop_Left64: { 2698 HReg yLo, yHi; 2699 HReg tLo = newVRegI(env); 2700 HReg tHi = newVRegI(env); 2701 /* yHi:yLo = arg */ 2702 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg); 2703 /* tLo = 0 - yLo, and set carry */ 2704 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo)); 2705 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo)); 2706 /* tHi = 0 - yHi - carry */ 2707 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2708 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi)); 2709 /* So now we have tHi:tLo = -arg. To finish off, or 'arg' 2710 back in, so as to give the final result 2711 tHi:tLo = arg | -arg. */ 2712 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo)); 2713 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi)); 2714 *rHi = tHi; 2715 *rLo = tLo; 2716 return; 2717 } 2718 2719 /* --- patterns rooted at: CmpwNEZ64 --- */ 2720 2721 /* CmpwNEZ64(e) */ 2722 case Iop_CmpwNEZ64: { 2723 2724 DECLARE_PATTERN(p_CmpwNEZ64_Or64); 2725 DEFINE_PATTERN(p_CmpwNEZ64_Or64, 2726 unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1)))); 2727 if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) { 2728 /* CmpwNEZ64(Or64(x,y)) */ 2729 HReg xHi,xLo,yHi,yLo; 2730 HReg xBoth = newVRegI(env); 2731 HReg merged = newVRegI(env); 2732 HReg tmp2 = newVRegI(env); 2733 2734 iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]); 2735 addInstr(env, mk_iMOVsd_RR(xHi,xBoth)); 2736 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2737 X86RMI_Reg(xLo),xBoth)); 2738 2739 iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]); 2740 addInstr(env, mk_iMOVsd_RR(yHi,merged)); 2741 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2742 X86RMI_Reg(yLo),merged)); 2743 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2744 X86RMI_Reg(xBoth),merged)); 2745 2746 /* tmp2 = (merged | -merged) >>s 31 */ 2747 addInstr(env, mk_iMOVsd_RR(merged,tmp2)); 2748 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2)); 2749 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2750 X86RMI_Reg(merged), tmp2)); 2751 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2)); 2752 *rHi = tmp2; 2753 *rLo = tmp2; 2754 return; 2755 } else { 2756 /* CmpwNEZ64(e) */ 2757 HReg srcLo, srcHi; 2758 HReg tmp1 = newVRegI(env); 2759 HReg tmp2 = newVRegI(env); 2760 /* srcHi:srcLo = arg */ 2761 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg); 2762 /* tmp1 = srcHi | srcLo */ 2763 addInstr(env, mk_iMOVsd_RR(srcHi,tmp1)); 2764 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2765 X86RMI_Reg(srcLo), tmp1)); 2766 /* tmp2 = (tmp1 | -tmp1) >>s 31 */ 2767 addInstr(env, mk_iMOVsd_RR(tmp1,tmp2)); 2768 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2)); 2769 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2770 X86RMI_Reg(tmp1), tmp2)); 2771 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, 
tmp2)); 2772 *rHi = tmp2; 2773 *rLo = tmp2; 2774 return; 2775 } 2776 } 2777 2778 /* ReinterpF64asI64(e) */ 2779 /* Given an IEEE754 double, produce an I64 with the same bit 2780 pattern. */ 2781 case Iop_ReinterpF64asI64: { 2782 HReg rf = iselDblExpr(env, e->Iex.Unop.arg); 2783 HReg tLo = newVRegI(env); 2784 HReg tHi = newVRegI(env); 2785 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 2786 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP()); 2787 /* paranoia */ 2788 set_FPU_rounding_default(env); 2789 /* subl $8, %esp */ 2790 sub_from_esp(env, 8); 2791 /* gstD %rf, 0(%esp) */ 2792 addInstr(env, 2793 X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp)); 2794 /* movl 0(%esp), %tLo */ 2795 addInstr(env, 2796 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo)); 2797 /* movl 4(%esp), %tHi */ 2798 addInstr(env, 2799 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi)); 2800 /* addl $8, %esp */ 2801 add_to_esp(env, 8); 2802 *rHi = tHi; 2803 *rLo = tLo; 2804 return; 2805 } 2806 2807 case Iop_CmpNEZ32x2: 2808 fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish; 2809 case Iop_CmpNEZ16x4: 2810 fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish; 2811 case Iop_CmpNEZ8x8: 2812 fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish; 2813 unish: { 2814 /* Note: the following assumes all helpers are of 2815 signature 2816 ULong fn ( ULong ), and they are 2817 not marked as regparm functions. 2818 */ 2819 HReg xLo, xHi; 2820 HReg tLo = newVRegI(env); 2821 HReg tHi = newVRegI(env); 2822 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg); 2823 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 2824 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 2825 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn, 2826 0, mk_RetLoc_simple(RLPri_2Int) )); 2827 add_to_esp(env, 2*4); 2828 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2829 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2830 *rHi = tHi; 2831 *rLo = tLo; 2832 return; 2833 } 2834 2835 default: 2836 break; 2837 } 2838 } /* if (e->tag == Iex_Unop) */ 2839 2840 2841 /* --------- CCALL --------- */ 2842 if (e->tag == Iex_CCall) { 2843 HReg tLo = newVRegI(env); 2844 HReg tHi = newVRegI(env); 2845 2846 /* Marshal args, do the call, clear stack. */ 2847 UInt addToSp = 0; 2848 RetLoc rloc = mk_RetLoc_INVALID(); 2849 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, 2850 e->Iex.CCall.cee, 2851 e->Iex.CCall.retty, e->Iex.CCall.args ); 2852 vassert(is_sane_RetLoc(rloc)); 2853 vassert(rloc.pri == RLPri_2Int); 2854 vassert(addToSp == 0); 2855 /* */ 2856 2857 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2858 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2859 *rHi = tHi; 2860 *rLo = tLo; 2861 return; 2862 } 2863 2864 ppIRExpr(e); 2865 vpanic("iselInt64Expr"); 2866} 2867 2868 2869/*---------------------------------------------------------*/ 2870/*--- ISEL: Floating point expressions (32 bit) ---*/ 2871/*---------------------------------------------------------*/ 2872 2873/* Nothing interesting here; really just wrappers for 2874 64-bit stuff. 
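   F32 values are carried in the same HRcFlt64 register class as F64
   values; only the 4-byte loads and stores (FpLdSt with a size of 4)
   distinguish the two formats.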
*/ 2875 2876static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e ) 2877{ 2878 HReg r = iselFltExpr_wrk( env, e ); 2879# if 0 2880 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2881# endif 2882 vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */ 2883 vassert(hregIsVirtual(r)); 2884 return r; 2885} 2886 2887/* DO NOT CALL THIS DIRECTLY */ 2888static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e ) 2889{ 2890 IRType ty = typeOfIRExpr(env->type_env,e); 2891 vassert(ty == Ity_F32); 2892 2893 if (e->tag == Iex_RdTmp) { 2894 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2895 } 2896 2897 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2898 X86AMode* am; 2899 HReg res = newVRegF(env); 2900 vassert(e->Iex.Load.ty == Ity_F32); 2901 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2902 addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am)); 2903 return res; 2904 } 2905 2906 if (e->tag == Iex_Binop 2907 && e->Iex.Binop.op == Iop_F64toF32) { 2908 /* Although the result is still held in a standard FPU register, 2909 we need to round it to reflect the loss of accuracy/range 2910 entailed in casting it to a 32-bit float. */ 2911 HReg dst = newVRegF(env); 2912 HReg src = iselDblExpr(env, e->Iex.Binop.arg2); 2913 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2914 addInstr(env, X86Instr_Fp64to32(src,dst)); 2915 set_FPU_rounding_default( env ); 2916 return dst; 2917 } 2918 2919 if (e->tag == Iex_Get) { 2920 X86AMode* am = X86AMode_IR( e->Iex.Get.offset, 2921 hregX86_EBP() ); 2922 HReg res = newVRegF(env); 2923 addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am )); 2924 return res; 2925 } 2926 2927 if (e->tag == Iex_Unop 2928 && e->Iex.Unop.op == Iop_ReinterpI32asF32) { 2929 /* Given an I32, produce an IEEE754 float with the same bit 2930 pattern. */ 2931 HReg dst = newVRegF(env); 2932 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg); 2933 /* paranoia */ 2934 addInstr(env, X86Instr_Push(rmi)); 2935 addInstr(env, X86Instr_FpLdSt( 2936 True/*load*/, 4, dst, 2937 X86AMode_IR(0, hregX86_ESP()))); 2938 add_to_esp(env, 4); 2939 return dst; 2940 } 2941 2942 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) { 2943 HReg rf = iselFltExpr(env, e->Iex.Binop.arg2); 2944 HReg dst = newVRegF(env); 2945 2946 /* rf now holds the value to be rounded. The first thing to do 2947 is set the FPU's rounding mode accordingly. */ 2948 2949 /* Set host rounding mode */ 2950 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2951 2952 /* grndint %rf, %dst */ 2953 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst)); 2954 2955 /* Restore default FPU rounding. */ 2956 set_FPU_rounding_default( env ); 2957 2958 return dst; 2959 } 2960 2961 ppIRExpr(e); 2962 vpanic("iselFltExpr_wrk"); 2963} 2964 2965 2966/*---------------------------------------------------------*/ 2967/*--- ISEL: Floating point expressions (64 bit) ---*/ 2968/*---------------------------------------------------------*/ 2969 2970/* Compute a 64-bit floating point value into a register, the identity 2971 of which is returned. As with iselIntExpr_R, the reg may be either 2972 real or virtual; in any case it must not be changed by subsequent 2973 code emitted by the caller. */ 2974 2975/* IEEE 754 formats. 
From http://www.freesoft.org/CIE/RFC/1832/32.htm: 2976 2977 Type S (1 bit) E (11 bits) F (52 bits) 2978 ---- --------- ----------- ----------- 2979 signalling NaN u 2047 (max) .0uuuuu---u 2980 (with at least 2981 one 1 bit) 2982 quiet NaN u 2047 (max) .1uuuuu---u 2983 2984 negative infinity 1 2047 (max) .000000---0 2985 2986 positive infinity 0 2047 (max) .000000---0 2987 2988 negative zero 1 0 .000000---0 2989 2990 positive zero 0 0 .000000---0 2991*/ 2992 2993static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e ) 2994{ 2995 HReg r = iselDblExpr_wrk( env, e ); 2996# if 0 2997 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2998# endif 2999 vassert(hregClass(r) == HRcFlt64); 3000 vassert(hregIsVirtual(r)); 3001 return r; 3002} 3003 3004/* DO NOT CALL THIS DIRECTLY */ 3005static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e ) 3006{ 3007 IRType ty = typeOfIRExpr(env->type_env,e); 3008 vassert(e); 3009 vassert(ty == Ity_F64); 3010 3011 if (e->tag == Iex_RdTmp) { 3012 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 3013 } 3014 3015 if (e->tag == Iex_Const) { 3016 union { UInt u32x2[2]; ULong u64; Double f64; } u; 3017 HReg freg = newVRegF(env); 3018 vassert(sizeof(u) == 8); 3019 vassert(sizeof(u.u64) == 8); 3020 vassert(sizeof(u.f64) == 8); 3021 vassert(sizeof(u.u32x2) == 8); 3022 3023 if (e->Iex.Const.con->tag == Ico_F64) { 3024 u.f64 = e->Iex.Const.con->Ico.F64; 3025 } 3026 else if (e->Iex.Const.con->tag == Ico_F64i) { 3027 u.u64 = e->Iex.Const.con->Ico.F64i; 3028 } 3029 else 3030 vpanic("iselDblExpr(x86): const"); 3031 3032 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1]))); 3033 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0]))); 3034 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg, 3035 X86AMode_IR(0, hregX86_ESP()))); 3036 add_to_esp(env, 8); 3037 return freg; 3038 } 3039 3040 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 3041 X86AMode* am; 3042 HReg res = newVRegF(env); 3043 vassert(e->Iex.Load.ty == Ity_F64); 3044 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 3045 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am)); 3046 return res; 3047 } 3048 3049 if (e->tag == Iex_Get) { 3050 X86AMode* am = X86AMode_IR( e->Iex.Get.offset, 3051 hregX86_EBP() ); 3052 HReg res = newVRegF(env); 3053 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am )); 3054 return res; 3055 } 3056 3057 if (e->tag == Iex_GetI) { 3058 X86AMode* am 3059 = genGuestArrayOffset( 3060 env, e->Iex.GetI.descr, 3061 e->Iex.GetI.ix, e->Iex.GetI.bias ); 3062 HReg res = newVRegF(env); 3063 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am )); 3064 return res; 3065 } 3066 3067 if (e->tag == Iex_Triop) { 3068 X86FpOp fpop = Xfp_INVALID; 3069 IRTriop *triop = e->Iex.Triop.details; 3070 switch (triop->op) { 3071 case Iop_AddF64: fpop = Xfp_ADD; break; 3072 case Iop_SubF64: fpop = Xfp_SUB; break; 3073 case Iop_MulF64: fpop = Xfp_MUL; break; 3074 case Iop_DivF64: fpop = Xfp_DIV; break; 3075 case Iop_ScaleF64: fpop = Xfp_SCALE; break; 3076 case Iop_Yl2xF64: fpop = Xfp_YL2X; break; 3077 case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break; 3078 case Iop_AtanF64: fpop = Xfp_ATAN; break; 3079 case Iop_PRemF64: fpop = Xfp_PREM; break; 3080 case Iop_PRem1F64: fpop = Xfp_PREM1; break; 3081 default: break; 3082 } 3083 if (fpop != Xfp_INVALID) { 3084 HReg res = newVRegF(env); 3085 HReg srcL = iselDblExpr(env, triop->arg2); 3086 HReg srcR = iselDblExpr(env, triop->arg3); 3087 /* XXXROUNDINGFIXME */ 3088 /* set roundingmode here */ 3089 addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res)); 3090 if (fpop != Xfp_ADD 
&& fpop != Xfp_SUB 3091 && fpop != Xfp_MUL && fpop != Xfp_DIV) 3092 roundToF64(env, res); 3093 return res; 3094 } 3095 } 3096 3097 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) { 3098 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2); 3099 HReg dst = newVRegF(env); 3100 3101 /* rf now holds the value to be rounded. The first thing to do 3102 is set the FPU's rounding mode accordingly. */ 3103 3104 /* Set host rounding mode */ 3105 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 3106 3107 /* grndint %rf, %dst */ 3108 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst)); 3109 3110 /* Restore default FPU rounding. */ 3111 set_FPU_rounding_default( env ); 3112 3113 return dst; 3114 } 3115 3116 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) { 3117 HReg dst = newVRegF(env); 3118 HReg rHi,rLo; 3119 iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2); 3120 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 3121 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 3122 3123 /* Set host rounding mode */ 3124 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 3125 3126 addInstr(env, X86Instr_FpLdStI( 3127 True/*load*/, 8, dst, 3128 X86AMode_IR(0, hregX86_ESP()))); 3129 3130 /* Restore default FPU rounding. */ 3131 set_FPU_rounding_default( env ); 3132 3133 add_to_esp(env, 8); 3134 return dst; 3135 } 3136 3137 if (e->tag == Iex_Binop) { 3138 X86FpOp fpop = Xfp_INVALID; 3139 switch (e->Iex.Binop.op) { 3140 case Iop_SinF64: fpop = Xfp_SIN; break; 3141 case Iop_CosF64: fpop = Xfp_COS; break; 3142 case Iop_TanF64: fpop = Xfp_TAN; break; 3143 case Iop_2xm1F64: fpop = Xfp_2XM1; break; 3144 case Iop_SqrtF64: fpop = Xfp_SQRT; break; 3145 default: break; 3146 } 3147 if (fpop != Xfp_INVALID) { 3148 HReg res = newVRegF(env); 3149 HReg src = iselDblExpr(env, e->Iex.Binop.arg2); 3150 /* XXXROUNDINGFIXME */ 3151 /* set roundingmode here */ 3152 /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition 3153 codes. I don't think that matters, since this insn 3154 selector never generates such an instruction intervening 3155 between an flag-setting instruction and a flag-using 3156 instruction. */ 3157 addInstr(env, X86Instr_FpUnary(fpop,src,res)); 3158 if (fpop != Xfp_SQRT 3159 && fpop != Xfp_NEG && fpop != Xfp_ABS) 3160 roundToF64(env, res); 3161 return res; 3162 } 3163 } 3164 3165 if (e->tag == Iex_Unop) { 3166 X86FpOp fpop = Xfp_INVALID; 3167 switch (e->Iex.Unop.op) { 3168 case Iop_NegF64: fpop = Xfp_NEG; break; 3169 case Iop_AbsF64: fpop = Xfp_ABS; break; 3170 default: break; 3171 } 3172 if (fpop != Xfp_INVALID) { 3173 HReg res = newVRegF(env); 3174 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 3175 addInstr(env, X86Instr_FpUnary(fpop,src,res)); 3176 /* No need to do roundToF64(env,res) for Xfp_NEG or Xfp_ABS, 3177 but might need to do that for other unary ops. */ 3178 return res; 3179 } 3180 } 3181 3182 if (e->tag == Iex_Unop) { 3183 switch (e->Iex.Unop.op) { 3184 case Iop_I32StoF64: { 3185 HReg dst = newVRegF(env); 3186 HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg); 3187 addInstr(env, X86Instr_Push(X86RMI_Reg(ri))); 3188 set_FPU_rounding_default(env); 3189 addInstr(env, X86Instr_FpLdStI( 3190 True/*load*/, 4, dst, 3191 X86AMode_IR(0, hregX86_ESP()))); 3192 add_to_esp(env, 4); 3193 return dst; 3194 } 3195 case Iop_ReinterpI64asF64: { 3196 /* Given an I64, produce an IEEE754 double with the same 3197 bit pattern. 
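         This goes via the stack: push the high word and then the low
         word, so that the 8-byte little-endian image sits at 0(%esp),
         do an 8-byte FP load from there, and then pop the 8 bytes
         again.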
*/ 3198 HReg dst = newVRegF(env); 3199 HReg rHi, rLo; 3200 iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg); 3201 /* paranoia */ 3202 set_FPU_rounding_default(env); 3203 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 3204 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 3205 addInstr(env, X86Instr_FpLdSt( 3206 True/*load*/, 8, dst, 3207 X86AMode_IR(0, hregX86_ESP()))); 3208 add_to_esp(env, 8); 3209 return dst; 3210 } 3211 case Iop_F32toF64: { 3212 /* this is a no-op */ 3213 HReg res = iselFltExpr(env, e->Iex.Unop.arg); 3214 return res; 3215 } 3216 default: 3217 break; 3218 } 3219 } 3220 3221 /* --------- MULTIPLEX --------- */ 3222 if (e->tag == Iex_ITE) { // VFD 3223 if (ty == Ity_F64 3224 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) { 3225 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue); 3226 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse); 3227 HReg dst = newVRegF(env); 3228 addInstr(env, X86Instr_FpUnary(Xfp_MOV,r1,dst)); 3229 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond); 3230 addInstr(env, X86Instr_FpCMov(cc ^ 1, r0, dst)); 3231 return dst; 3232 } 3233 } 3234 3235 ppIRExpr(e); 3236 vpanic("iselDblExpr_wrk"); 3237} 3238 3239 3240/*---------------------------------------------------------*/ 3241/*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/ 3242/*---------------------------------------------------------*/ 3243 3244static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e ) 3245{ 3246 HReg r = iselVecExpr_wrk( env, e ); 3247# if 0 3248 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3249# endif 3250 vassert(hregClass(r) == HRcVec128); 3251 vassert(hregIsVirtual(r)); 3252 return r; 3253} 3254 3255 3256/* DO NOT CALL THIS DIRECTLY */ 3257static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e ) 3258{ 3259 3260# define REQUIRE_SSE1 \ 3261 do { if (env->hwcaps == 0/*baseline, no sse*/ \ 3262 || env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \ 3263 goto vec_fail; \ 3264 } while (0) 3265 3266# define REQUIRE_SSE2 \ 3267 do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \ 3268 goto vec_fail; \ 3269 } while (0) 3270 3271# define SSE2_OR_ABOVE \ 3272 (env->hwcaps & VEX_HWCAPS_X86_SSE2) 3273 3274 HWord fn = 0; /* address of helper fn, if required */ 3275 MatchInfo mi; 3276 Bool arg1isEReg = False; 3277 X86SseOp op = Xsse_INVALID; 3278 IRType ty = typeOfIRExpr(env->type_env,e); 3279 vassert(e); 3280 vassert(ty == Ity_V128); 3281 3282 REQUIRE_SSE1; 3283 3284 if (e->tag == Iex_RdTmp) { 3285 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 3286 } 3287 3288 if (e->tag == Iex_Get) { 3289 HReg dst = newVRegV(env); 3290 addInstr(env, X86Instr_SseLdSt( 3291 True/*load*/, 3292 dst, 3293 X86AMode_IR(e->Iex.Get.offset, hregX86_EBP()) 3294 ) 3295 ); 3296 return dst; 3297 } 3298 3299 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 3300 HReg dst = newVRegV(env); 3301 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 3302 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am )); 3303 return dst; 3304 } 3305 3306 if (e->tag == Iex_Const) { 3307 HReg dst = newVRegV(env); 3308 vassert(e->Iex.Const.con->tag == Ico_V128); 3309 addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst)); 3310 return dst; 3311 } 3312 3313 if (e->tag == Iex_Unop) { 3314 3315 if (SSE2_OR_ABOVE) { 3316 /* 64UtoV128(LDle:I64(addr)) */ 3317 DECLARE_PATTERN(p_zwiden_load64); 3318 DEFINE_PATTERN(p_zwiden_load64, 3319 unop(Iop_64UtoV128, 3320 IRExpr_Load(Iend_LE,Ity_I64,bind(0)))); 3321 if (matchIRExpr(&mi, p_zwiden_load64, e)) { 3322 X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]); 3323 HReg 
dst = newVRegV(env); 3324 addInstr(env, X86Instr_SseLdzLO(8, dst, am)); 3325 return dst; 3326 } 3327 } 3328 3329 switch (e->Iex.Unop.op) { 3330 3331 case Iop_NotV128: { 3332 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3333 return do_sse_Not128(env, arg); 3334 } 3335 3336 case Iop_CmpNEZ64x2: { 3337 /* We can use SSE2 instructions for this. */ 3338 /* Ideally, we want to do a 64Ix2 comparison against zero of 3339 the operand. Problem is no such insn exists. Solution 3340 therefore is to do a 32Ix4 comparison instead, and bitwise- 3341 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and 3342 let the not'd result of this initial comparison be a:b:c:d. 3343 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use 3344 pshufd to create a value b:a:d:c, and OR that with a:b:c:d, 3345 giving the required result. 3346 3347 The required selection sequence is 2,3,0,1, which 3348 according to Intel's documentation means the pshufd 3349 literal value is 0xB1, that is, 3350 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0) 3351 */ 3352 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3353 HReg tmp = newVRegV(env); 3354 HReg dst = newVRegV(env); 3355 REQUIRE_SSE2; 3356 addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp)); 3357 addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp)); 3358 tmp = do_sse_Not128(env, tmp); 3359 addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst)); 3360 addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst)); 3361 return dst; 3362 } 3363 3364 case Iop_CmpNEZ32x4: { 3365 /* Sigh, we have to generate lousy code since this has to 3366 work on SSE1 hosts */ 3367 /* basically, the idea is: for each lane: 3368 movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1) 3369 sbbl %r, %r (now %r = 1Sto32(CF)) 3370 movl %r, lane 3371 */ 3372 Int i; 3373 X86AMode* am; 3374 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3375 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3376 HReg dst = newVRegV(env); 3377 HReg r32 = newVRegI(env); 3378 sub_from_esp(env, 16); 3379 addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0)); 3380 for (i = 0; i < 4; i++) { 3381 am = X86AMode_IR(i*4, hregX86_ESP()); 3382 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32)); 3383 addInstr(env, X86Instr_Unary32(Xun_NEG, r32)); 3384 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32)); 3385 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am)); 3386 } 3387 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3388 add_to_esp(env, 16); 3389 return dst; 3390 } 3391 3392 case Iop_CmpNEZ8x16: 3393 case Iop_CmpNEZ16x8: { 3394 /* We can use SSE2 instructions for this. */ 3395 HReg arg; 3396 HReg vec0 = newVRegV(env); 3397 HReg vec1 = newVRegV(env); 3398 HReg dst = newVRegV(env); 3399 X86SseOp cmpOp 3400 = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? 
Xsse_CMPEQ16 3401 : Xsse_CMPEQ8; 3402 REQUIRE_SSE2; 3403 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0)); 3404 addInstr(env, mk_vMOVsd_RR(vec0, vec1)); 3405 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1)); 3406 /* defer arg computation to here so as to give CMPEQF as long 3407 as possible to complete */ 3408 arg = iselVecExpr(env, e->Iex.Unop.arg); 3409 /* vec0 is all 0s; vec1 is all 1s */ 3410 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3411 /* 16x8 or 8x16 comparison == */ 3412 addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst)); 3413 /* invert result */ 3414 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst)); 3415 return dst; 3416 } 3417 3418 case Iop_RecipEst32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary; 3419 case Iop_RSqrtEst32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary; 3420 do_32Fx4_unary: 3421 { 3422 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3423 HReg dst = newVRegV(env); 3424 addInstr(env, X86Instr_Sse32Fx4(op, arg, dst)); 3425 return dst; 3426 } 3427 3428 case Iop_RecipEst32F0x4: op = Xsse_RCPF; goto do_32F0x4_unary; 3429 case Iop_RSqrtEst32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary; 3430 case Iop_Sqrt32F0x4: op = Xsse_SQRTF; goto do_32F0x4_unary; 3431 do_32F0x4_unary: 3432 { 3433 /* A bit subtle. We have to copy the arg to the result 3434 register first, because actually doing the SSE scalar insn 3435 leaves the upper 3/4 of the destination register 3436 unchanged. Whereas the required semantics of these 3437 primops is that the upper 3/4 is simply copied in from the 3438 argument. */ 3439 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3440 HReg dst = newVRegV(env); 3441 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3442 addInstr(env, X86Instr_Sse32FLo(op, arg, dst)); 3443 return dst; 3444 } 3445 3446 case Iop_Sqrt64F0x2: op = Xsse_SQRTF; goto do_64F0x2_unary; 3447 do_64F0x2_unary: 3448 { 3449 /* A bit subtle. We have to copy the arg to the result 3450 register first, because actually doing the SSE scalar insn 3451 leaves the upper half of the destination register 3452 unchanged. Whereas the required semantics of these 3453 primops is that the upper half is simply copied in from the 3454 argument. 
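            Hence the copy of the argument into dst first, followed by
            the scalar operation, which rewrites only the low 64 bits of
            dst.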
*/ 3455 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3456 HReg dst = newVRegV(env); 3457 REQUIRE_SSE2; 3458 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3459 addInstr(env, X86Instr_Sse64FLo(op, arg, dst)); 3460 return dst; 3461 } 3462 3463 case Iop_32UtoV128: { 3464 HReg dst = newVRegV(env); 3465 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3466 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg); 3467 addInstr(env, X86Instr_Push(rmi)); 3468 addInstr(env, X86Instr_SseLdzLO(4, dst, esp0)); 3469 add_to_esp(env, 4); 3470 return dst; 3471 } 3472 3473 case Iop_64UtoV128: { 3474 HReg rHi, rLo; 3475 HReg dst = newVRegV(env); 3476 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3477 iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg); 3478 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 3479 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 3480 addInstr(env, X86Instr_SseLdzLO(8, dst, esp0)); 3481 add_to_esp(env, 8); 3482 return dst; 3483 } 3484 3485 default: 3486 break; 3487 } /* switch (e->Iex.Unop.op) */ 3488 } /* if (e->tag == Iex_Unop) */ 3489 3490 if (e->tag == Iex_Binop) { 3491 switch (e->Iex.Binop.op) { 3492 3493 case Iop_Sqrt64Fx2: 3494 REQUIRE_SSE2; 3495 /* fallthrough */ 3496 case Iop_Sqrt32Fx4: { 3497 /* :: (rmode, vec) -> vec */ 3498 HReg arg = iselVecExpr(env, e->Iex.Binop.arg2); 3499 HReg dst = newVRegV(env); 3500 /* XXXROUNDINGFIXME */ 3501 /* set roundingmode here */ 3502 addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2 3503 ? X86Instr_Sse64Fx2 : X86Instr_Sse32Fx4) 3504 (Xsse_SQRTF, arg, dst)); 3505 return dst; 3506 } 3507 3508 case Iop_SetV128lo32: { 3509 HReg dst = newVRegV(env); 3510 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3511 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); 3512 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3513 sub_from_esp(env, 16); 3514 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0)); 3515 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0)); 3516 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3517 add_to_esp(env, 16); 3518 return dst; 3519 } 3520 3521 case Iop_SetV128lo64: { 3522 HReg dst = newVRegV(env); 3523 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3524 HReg srcIhi, srcIlo; 3525 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3526 X86AMode* esp4 = advance4(esp0); 3527 iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2); 3528 sub_from_esp(env, 16); 3529 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0)); 3530 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0)); 3531 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4)); 3532 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3533 add_to_esp(env, 16); 3534 return dst; 3535 } 3536 3537 case Iop_64HLtoV128: { 3538 HReg r3, r2, r1, r0; 3539 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3540 X86AMode* esp4 = advance4(esp0); 3541 X86AMode* esp8 = advance4(esp4); 3542 X86AMode* esp12 = advance4(esp8); 3543 HReg dst = newVRegV(env); 3544 /* do this via the stack (easy, convenient, etc) */ 3545 sub_from_esp(env, 16); 3546 /* Do the less significant 64 bits */ 3547 iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2); 3548 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0)); 3549 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4)); 3550 /* Do the more significant 64 bits */ 3551 iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1); 3552 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8)); 3553 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12)); 3554 /* Fetch result back from stack. 
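         By now the 16-byte image at 0(%esp) holds arg2 in the low
         quadword (the words at esp+0 and esp+4) and arg1 in the high
         quadword (esp+8 and esp+12), so a single 128-bit SSE load gives
         the required V128.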
      case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
      case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
      case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
      case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
      case Iop_Max32Fx4:   op = Xsse_MAXF;   goto do_32Fx4;
      case Iop_Min32Fx4:   op = Xsse_MINF;   goto do_32Fx4;
      do_32Fx4:
      {
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
         return dst;
      }

      case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
      case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
      case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
      case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
      case Iop_Max64Fx2:   op = Xsse_MAXF;   goto do_64Fx2;
      case Iop_Min64Fx2:   op = Xsse_MINF;   goto do_64Fx2;
      do_64Fx2:
      {
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegV(env);
         REQUIRE_SSE2;
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
         return dst;
      }

      case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
      case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
      case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
      case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
      case Iop_Add32F0x4:   op = Xsse_ADDF;   goto do_32F0x4;
      case Iop_Div32F0x4:   op = Xsse_DIVF;   goto do_32F0x4;
      case Iop_Max32F0x4:   op = Xsse_MAXF;   goto do_32F0x4;
      case Iop_Min32F0x4:   op = Xsse_MINF;   goto do_32F0x4;
      case Iop_Mul32F0x4:   op = Xsse_MULF;   goto do_32F0x4;
      case Iop_Sub32F0x4:   op = Xsse_SUBF;   goto do_32F0x4;
      do_32F0x4: {
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
         return dst;
      }

      case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
      case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
      case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
      case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
      case Iop_Add64F0x2:   op = Xsse_ADDF;   goto do_64F0x2;
      case Iop_Div64F0x2:   op = Xsse_DIVF;   goto do_64F0x2;
      case Iop_Max64F0x2:   op = Xsse_MAXF;   goto do_64F0x2;
      case Iop_Min64F0x2:   op = Xsse_MINF;   goto do_64F0x2;
      case Iop_Mul64F0x2:   op = Xsse_MULF;   goto do_64F0x2;
      case Iop_Sub64F0x2:   op = Xsse_SUBF;   goto do_64F0x2;
      do_64F0x2: {
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegV(env);
         REQUIRE_SSE2;
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
         return dst;
      }

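      /* All of the following are handled by the common do_SseReRg
         scheme below: copy one operand into dst, then apply a single
         (G = G op E) SSE instruction.  The pack and unpack cases are
         not operand-symmetric, so they set arg1isEReg to force the
         IR's first operand into the E (source) position. */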
      case Iop_QNarrowBin32Sto16Sx8:
         op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
      case Iop_QNarrowBin16Sto8Sx16:
         op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
      case Iop_QNarrowBin16Sto8Ux16:
         op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;

      case Iop_InterleaveHI8x16:
         op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
      case Iop_InterleaveHI16x8:
         op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
      case Iop_InterleaveHI32x4:
         op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
      case Iop_InterleaveHI64x2:
         op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;

      case Iop_InterleaveLO8x16:
         op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
      case Iop_InterleaveLO16x8:
         op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
      case Iop_InterleaveLO32x4:
         op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
      case Iop_InterleaveLO64x2:
         op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;

      case Iop_AndV128:    op = Xsse_AND;      goto do_SseReRg;
      case Iop_OrV128:     op = Xsse_OR;       goto do_SseReRg;
      case Iop_XorV128:    op = Xsse_XOR;      goto do_SseReRg;
      case Iop_Add8x16:    op = Xsse_ADD8;     goto do_SseReRg;
      case Iop_Add16x8:    op = Xsse_ADD16;    goto do_SseReRg;
      case Iop_Add32x4:    op = Xsse_ADD32;    goto do_SseReRg;
      case Iop_Add64x2:    op = Xsse_ADD64;    goto do_SseReRg;
      case Iop_QAdd8Sx16:  op = Xsse_QADD8S;   goto do_SseReRg;
      case Iop_QAdd16Sx8:  op = Xsse_QADD16S;  goto do_SseReRg;
      case Iop_QAdd8Ux16:  op = Xsse_QADD8U;   goto do_SseReRg;
      case Iop_QAdd16Ux8:  op = Xsse_QADD16U;  goto do_SseReRg;
      case Iop_Avg8Ux16:   op = Xsse_AVG8U;    goto do_SseReRg;
      case Iop_Avg16Ux8:   op = Xsse_AVG16U;   goto do_SseReRg;
      case Iop_CmpEQ8x16:  op = Xsse_CMPEQ8;   goto do_SseReRg;
      case Iop_CmpEQ16x8:  op = Xsse_CMPEQ16;  goto do_SseReRg;
      case Iop_CmpEQ32x4:  op = Xsse_CMPEQ32;  goto do_SseReRg;
      case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S;  goto do_SseReRg;
      case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
      case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
      case Iop_Max16Sx8:   op = Xsse_MAX16S;   goto do_SseReRg;
      case Iop_Max8Ux16:   op = Xsse_MAX8U;    goto do_SseReRg;
      case Iop_Min16Sx8:   op = Xsse_MIN16S;   goto do_SseReRg;
      case Iop_Min8Ux16:   op = Xsse_MIN8U;    goto do_SseReRg;
      case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
      case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
      case Iop_Mul16x8:    op = Xsse_MUL16;    goto do_SseReRg;
      case Iop_Sub8x16:    op = Xsse_SUB8;     goto do_SseReRg;
      case Iop_Sub16x8:    op = Xsse_SUB16;    goto do_SseReRg;
      case Iop_Sub32x4:    op = Xsse_SUB32;    goto do_SseReRg;
      case Iop_Sub64x2:    op = Xsse_SUB64;    goto do_SseReRg;
      case Iop_QSub8Sx16:  op = Xsse_QSUB8S;   goto do_SseReRg;
      case Iop_QSub16Sx8:  op = Xsse_QSUB16S;  goto do_SseReRg;
      case Iop_QSub8Ux16:  op = Xsse_QSUB8U;   goto do_SseReRg;
      case Iop_QSub16Ux8:  op = Xsse_QSUB16U;  goto do_SseReRg;
      do_SseReRg: {
         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegV(env);
         if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
            REQUIRE_SSE2;
         if (arg1isEReg) {
            addInstr(env, mk_vMOVsd_RR(arg2, dst));
            addInstr(env, X86Instr_SseReRg(op, arg1, dst));
         } else {
            addInstr(env, mk_vMOVsd_RR(arg1, dst));
            addInstr(env, X86Instr_SseReRg(op, arg2, dst));
         }
         return dst;
      }

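      /* Vector shifts by a scalar amount.  The SSE shift instructions
         take the count from the low 64 bits of an xmm register, so
         the count is materialised as a 128-bit value on the stack
         (three zero words plus the count) and loaded into a vector
         register before the shift proper. */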
      case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
      case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
      case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
      case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
      case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
      case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
      case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
      case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
      do_SseShift: {
         HReg      greg = iselVecExpr(env, e->Iex.Binop.arg1);
         X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
         HReg      ereg = newVRegV(env);
         HReg      dst  = newVRegV(env);
         REQUIRE_SSE2;
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(rmi));
         addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
         addInstr(env, mk_vMOVsd_RR(greg, dst));
         addInstr(env, X86Instr_SseReRg(op, ereg, dst));
         add_to_esp(env, 16);
         return dst;
      }

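      /* Narrowing cases with no single-instruction SSE equivalent are
         routed to a generic helper (h_generic_calc_*).  The code
         below reserves a 16-aligned scratch area on the stack, passes
         pointers to the result and to the two operands in %eax, %edx
         and %ecx respectively, and reloads the result afterwards. */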
      case Iop_NarrowBin32to16x8:
         fn = (HWord)h_generic_calc_NarrowBin32to16x8;
         goto do_SseAssistedBinary;
      case Iop_NarrowBin16to8x16:
         fn = (HWord)h_generic_calc_NarrowBin16to8x16;
         goto do_SseAssistedBinary;
      do_SseAssistedBinary: {
         /* As with the amd64 case (where this is copied from) we
            generate pretty bad code. */
         vassert(fn != 0);
         HReg dst = newVRegV(env);
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg argp = newVRegI(env);
         /* subl $112, %esp         -- make a space */
         sub_from_esp(env, 112);
         /* leal 48(%esp), %r_argp  -- point into it */
         addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
                                      argp));
         /* andl $-16, %r_argp      -- 16-align the pointer */
         addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                       X86RMI_Imm( ~(UInt)15 ),
                                       argp));
         /* Prepare 3 arg regs:
            leal  0(%r_argp), %eax
            leal 16(%r_argp), %edx
            leal 32(%r_argp), %ecx
         */
         addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
                                      hregX86_EAX()));
         addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
                                      hregX86_EDX()));
         addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
                                      hregX86_ECX()));
         /* Store the two args, at (%edx) and (%ecx):
            movupd  %argL, 0(%edx)
            movupd  %argR, 0(%ecx)
         */
         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
                                        X86AMode_IR(0, hregX86_EDX())));
         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
                                        X86AMode_IR(0, hregX86_ECX())));
         /* call the helper */
         addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
                                      3, mk_RetLoc_simple(RLPri_None) ));
         /* fetch the result from memory, using %r_argp, which the
            register allocator will keep alive across the call. */
         addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
                                        X86AMode_IR(0, argp)));
         /* and finally, clear the space */
         add_to_esp(env, 112);
         return dst;
      }

      default:
         break;
   } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */


   if (e->tag == Iex_Triop) {
   IRTriop *triop = e->Iex.Triop.details;
   switch (triop->op) {

      case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm;
      case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm;
      case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm;
      case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm;
      do_32Fx4_w_rm:
      {
         HReg argL = iselVecExpr(env, triop->arg2);
         HReg argR = iselVecExpr(env, triop->arg3);
         HReg dst = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
         return dst;
      }

      case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm;
      case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm;
      case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm;
      case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm;
      do_64Fx2_w_rm:
      {
         HReg argL = iselVecExpr(env, triop->arg2);
         HReg argR = iselVecExpr(env, triop->arg3);
         HReg dst = newVRegV(env);
         REQUIRE_SSE2;
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
         return dst;
      }

      default:
         break;
   } /* switch (triop->op) */
   } /* if (e->tag == Iex_Triop) */


   if (e->tag == Iex_ITE) { // VFD
      HReg r1  = iselVecExpr(env, e->Iex.ITE.iftrue);
      HReg r0  = iselVecExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1,dst));
      X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, X86Instr_SseCMov(cc ^ 1, r0, dst));
      return dst;
   }

   vec_fail:
   vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
   ppIRExpr(e);
   vpanic("iselVecExpr_wrk");

#  undef REQUIRE_SSE1
#  undef REQUIRE_SSE2
#  undef SSE2_OR_ABOVE
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

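/* Generate code for a single IR statement, appending the selected
   instructions to env->code via addInstr.  Statement forms that
   cannot be handled fall through to stmt_fail and cause a panic. */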
static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {

   /* --------- STORE --------- */
   case Ist_Store: {
      IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
      IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
      IREndness end = stmt->Ist.Store.end;

      if (tya != Ity_I32 || end != Iend_LE)
         goto stmt_fail;

      if (tyd == Ity_I32) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
         return;
      }
      if (tyd == Ity_I8 || tyd == Ity_I16) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
                                       r,am ));
         return;
      }
      if (tyd == Ity_F64) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselDblExpr(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
         return;
      }
      if (tyd == Ity_F32) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselFltExpr(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
         return;
      }
      if (tyd == Ity_I64) {
         HReg vHi, vLo, rA;
         iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
         rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
         addInstr(env, X86Instr_Alu32M(
                          Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
         addInstr(env, X86Instr_Alu32M(
                          Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
         return;
      }
      if (tyd == Ity_V128) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselVecExpr(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
         return;
      }
      break;
   }

   /* --------- PUT --------- */
   case Ist_Put: {
      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
      if (ty == Ity_I32) {
         /* We're going to write to memory, so compute the RHS into an
            X86RI. */
         X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
         addInstr(env,
                  X86Instr_Alu32M(
                     Xalu_MOV,
                     ri,
                     X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
                 ));
         return;
      }
      if (ty == Ity_I8 || ty == Ity_I16) {
         HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
         addInstr(env, X86Instr_Store(
                          toUChar(ty==Ity_I8 ? 1 : 2),
                          r,
                          X86AMode_IR(stmt->Ist.Put.offset,
                                      hregX86_EBP())));
         return;
      }
      if (ty == Ity_I64) {
         HReg vHi, vLo;
         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         X86AMode* am4 = advance4(am);
         iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
         return;
      }
      if (ty == Ity_V128) {
         HReg      vec = iselVecExpr(env, stmt->Ist.Put.data);
         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
         return;
      }
      if (ty == Ity_F32) {
         HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
         X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         set_FPU_rounding_default(env); /* paranoia */
         addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
         return;
      }
      if (ty == Ity_F64) {
         HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
         X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         set_FPU_rounding_default(env); /* paranoia */
         addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
         return;
      }
      break;
   }

   /* --------- Indexed PUT --------- */
   case Ist_PutI: {
      IRPutI *puti = stmt->Ist.PutI.details;

      X86AMode* am
         = genGuestArrayOffset(
              env, puti->descr,
                   puti->ix, puti->bias );

      IRType ty = typeOfIRExpr(env->type_env, puti->data);
      if (ty == Ity_F64) {
         HReg val = iselDblExpr(env, puti->data);
         addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
         return;
      }
      if (ty == Ity_I8) {
         HReg r = iselIntExpr_R(env, puti->data);
         addInstr(env, X86Instr_Store( 1, r, am ));
         return;
      }
      if (ty == Ity_I32) {
         HReg r = iselIntExpr_R(env, puti->data);
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
         return;
      }
      if (ty == Ity_I64) {
         HReg rHi, rLo;
         X86AMode* am4 = advance4(am);
         iselInt64Expr(&rHi, &rLo, env, puti->data);
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
         return;
      }
      break;
   }

   /* --------- TMP --------- */
   case Ist_WrTmp: {
      IRTemp tmp = stmt->Ist.WrTmp.tmp;
      IRType ty = typeOfIRTemp(env->type_env, tmp);

      /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
         compute it into an AMode and then use LEA.  This usually
         produces fewer instructions, often because (for memcheck
         created IR) we get t = address-expression, (t is later used
         twice) and so doing this naturally turns address-expression
         back into an X86 amode. */
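      /* For example,  t5 = Add32(t2,0x10:I32)  can then be selected
         as a single  leal 16(%vr_t2),%vr_t5  rather than a separate
         move and add.  (t2/t5 are illustrative temporary names.) */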
      if (ty == Ity_I32
          && stmt->Ist.WrTmp.data->tag == Iex_Binop
          && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
            /* Hmm, iselIntExpr_AMode wimped out and just computed the
               value into a register.  Just emit a normal reg-reg move
               so reg-alloc can coalesce it away in the usual way. */
            HReg src = am->Xam.IR.reg;
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
         } else {
            addInstr(env, X86Instr_Lea32(am,dst));
         }
         return;
      }

      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
         X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
         return;
      }
      if (ty == Ity_I64) {
         HReg rHi, rLo, dstHi, dstLo;
         iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTemp64( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
         return;
      }
      if (ty == Ity_I1) {
         X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, X86Instr_Set32(cond, dst));
         return;
      }
      if (ty == Ity_F64) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
         return;
      }
      if (ty == Ity_F32) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
         return;
      }
      if (ty == Ity_V128) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src,dst));
         return;
      }
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   case Ist_Dirty: {
      IRDirty* d = stmt->Ist.Dirty.details;

      /* Figure out the return type, if any. */
      IRType retty = Ity_INVALID;
      if (d->tmp != IRTemp_INVALID)
         retty = typeOfIRTemp(env->type_env, d->tmp);

      Bool retty_ok = False;
      switch (retty) {
         case Ity_INVALID: /* function doesn't return anything */
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         case Ity_V128:
            retty_ok = True; break;
         default:
            break;
      }
      if (!retty_ok)
         break; /* will go to stmt_fail: */

      /* Marshal args, do the call, and set the return value to
         0x555..555 if this is a conditional call that returns a value
         and the call is skipped. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
      vassert(is_sane_RetLoc(rloc));

      /* Now figure out what to do with the returned value, if any. */
      switch (retty) {
         case Ity_INVALID: {
            /* No return value.  Nothing to do. */
            vassert(d->tmp == IRTemp_INVALID);
            vassert(rloc.pri == RLPri_None);
            vassert(addToSp == 0);
            return;
         }
         case Ity_I32: case Ity_I16: case Ity_I8: {
            /* The returned value is in %eax.  Park it in the register
               associated with tmp. */
            vassert(rloc.pri == RLPri_Int);
            vassert(addToSp == 0);
            HReg dst = lookupIRTemp(env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
            return;
         }
         case Ity_I64: {
            /* The returned value is in %edx:%eax.  Park it in the
               register-pair associated with tmp. */
            vassert(rloc.pri == RLPri_2Int);
            vassert(addToSp == 0);
            HReg dstHi, dstLo;
            lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
            return;
         }
         case Ity_V128: {
            /* The returned value is on the stack, and *retloc tells
               us where.  Fish it off the stack and then move the
               stack pointer upwards to clear it, as directed by
               doHelperCall. */
            vassert(rloc.pri == RLPri_V128SpRel);
            vassert(addToSp >= 16);
            HReg dst = lookupIRTemp(env, d->tmp);
            X86AMode* am = X86AMode_IR(rloc.spOff, hregX86_ESP());
            addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
            add_to_esp(env, addToSp);
            return;
         }
         default:
            /*NOTREACHED*/
            vassert(0);
      }
      break;
   }

   /* --------- MEM FENCE --------- */
   case Ist_MBE:
      switch (stmt->Ist.MBE.event) {
         case Imbe_Fence:
            addInstr(env, X86Instr_MFence(env->hwcaps));
            return;
         default:
            break;
      }
      break;

   /* --------- ACAS --------- */
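   /* Note the fixed register use: cmpxchg requires the expected value
      in %eax, and the code here puts the new data in %ebx; cmpxchg8b
      additionally requires %edx:%eax and %ecx:%ebx.  The old-value
      temporaries are pre-loaded with the expected value and only
      overwritten from %eax (%edx:%eax) when the compare fails
      (Xcc_NZ). */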
   case Ist_CAS:
      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
         /* "normal" singleton CAS */
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
         X86AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         vassert(cas->expdHi == NULL);
         vassert(cas->dataHi == NULL);
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
         switch (ty) {
            case Ity_I32: sz = 4; break;
            case Ity_I16: sz = 2; break;
            case Ity_I8:  sz = 1; break;
            default: goto unhandled_cas;
         }
         addInstr(env, X86Instr_ACAS(am, sz));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EAX()), rOldLo));
         return;
      } else {
         /* double CAS */
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* only 32-bit allowed in this case */
         /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
         /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
         X86AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         if (ty != Ity_I32)
            goto unhandled_cas;
         addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
         addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
         addInstr(env, X86Instr_DACAS(am));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EDX()), rOldHi));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EAX()), rOldLo));
         return;
      }
      unhandled_cas:
      break;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
       return;

   /* --------- NO-OP --------- */
   /* Fairly self-explanatory, wouldn't you say? */
   case Ist_NoOp:
       return;

   /* --------- EXIT --------- */
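   /* Conditional exit.  A Boring exit to a known address is chained
      (XDirect) when chaining is allowed; every other case becomes an
      assisted transfer (XAssisted). */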
   case Ist_Exit: {
      if (stmt->Ist.Exit.dst->tag != Ico_U32)
         vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");

      X86CondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
      X86AMode*   amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
                                      hregX86_EBP());

      /* Case: boring transfer to known address */
      if (stmt->Ist.Exit.jk == Ijk_Boring) {
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
            addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
                                           amEIP, cc, toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
         }
         return;
      }

      /* Case: assisted transfer to arbitrary address */
      switch (stmt->Ist.Exit.jk) {
         /* Keep this list in sync with that in iselNext below */
         case Ijk_ClientReq:
         case Ijk_EmWarn:
         case Ijk_MapFail:
         case Ijk_NoDecode:
         case Ijk_NoRedir:
         case Ijk_SigSEGV:
         case Ijk_SigTRAP:
         case Ijk_Sys_int128:
         case Ijk_Sys_int129:
         case Ijk_Sys_int130:
         case Ijk_Sys_int145:
         case Ijk_Sys_int210:
         case Ijk_Sys_syscall:
         case Ijk_Sys_sysenter:
         case Ijk_InvalICache:
         case Ijk_Yield:
         {
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk));
            return;
         }
         default:
            break;
      }

      /* Do we ever expect to see any other kind? */
      goto stmt_fail;
   }

   default: break;
   }
   stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

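/* Generate code for the block's final transfer of control to 'next'
   with jump kind 'jk'.  offsIP is the guest state offset of %eip,
   which the emitted XDirect/XIndir/XAssisted instructions update with
   the destination before transferring. */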
static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U32);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr32)cdst->Ico.U32) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
                                           amEIP, Xcc_ALWAYS,
                                           toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                             Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg      r     = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
         } else {
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                             Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_EmWarn:
      case Ijk_MapFail:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_SigSEGV:
      case Ijk_SigTRAP:
      case Ijk_Sys_int128:
      case Ijk_Sys_int129:
      case Ijk_Sys_int130:
      case Ijk_Sys_int145:
      case Ijk_Sys_int210:
      case Ijk_Sys_syscall:
      case Ijk_Sys_sysenter:
      case Ijk_InvalICache:
      case Ijk_Yield:
      {
         HReg      r     = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to x86 code. */

HInstrArray* iselSB_X86 ( const IRSB* bb,
                          VexArch      arch_host,
                          const VexArchInfo* archinfo_host,
                          const VexAbiInfo*  vbi/*UNUSED*/,
                          Int offs_Host_EvC_Counter,
                          Int offs_Host_EvC_FailAddr,
                          Bool chainingAllowed,
                          Bool addProfInc,
                          Addr max_ga )
{
   Int      i, j;
   HReg     hreg, hregHI;
   ISelEnv* env;
   UInt     hwcaps_host = archinfo_host->hwcaps;
   X86AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchX86);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_X86_MMXEXT
                     | VEX_HWCAPS_X86_SSE1
                     | VEX_HWCAPS_X86_SSE2
                     | VEX_HWCAPS_X86_SSE3
                     | VEX_HWCAPS_X86_LZCNT)));

   /* Check that the host's endianness is as expected. */
   vassert(archinfo_host->endness == VexEndnessLE);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:  hreg   = mkHReg(True, HRcInt32,  0, j++); break;
         case Ity_I64:  hreg   = mkHReg(True, HRcInt32,  0, j++);
                        hregHI = mkHReg(True, HRcInt32,  0, j++); break;
         case Ity_F32:
         case Ity_F64:  hreg   = mkHReg(True, HRcFlt64,  0, j++); break;
         case Ity_V128: hreg   = mkHReg(True, HRcVec128, 0, j++); break;
         default: ppIRType(bb->tyenv->types[i]);
                  vpanic("iselBB: IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* The very first instruction must be an event check. */
   amCounter  = X86AMode_IR(offs_Host_EvC_Counter,  hregX86_EBP());
   amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
   addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, X86Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                     host_x86_isel.c ---*/
/*---------------------------------------------------------------*/