/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2011 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_x86_defs.h"

/* TODO 21 Apr 2005:

   -- (Really an assembler issue) don't emit CMov32 as a cmov
      insn, since that's expensive on P4 and conditional branch
      is cheaper if (as we expect) the condition is highly predictable

   -- preserve xmm registers across function calls (by declaring them
      as trashed by call insns)

   -- preserve x87 ST stack discipline across function calls.  Sigh.

   -- Check doHelperCall: if a call is conditional, we cannot safely
      compute any regparm args directly to registers.  Hence, the
      fast-regparm marshalling should be restricted to unconditional
      calls only.
*/

/*---------------------------------------------------------*/
/*--- x87 control word stuff                            ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged
   at exit.
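   (For reference, 0x027F decodes as: exception-mask bits IM/DM/ZM/OM/
   UM/PM (bits 0..5) all set, precision control (bits 9:8) = 10b =
   53-bit double precision, rounding control (bits 11:10) = 00b =
   round to nearest.  0x1F80 sets the six %mxcsr exception-mask bits
   (bits 12:7), with rounding control (bits 14:13) = 00b = round to
   nearest and FZ/DAZ clear.)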
78*/ 79 80#define DEFAULT_FPUCW 0x027F 81 82/* debugging only, do not use */ 83/* define DEFAULT_FPUCW 0x037F */ 84 85 86/*---------------------------------------------------------*/ 87/*--- misc helpers ---*/ 88/*---------------------------------------------------------*/ 89 90/* These are duplicated in guest-x86/toIR.c */ 91static IRExpr* unop ( IROp op, IRExpr* a ) 92{ 93 return IRExpr_Unop(op, a); 94} 95 96static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 97{ 98 return IRExpr_Binop(op, a1, a2); 99} 100 101static IRExpr* bind ( Int binder ) 102{ 103 return IRExpr_Binder(binder); 104} 105 106static Bool isZeroU8 ( IRExpr* e ) 107{ 108 return e->tag == Iex_Const 109 && e->Iex.Const.con->tag == Ico_U8 110 && e->Iex.Const.con->Ico.U8 == 0; 111} 112 113static Bool isZeroU32 ( IRExpr* e ) 114{ 115 return e->tag == Iex_Const 116 && e->Iex.Const.con->tag == Ico_U32 117 && e->Iex.Const.con->Ico.U32 == 0; 118} 119 120static Bool isZeroU64 ( IRExpr* e ) 121{ 122 return e->tag == Iex_Const 123 && e->Iex.Const.con->tag == Ico_U64 124 && e->Iex.Const.con->Ico.U64 == 0ULL; 125} 126 127 128/*---------------------------------------------------------*/ 129/*--- ISelEnv ---*/ 130/*---------------------------------------------------------*/ 131 132/* This carries around: 133 134 - A mapping from IRTemp to IRType, giving the type of any IRTemp we 135 might encounter. This is computed before insn selection starts, 136 and does not change. 137 138 - A mapping from IRTemp to HReg. This tells the insn selector 139 which virtual register(s) are associated with each IRTemp 140 temporary. This is computed before insn selection starts, and 141 does not change. We expect this mapping to map precisely the 142 same set of IRTemps as the type mapping does. 143 144 - vregmap holds the primary register for the IRTemp. 145 - vregmapHI is only used for 64-bit integer-typed 146 IRTemps. It holds the identity of a second 147 32-bit virtual HReg, which holds the high half 148 of the value. 149 150 - The code array, that is, the insns selected so far. 151 152 - A counter, for generating new virtual registers. 153 154 - The host subarchitecture we are selecting insns for. 155 This is set at the start and does not change. 156 157 Note, this is all host-independent. 
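   For example, an Ity_I64 IRTemp t is represented by the vreg pair
   (vregmapHI[t], vregmap[t]), holding the high and low 32-bit halves
   respectively; for every other IRTemp only vregmap[t] is used and
   vregmapHI[t] is left as INVALID_HREG.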
*/

typedef
   struct {
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      HInstrArray* code;

      Int          vreg_ctr;

      UInt         hwcaps;
   }
   ISelEnv;


static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(env->vregmapHI[tmp] != INVALID_HREG);
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, X86Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppX86Instr(instr, False);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegF ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static X86RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static X86RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );

static X86RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, IRExpr* e );
static X86RI*      iselIntExpr_RI      ( ISelEnv* env, IRExpr* e );

static X86RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, IRExpr* e );
static X86RM*      iselIntExpr_RM      ( ISelEnv* env, IRExpr* e );

static HReg        iselIntExpr_R_wrk   ( ISelEnv* env, IRExpr* e );
static HReg        iselIntExpr_R       ( ISelEnv* env, IRExpr* e );

static X86AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static X86AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );

static void        iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
                                       ISelEnv* env, IRExpr* e );
static void        iselInt64Expr     ( HReg* rHi, HReg* rLo,
                                       ISelEnv* env, IRExpr* e );

static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
static X86CondCode iselCondCode     ( ISelEnv* env, IRExpr* e );

static HReg        iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg        iselDblExpr     ( ISelEnv* env, IRExpr* e );

static HReg        iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg        iselFltExpr     ( ISelEnv* env, IRExpr* e );

static HReg        iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg        iselVecExpr     ( ISelEnv* env, IRExpr* e );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

/* Make an int reg-reg move.
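   The move is expressed as an ordinary Alu32R MOV with a register
   source, which the register allocator can recognise as a copy and
   often coalesce away (see the comment on iselIntExpr_R below).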
*/ 271 272static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst ) 273{ 274 vassert(hregClass(src) == HRcInt32); 275 vassert(hregClass(dst) == HRcInt32); 276 return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst); 277} 278 279 280/* Make a vector reg-reg move. */ 281 282static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst ) 283{ 284 vassert(hregClass(src) == HRcVec128); 285 vassert(hregClass(dst) == HRcVec128); 286 return X86Instr_SseReRg(Xsse_MOV, src, dst); 287} 288 289/* Advance/retreat %esp by n. */ 290 291static void add_to_esp ( ISelEnv* env, Int n ) 292{ 293 vassert(n > 0 && n < 256 && (n%4) == 0); 294 addInstr(env, 295 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP())); 296} 297 298static void sub_from_esp ( ISelEnv* env, Int n ) 299{ 300 vassert(n > 0 && n < 256 && (n%4) == 0); 301 addInstr(env, 302 X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP())); 303} 304 305 306/* Given an amode, return one which references 4 bytes further 307 along. */ 308 309static X86AMode* advance4 ( X86AMode* am ) 310{ 311 X86AMode* am4 = dopyX86AMode(am); 312 switch (am4->tag) { 313 case Xam_IRRS: 314 am4->Xam.IRRS.imm += 4; break; 315 case Xam_IR: 316 am4->Xam.IR.imm += 4; break; 317 default: 318 vpanic("advance4(x86,host)"); 319 } 320 return am4; 321} 322 323 324/* Push an arg onto the host stack, in preparation for a call to a 325 helper function of some kind. Returns the number of 32-bit words 326 pushed. */ 327 328static Int pushArg ( ISelEnv* env, IRExpr* arg ) 329{ 330 IRType arg_ty = typeOfIRExpr(env->type_env, arg); 331 if (arg_ty == Ity_I32) { 332 addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg))); 333 return 1; 334 } else 335 if (arg_ty == Ity_I64) { 336 HReg rHi, rLo; 337 iselInt64Expr(&rHi, &rLo, env, arg); 338 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 339 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 340 return 2; 341 } 342 ppIRExpr(arg); 343 vpanic("pushArg(x86): can't handle arg of this type"); 344} 345 346 347/* Complete the call to a helper function, by calling the 348 helper and clearing the args off the stack. */ 349 350static 351void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc, 352 IRCallee* cee, Int n_arg_ws ) 353{ 354 /* Complication. Need to decide which reg to use as the fn address 355 pointer, in a way that doesn't trash regparm-passed 356 parameters. */ 357 vassert(sizeof(void*) == 4); 358 359 addInstr(env, X86Instr_Call( cc, toUInt(Ptr_to_ULong(cee->addr)), 360 cee->regparms)); 361 if (n_arg_ws > 0) 362 add_to_esp(env, 4*n_arg_ws); 363} 364 365 366/* Used only in doHelperCall. See big comment in doHelperCall re 367 handling of regparm args. This function figures out whether 368 evaluation of an expression might require use of a fixed register. 369 If in doubt return True (safe but suboptimal). 370*/ 371static 372Bool mightRequireFixedRegs ( IRExpr* e ) 373{ 374 switch (e->tag) { 375 case Iex_RdTmp: case Iex_Const: case Iex_Get: 376 return False; 377 default: 378 return True; 379 } 380} 381 382 383/* Do a complete function call. guard is a Ity_Bit expression 384 indicating whether or not the call happens. If guard==NULL, the 385 call is unconditional. */ 386 387static 388void doHelperCall ( ISelEnv* env, 389 Bool passBBP, 390 IRExpr* guard, IRCallee* cee, IRExpr** args ) 391{ 392 X86CondCode cc; 393 HReg argregs[3]; 394 HReg tmpregs[3]; 395 Bool danger; 396 Int not_done_yet, n_args, n_arg_ws, stack_limit, 397 i, argreg, argregX; 398 399 /* Marshal args for a call, do the call, and clear the stack. 
     Complexities to consider:

     * if passBBP is True, %ebp (the baseblock pointer) is to be
       passed as the first arg.

     * If the callee claims regparmness of 1, 2 or 3, we must pass the
       first 1, 2 or 3 args in registers (EAX, EDX, and ECX
       respectively).  To keep things relatively simple, only args of
       type I32 may be passed as regparms -- just bomb out if anything
       else turns up.  Clearly this depends on the front ends not
       trying to pass any other types as regparms.
   */

   /* 16 Nov 2004: the regparm handling is complicated by the
      following problem.

      Consider a call to a function with two regparm parameters:
      f(e1,e2).  We need to compute e1 into %eax and e2 into %edx.
      Suppose code is first generated to compute e1 into %eax.  Then,
      code is generated to compute e2 into %edx.  Unfortunately, if
      the latter code sequence uses %eax, it will trash the value of
      e1 computed by the former sequence.  This could happen if (for
      example) e2 itself involved a function call.  In the code below,
      args are evaluated right-to-left, not left-to-right, but the
      principle and the problem are the same.

      One solution is to compute all regparm-bound args into vregs
      first, and once they are all done, move them to the relevant
      real regs.  This always gives correct code, but it also gives
      a bunch of vreg-to-rreg moves which are usually redundant but
      are hard for the register allocator to get rid of.

      A compromise is to first examine all regparm'd argument
      expressions.  If they are all so simple that it is clear
      they will be evaluated without use of any fixed registers,
      use the old compute-directly-to-fixed-target scheme.  If not,
      be safe and use the via-vregs scheme.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this
      insn selector works.  Currently just the following 3 are
      regarded as safe -- hopefully they cover the majority of
      arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */
   vassert(cee->regparms >= 0 && cee->regparms <= 3);

   n_args = n_arg_ws = 0;
   while (args[n_args]) n_args++;

   not_done_yet = n_args;
   if (passBBP)
      not_done_yet++;

   stack_limit = cee->regparms;
   if (cee->regparms > 0 && passBBP) stack_limit--;

   /* ------ BEGIN marshall all arguments ------ */

   /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
   for (i = n_args-1; i >= stack_limit; i--) {
      n_arg_ws += pushArg(env, args[i]);
      not_done_yet--;
   }

   /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
      registers. */

   if (cee->regparms > 0) {

      /* ------ BEGIN deal with regparms ------ */

      /* deal with regparms, not forgetting %ebp if needed. */
      argregs[0] = hregX86_EAX();
      argregs[1] = hregX86_EDX();
      argregs[2] = hregX86_ECX();
      tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;

      argreg = cee->regparms;

      /* In keeping with big comment above, detect potential danger
         and use the via-vregs scheme if needed.
      */
      danger = False;
      for (i = stack_limit-1; i >= 0; i--) {
         if (mightRequireFixedRegs(args[i])) {
            danger = True;
            break;
         }
      }

      if (danger) {

         /* Move via temporaries */
         argregX = argreg;
         for (i = stack_limit-1; i >= 0; i--) {

            if (0) {
               vex_printf("x86 host: register param is complex: ");
               ppIRExpr(args[i]);
               vex_printf("\n");
            }

            argreg--;
            vassert(argreg >= 0);
            vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
            tmpregs[argreg] = iselIntExpr_R(env, args[i]);
            not_done_yet--;
         }
         for (i = stack_limit-1; i >= 0; i--) {
            argregX--;
            vassert(argregX >= 0);
            addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
         }

      } else {
         /* It's safe to compute all regparm args directly into their
            target registers. */
         for (i = stack_limit-1; i >= 0; i--) {
            argreg--;
            vassert(argreg >= 0);
            vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
            addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                          iselIntExpr_RMI(env, args[i]),
                                          argregs[argreg]));
            not_done_yet--;
         }

      }

      /* Not forgetting %ebp if needed. */
      if (passBBP) {
         vassert(argreg == 1);
         addInstr(env, mk_iMOVsd_RR( hregX86_EBP(), argregs[0]));
         not_done_yet--;
      }

      /* ------ END deal with regparms ------ */

   } else {

      /* No regparms.  Heave %ebp on the stack if needed. */
      if (passBBP) {
         addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
         n_arg_ws++;
         not_done_yet--;
      }

   }

   vassert(not_done_yet == 0);

   /* ------ END marshall all arguments ------ */

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Xcc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* call the helper, and get the args off the stack afterwards. */
   callHelperAndClearArgs( env, cc, cee, n_arg_ws );
}


/* Given a guest-state array descriptor, an index expression and a
   bias, generate an X86AMode holding the relevant guest state
   offset. */

static
X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;
   Int  shift  = 0;

   /* throw out any cases not generated by an x86 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-x86-guest on x86 host. */

   if (nElems != 8)
      vpanic("genGuestArrayOffset(x86 host)(1)");

   switch (elemSz) {
      case 1:  shift = 0; break;
      case 4:  shift = 2; break;
      case 8:  shift = 3; break;
      default: vpanic("genGuestArrayOffset(x86 host)(2)");
   }

   /* Compute off into a reg, %off.  Then return:

         movl  %off, %tmp
         addl  $bias, %tmp  (if bias != 0)
         andl  $7, %tmp
         ... base(%ebp, %tmp, shift) ...
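      where the effective scale of the amode is 1 << shift.  For
      instance, with an 8-element array of F64 (elemSz 8, shift 3),
      an index expression %off and a bias of 1, this yields roughly:

         movl  %off, %tmp
         addl  $1, %tmp
         andl  $7, %tmp
         ... base(%ebp, %tmp, 8) ...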
606 */ 607 tmp = newVRegI(env); 608 roff = iselIntExpr_R(env, off); 609 addInstr(env, mk_iMOVsd_RR(roff, tmp)); 610 if (bias != 0) { 611 addInstr(env, 612 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp)); 613 } 614 addInstr(env, 615 X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp)); 616 return 617 X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift ); 618} 619 620 621/* Mess with the FPU's rounding mode: set to the default rounding mode 622 (DEFAULT_FPUCW). */ 623static 624void set_FPU_rounding_default ( ISelEnv* env ) 625{ 626 /* pushl $DEFAULT_FPUCW 627 fldcw 0(%esp) 628 addl $4, %esp 629 */ 630 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 631 addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW))); 632 addInstr(env, X86Instr_FpLdCW(zero_esp)); 633 add_to_esp(env, 4); 634} 635 636 637/* Mess with the FPU's rounding mode: 'mode' is an I32-typed 638 expression denoting a value in the range 0 .. 3, indicating a round 639 mode encoded as per type IRRoundingMode. Set the x87 FPU to have 640 the same rounding. 641*/ 642static 643void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode ) 644{ 645 HReg rrm = iselIntExpr_R(env, mode); 646 HReg rrm2 = newVRegI(env); 647 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 648 649 /* movl %rrm, %rrm2 650 andl $3, %rrm2 -- shouldn't be needed; paranoia 651 shll $10, %rrm2 652 orl $DEFAULT_FPUCW, %rrm2 653 pushl %rrm2 654 fldcw 0(%esp) 655 addl $4, %esp 656 */ 657 addInstr(env, mk_iMOVsd_RR(rrm, rrm2)); 658 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2)); 659 addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2)); 660 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2)); 661 addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2))); 662 addInstr(env, X86Instr_FpLdCW(zero_esp)); 663 add_to_esp(env, 4); 664} 665 666 667/* Generate !src into a new vector register, and be sure that the code 668 is SSE1 compatible. Amazing that Intel doesn't offer a less crappy 669 way to do this. 670*/ 671static HReg do_sse_Not128 ( ISelEnv* env, HReg src ) 672{ 673 HReg dst = newVRegV(env); 674 /* Set dst to zero. If dst contains a NaN then all hell might 675 break loose after the comparison. So, first zero it. */ 676 addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst)); 677 /* And now make it all 1s ... */ 678 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst)); 679 /* Finally, xor 'src' into it. */ 680 addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst)); 681 /* Doesn't that just totally suck? */ 682 return dst; 683} 684 685 686/* Round an x87 FPU value to 53-bit-mantissa precision, to be used 687 after most non-simple FPU operations (simple = +, -, *, / and 688 sqrt). 689 690 This could be done a lot more efficiently if needed, by loading 691 zero and adding it to the value to be rounded (fldz ; faddp?). 692*/ 693static void roundToF64 ( ISelEnv* env, HReg reg ) 694{ 695 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 696 sub_from_esp(env, 8); 697 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp)); 698 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp)); 699 add_to_esp(env, 8); 700} 701 702 703/*---------------------------------------------------------*/ 704/*--- ISEL: Integer expressions (32/16/8 bit) ---*/ 705/*---------------------------------------------------------*/ 706 707/* Select insns for an integer-typed expression, and add them to the 708 code list. Return a reg holding the result. This reg will be a 709 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. 
   If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 32, 16 and 8-bit type.  All
   results are returned in a 32-bit register.  For 16- and 8-bit
   expressions, the upper 16/24 bits are arbitrary, so you should mask
   or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt32);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;

   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   switch (e->tag) {

   /* --------- TEMP --------- */
   case Iex_RdTmp: {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* --------- LOAD --------- */
   case Iex_Load: {
      HReg dst = newVRegI(env);
      X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

      /* We can't handle big-endian loads, nor load-linked. */
      if (e->Iex.Load.end != Iend_LE)
         goto irreducible;

      if (ty == Ity_I32) {
         addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                       X86RMI_Mem(amode), dst) );
         return dst;
      }
      if (ty == Ity_I16) {
         addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I8) {
         addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
         return dst;
      }
      break;
   }

   /* --------- TERNARY OP --------- */
   case Iex_Triop: {
      /* C3210 flags following FPU partial remainder (fprem), both
         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
      if (e->Iex.Triop.op == Iop_PRemC3210F64
          || e->Iex.Triop.op == Iop_PRem1C3210F64) {
         HReg junk = newVRegF(env);
         HReg dst  = newVRegI(env);
         HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
         HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, X86Instr_FpBinary(
                          e->Iex.Triop.op==Iop_PRemC3210F64
                             ? Xfp_PREM : Xfp_PREM1,
                          srcL,srcR,junk
                ));
         /* The previous pseudo-insn will have left the FPU's C3210
            flags set correctly.  So bag them. */
         addInstr(env, X86Instr_FpStSW_AX());
         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
         return dst;
      }

      break;
   }

   /* --------- BINARY OP --------- */
   case Iex_Binop: {
      X86AluOp   aluOp;
      X86ShiftOp shOp;

      /* Pattern: Sub32(0,x) */
      if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
         HReg dst = newVRegI(env);
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
         return dst;
      }

      /* Is it an addition or logical style op?
*/ 816 switch (e->Iex.Binop.op) { 817 case Iop_Add8: case Iop_Add16: case Iop_Add32: 818 aluOp = Xalu_ADD; break; 819 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: 820 aluOp = Xalu_SUB; break; 821 case Iop_And8: case Iop_And16: case Iop_And32: 822 aluOp = Xalu_AND; break; 823 case Iop_Or8: case Iop_Or16: case Iop_Or32: 824 aluOp = Xalu_OR; break; 825 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: 826 aluOp = Xalu_XOR; break; 827 case Iop_Mul16: case Iop_Mul32: 828 aluOp = Xalu_MUL; break; 829 default: 830 aluOp = Xalu_INVALID; break; 831 } 832 /* For commutative ops we assume any literal 833 values are on the second operand. */ 834 if (aluOp != Xalu_INVALID) { 835 HReg dst = newVRegI(env); 836 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1); 837 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 838 addInstr(env, mk_iMOVsd_RR(reg,dst)); 839 addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst)); 840 return dst; 841 } 842 /* Could do better here; forcing the first arg into a reg 843 isn't always clever. 844 -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)), 845 LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32( 846 t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32))) 847 movl 0xFFFFFFA0(%vr41),%vr107 848 movl 0xFFFFFFA4(%vr41),%vr108 849 movl %vr107,%vr106 850 xorl %vr108,%vr106 851 movl 0xFFFFFFA8(%vr41),%vr109 852 movl %vr106,%vr105 853 andl %vr109,%vr105 854 movl 0xFFFFFFA0(%vr41),%vr110 855 movl %vr105,%vr104 856 xorl %vr110,%vr104 857 movl %vr104,%vr70 858 */ 859 860 /* Perhaps a shift op? */ 861 switch (e->Iex.Binop.op) { 862 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8: 863 shOp = Xsh_SHL; break; 864 case Iop_Shr32: case Iop_Shr16: case Iop_Shr8: 865 shOp = Xsh_SHR; break; 866 case Iop_Sar32: case Iop_Sar16: case Iop_Sar8: 867 shOp = Xsh_SAR; break; 868 default: 869 shOp = Xsh_INVALID; break; 870 } 871 if (shOp != Xsh_INVALID) { 872 HReg dst = newVRegI(env); 873 874 /* regL = the value to be shifted */ 875 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1); 876 addInstr(env, mk_iMOVsd_RR(regL,dst)); 877 878 /* Do any necessary widening for 16/8 bit operands */ 879 switch (e->Iex.Binop.op) { 880 case Iop_Shr8: 881 addInstr(env, X86Instr_Alu32R( 882 Xalu_AND, X86RMI_Imm(0xFF), dst)); 883 break; 884 case Iop_Shr16: 885 addInstr(env, X86Instr_Alu32R( 886 Xalu_AND, X86RMI_Imm(0xFFFF), dst)); 887 break; 888 case Iop_Sar8: 889 addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst)); 890 addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst)); 891 break; 892 case Iop_Sar16: 893 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst)); 894 addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst)); 895 break; 896 default: break; 897 } 898 899 /* Now consider the shift amount. If it's a literal, we 900 can do a much better job than the general case. */ 901 if (e->Iex.Binop.arg2->tag == Iex_Const) { 902 /* assert that the IR is well-typed */ 903 Int nshift; 904 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 905 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 906 vassert(nshift >= 0); 907 if (nshift > 0) 908 /* Can't allow nshift==0 since that means %cl */ 909 addInstr(env, X86Instr_Sh32( shOp, nshift, dst )); 910 } else { 911 /* General case; we have to force the amount into %cl. */ 912 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2); 913 addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX())); 914 addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst)); 915 } 916 return dst; 917 } 918 919 /* Handle misc other ops. 
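         Below: Max32U becomes a cmp followed by a cmovb; 8- and 16-bit
         multiplies are widened to 32 bits (shift left, then arithmetic
         or logical shift right according to signedness) and done with
         a 32-bit mul; CmpF64 uses FpCmp and shifts the result into
         place; and the F64-to-integer conversions go via memory, with
         the requested rounding mode temporarily installed in the FPU.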
*/ 920 921 if (e->Iex.Binop.op == Iop_Max32U) { 922 HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 923 HReg dst = newVRegI(env); 924 HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); 925 addInstr(env, mk_iMOVsd_RR(src1,dst)); 926 addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst)); 927 addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst)); 928 return dst; 929 } 930 931 if (e->Iex.Binop.op == Iop_8HLto16) { 932 HReg hi8 = newVRegI(env); 933 HReg lo8 = newVRegI(env); 934 HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1); 935 HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2); 936 addInstr(env, mk_iMOVsd_RR(hi8s, hi8)); 937 addInstr(env, mk_iMOVsd_RR(lo8s, lo8)); 938 addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8)); 939 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8)); 940 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8)); 941 return hi8; 942 } 943 944 if (e->Iex.Binop.op == Iop_16HLto32) { 945 HReg hi16 = newVRegI(env); 946 HReg lo16 = newVRegI(env); 947 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1); 948 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2); 949 addInstr(env, mk_iMOVsd_RR(hi16s, hi16)); 950 addInstr(env, mk_iMOVsd_RR(lo16s, lo16)); 951 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16)); 952 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16)); 953 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16)); 954 return hi16; 955 } 956 957 if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8 958 || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) { 959 HReg a16 = newVRegI(env); 960 HReg b16 = newVRegI(env); 961 HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1); 962 HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2); 963 Int shift = (e->Iex.Binop.op == Iop_MullS8 964 || e->Iex.Binop.op == Iop_MullU8) 965 ? 24 : 16; 966 X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8 967 || e->Iex.Binop.op == Iop_MullS16) 968 ? Xsh_SAR : Xsh_SHR; 969 970 addInstr(env, mk_iMOVsd_RR(a16s, a16)); 971 addInstr(env, mk_iMOVsd_RR(b16s, b16)); 972 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16)); 973 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16)); 974 addInstr(env, X86Instr_Sh32(shr_op, shift, a16)); 975 addInstr(env, X86Instr_Sh32(shr_op, shift, b16)); 976 addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16)); 977 return b16; 978 } 979 980 if (e->Iex.Binop.op == Iop_CmpF64) { 981 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1); 982 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2); 983 HReg dst = newVRegI(env); 984 addInstr(env, X86Instr_FpCmp(fL,fR,dst)); 985 /* shift this right 8 bits so as to conform to CmpF64 986 definition. */ 987 addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst)); 988 return dst; 989 } 990 991 if (e->Iex.Binop.op == Iop_F64toI32S 992 || e->Iex.Binop.op == Iop_F64toI16S) { 993 Int sz = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4; 994 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2); 995 HReg dst = newVRegI(env); 996 997 /* Used several times ... */ 998 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 999 1000 /* rf now holds the value to be converted, and rrm holds the 1001 rounding mode value, encoded as per the IRRoundingMode 1002 enum. The first thing to do is set the FPU's rounding 1003 mode accordingly. */ 1004 1005 /* Create a space for the format conversion. 
*/ 1006 /* subl $4, %esp */ 1007 sub_from_esp(env, 4); 1008 1009 /* Set host rounding mode */ 1010 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 1011 1012 /* gistw/l %rf, 0(%esp) */ 1013 addInstr(env, X86Instr_FpLdStI(False/*store*/, 1014 toUChar(sz), rf, zero_esp)); 1015 1016 if (sz == 2) { 1017 /* movzwl 0(%esp), %dst */ 1018 addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst)); 1019 } else { 1020 /* movl 0(%esp), %dst */ 1021 vassert(sz == 4); 1022 addInstr(env, X86Instr_Alu32R( 1023 Xalu_MOV, X86RMI_Mem(zero_esp), dst)); 1024 } 1025 1026 /* Restore default FPU rounding. */ 1027 set_FPU_rounding_default( env ); 1028 1029 /* addl $4, %esp */ 1030 add_to_esp(env, 4); 1031 return dst; 1032 } 1033 1034 break; 1035 } 1036 1037 /* --------- UNARY OP --------- */ 1038 case Iex_Unop: { 1039 1040 /* 1Uto8(32to1(expr32)) */ 1041 if (e->Iex.Unop.op == Iop_1Uto8) { 1042 DECLARE_PATTERN(p_32to1_then_1Uto8); 1043 DEFINE_PATTERN(p_32to1_then_1Uto8, 1044 unop(Iop_1Uto8,unop(Iop_32to1,bind(0)))); 1045 if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) { 1046 IRExpr* expr32 = mi.bindee[0]; 1047 HReg dst = newVRegI(env); 1048 HReg src = iselIntExpr_R(env, expr32); 1049 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1050 addInstr(env, X86Instr_Alu32R(Xalu_AND, 1051 X86RMI_Imm(1), dst)); 1052 return dst; 1053 } 1054 } 1055 1056 /* 8Uto32(LDle(expr32)) */ 1057 if (e->Iex.Unop.op == Iop_8Uto32) { 1058 DECLARE_PATTERN(p_LDle8_then_8Uto32); 1059 DEFINE_PATTERN(p_LDle8_then_8Uto32, 1060 unop(Iop_8Uto32, 1061 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) ); 1062 if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) { 1063 HReg dst = newVRegI(env); 1064 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1065 addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); 1066 return dst; 1067 } 1068 } 1069 1070 /* 8Sto32(LDle(expr32)) */ 1071 if (e->Iex.Unop.op == Iop_8Sto32) { 1072 DECLARE_PATTERN(p_LDle8_then_8Sto32); 1073 DEFINE_PATTERN(p_LDle8_then_8Sto32, 1074 unop(Iop_8Sto32, 1075 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) ); 1076 if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) { 1077 HReg dst = newVRegI(env); 1078 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1079 addInstr(env, X86Instr_LoadEX(1,True,amode,dst)); 1080 return dst; 1081 } 1082 } 1083 1084 /* 16Uto32(LDle(expr32)) */ 1085 if (e->Iex.Unop.op == Iop_16Uto32) { 1086 DECLARE_PATTERN(p_LDle16_then_16Uto32); 1087 DEFINE_PATTERN(p_LDle16_then_16Uto32, 1088 unop(Iop_16Uto32, 1089 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) ); 1090 if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) { 1091 HReg dst = newVRegI(env); 1092 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1093 addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); 1094 return dst; 1095 } 1096 } 1097 1098 /* 8Uto32(GET:I8) */ 1099 if (e->Iex.Unop.op == Iop_8Uto32) { 1100 if (e->Iex.Unop.arg->tag == Iex_Get) { 1101 HReg dst; 1102 X86AMode* amode; 1103 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8); 1104 dst = newVRegI(env); 1105 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1106 hregX86_EBP()); 1107 addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); 1108 return dst; 1109 } 1110 } 1111 1112 /* 16to32(GET:I16) */ 1113 if (e->Iex.Unop.op == Iop_16Uto32) { 1114 if (e->Iex.Unop.arg->tag == Iex_Get) { 1115 HReg dst; 1116 X86AMode* amode; 1117 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16); 1118 dst = newVRegI(env); 1119 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1120 hregX86_EBP()); 1121 addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); 1122 return dst; 1123 } 1124 } 1125 1126 switch (e->Iex.Unop.op) { 1127 case 
Iop_8Uto16: 1128 case Iop_8Uto32: 1129 case Iop_16Uto32: { 1130 HReg dst = newVRegI(env); 1131 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1132 UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF; 1133 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1134 addInstr(env, X86Instr_Alu32R(Xalu_AND, 1135 X86RMI_Imm(mask), dst)); 1136 return dst; 1137 } 1138 case Iop_8Sto16: 1139 case Iop_8Sto32: 1140 case Iop_16Sto32: { 1141 HReg dst = newVRegI(env); 1142 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1143 UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24; 1144 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1145 addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst)); 1146 addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst)); 1147 return dst; 1148 } 1149 case Iop_Not8: 1150 case Iop_Not16: 1151 case Iop_Not32: { 1152 HReg dst = newVRegI(env); 1153 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1154 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1155 addInstr(env, X86Instr_Unary32(Xun_NOT,dst)); 1156 return dst; 1157 } 1158 case Iop_64HIto32: { 1159 HReg rHi, rLo; 1160 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1161 return rHi; /* and abandon rLo .. poor wee thing :-) */ 1162 } 1163 case Iop_64to32: { 1164 HReg rHi, rLo; 1165 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1166 return rLo; /* similar stupid comment to the above ... */ 1167 } 1168 case Iop_16HIto8: 1169 case Iop_32HIto16: { 1170 HReg dst = newVRegI(env); 1171 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1172 Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16; 1173 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1174 addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst)); 1175 return dst; 1176 } 1177 case Iop_1Uto32: 1178 case Iop_1Uto8: { 1179 HReg dst = newVRegI(env); 1180 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 1181 addInstr(env, X86Instr_Set32(cond,dst)); 1182 return dst; 1183 } 1184 case Iop_1Sto8: 1185 case Iop_1Sto16: 1186 case Iop_1Sto32: { 1187 /* could do better than this, but for now ... */ 1188 HReg dst = newVRegI(env); 1189 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 1190 addInstr(env, X86Instr_Set32(cond,dst)); 1191 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst)); 1192 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst)); 1193 return dst; 1194 } 1195 case Iop_Ctz32: { 1196 /* Count trailing zeroes, implemented by x86 'bsfl' */ 1197 HReg dst = newVRegI(env); 1198 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1199 addInstr(env, X86Instr_Bsfr32(True,src,dst)); 1200 return dst; 1201 } 1202 case Iop_Clz32: { 1203 /* Count leading zeroes. Do 'bsrl' to establish the index 1204 of the highest set bit, and subtract that value from 1205 31. 
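         Note that Bsfr32(False,..) emits 'bsrl', whose result is
         undefined when the source is zero; that should be acceptable
         here since the IR Clz32/Ctz32 ops are likewise only defined
         for nonzero arguments.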
*/ 1206 HReg tmp = newVRegI(env); 1207 HReg dst = newVRegI(env); 1208 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1209 addInstr(env, X86Instr_Bsfr32(False,src,tmp)); 1210 addInstr(env, X86Instr_Alu32R(Xalu_MOV, 1211 X86RMI_Imm(31), dst)); 1212 addInstr(env, X86Instr_Alu32R(Xalu_SUB, 1213 X86RMI_Reg(tmp), dst)); 1214 return dst; 1215 } 1216 1217 case Iop_CmpwNEZ32: { 1218 HReg dst = newVRegI(env); 1219 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1220 addInstr(env, mk_iMOVsd_RR(src,dst)); 1221 addInstr(env, X86Instr_Unary32(Xun_NEG,dst)); 1222 addInstr(env, X86Instr_Alu32R(Xalu_OR, 1223 X86RMI_Reg(src), dst)); 1224 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst)); 1225 return dst; 1226 } 1227 case Iop_Left8: 1228 case Iop_Left16: 1229 case Iop_Left32: { 1230 HReg dst = newVRegI(env); 1231 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1232 addInstr(env, mk_iMOVsd_RR(src, dst)); 1233 addInstr(env, X86Instr_Unary32(Xun_NEG, dst)); 1234 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst)); 1235 return dst; 1236 } 1237 1238 case Iop_V128to32: { 1239 HReg dst = newVRegI(env); 1240 HReg vec = iselVecExpr(env, e->Iex.Unop.arg); 1241 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 1242 sub_from_esp(env, 16); 1243 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0)); 1244 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst )); 1245 add_to_esp(env, 16); 1246 return dst; 1247 } 1248 1249 /* ReinterpF32asI32(e) */ 1250 /* Given an IEEE754 single, produce an I32 with the same bit 1251 pattern. Keep stack 8-aligned even though only using 4 1252 bytes. */ 1253 case Iop_ReinterpF32asI32: { 1254 HReg rf = iselFltExpr(env, e->Iex.Unop.arg); 1255 HReg dst = newVRegI(env); 1256 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 1257 /* paranoia */ 1258 set_FPU_rounding_default(env); 1259 /* subl $8, %esp */ 1260 sub_from_esp(env, 8); 1261 /* gstF %rf, 0(%esp) */ 1262 addInstr(env, 1263 X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp)); 1264 /* movl 0(%esp), %dst */ 1265 addInstr(env, 1266 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst)); 1267 /* addl $8, %esp */ 1268 add_to_esp(env, 8); 1269 return dst; 1270 } 1271 1272 case Iop_16to8: 1273 case Iop_32to8: 1274 case Iop_32to16: 1275 /* These are no-ops. */ 1276 return iselIntExpr_R(env, e->Iex.Unop.arg); 1277 1278 default: 1279 break; 1280 } 1281 break; 1282 } 1283 1284 /* --------- GET --------- */ 1285 case Iex_Get: { 1286 if (ty == Ity_I32) { 1287 HReg dst = newVRegI(env); 1288 addInstr(env, X86Instr_Alu32R( 1289 Xalu_MOV, 1290 X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset, 1291 hregX86_EBP())), 1292 dst)); 1293 return dst; 1294 } 1295 if (ty == Ity_I8 || ty == Ity_I16) { 1296 HReg dst = newVRegI(env); 1297 addInstr(env, X86Instr_LoadEX( 1298 toUChar(ty==Ity_I8 ? 1 : 2), 1299 False, 1300 X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()), 1301 dst)); 1302 return dst; 1303 } 1304 break; 1305 } 1306 1307 case Iex_GetI: { 1308 X86AMode* am 1309 = genGuestArrayOffset( 1310 env, e->Iex.GetI.descr, 1311 e->Iex.GetI.ix, e->Iex.GetI.bias ); 1312 HReg dst = newVRegI(env); 1313 if (ty == Ity_I8) { 1314 addInstr(env, X86Instr_LoadEX( 1, False, am, dst )); 1315 return dst; 1316 } 1317 if (ty == Ity_I32) { 1318 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst)); 1319 return dst; 1320 } 1321 break; 1322 } 1323 1324 /* --------- CCALL --------- */ 1325 case Iex_CCall: { 1326 HReg dst = newVRegI(env); 1327 vassert(ty == e->Iex.CCall.retty); 1328 1329 /* be very restrictive for now. 
Only 32/64-bit ints allowed 1330 for args, and 32 bits for return type. */ 1331 if (e->Iex.CCall.retty != Ity_I32) 1332 goto irreducible; 1333 1334 /* Marshal args, do the call, clear stack. */ 1335 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args ); 1336 1337 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst)); 1338 return dst; 1339 } 1340 1341 /* --------- LITERAL --------- */ 1342 /* 32/16/8-bit literals */ 1343 case Iex_Const: { 1344 X86RMI* rmi = iselIntExpr_RMI ( env, e ); 1345 HReg r = newVRegI(env); 1346 addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r)); 1347 return r; 1348 } 1349 1350 /* --------- MULTIPLEX --------- */ 1351 case Iex_Mux0X: { 1352 if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) 1353 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) { 1354 X86RM* r8; 1355 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX); 1356 X86RM* r0 = iselIntExpr_RM(env, e->Iex.Mux0X.expr0); 1357 HReg dst = newVRegI(env); 1358 addInstr(env, mk_iMOVsd_RR(rX,dst)); 1359 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 1360 addInstr(env, X86Instr_Test32(0xFF, r8)); 1361 addInstr(env, X86Instr_CMov32(Xcc_Z,r0,dst)); 1362 return dst; 1363 } 1364 break; 1365 } 1366 1367 default: 1368 break; 1369 } /* switch (e->tag) */ 1370 1371 /* We get here if no pattern matched. */ 1372 irreducible: 1373 ppIRExpr(e); 1374 vpanic("iselIntExpr_R: cannot reduce tree"); 1375} 1376 1377 1378/*---------------------------------------------------------*/ 1379/*--- ISEL: Integer expression auxiliaries ---*/ 1380/*---------------------------------------------------------*/ 1381 1382/* --------------------- AMODEs --------------------- */ 1383 1384/* Return an AMode which computes the value of the specified 1385 expression, possibly also adding insns to the code list as a 1386 result. The expression may only be a 32-bit one. 1387*/ 1388 1389static Bool sane_AMode ( X86AMode* am ) 1390{ 1391 switch (am->tag) { 1392 case Xam_IR: 1393 return 1394 toBool( hregClass(am->Xam.IR.reg) == HRcInt32 1395 && (hregIsVirtual(am->Xam.IR.reg) 1396 || am->Xam.IR.reg == hregX86_EBP()) ); 1397 case Xam_IRRS: 1398 return 1399 toBool( hregClass(am->Xam.IRRS.base) == HRcInt32 1400 && hregIsVirtual(am->Xam.IRRS.base) 1401 && hregClass(am->Xam.IRRS.index) == HRcInt32 1402 && hregIsVirtual(am->Xam.IRRS.index) ); 1403 default: 1404 vpanic("sane_AMode: unknown x86 amode tag"); 1405 } 1406} 1407 1408static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e ) 1409{ 1410 X86AMode* am = iselIntExpr_AMode_wrk(env, e); 1411 vassert(sane_AMode(am)); 1412 return am; 1413} 1414 1415/* DO NOT CALL THIS DIRECTLY ! 
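   The worker below recognises, in order:
   Add32(Add32(e1, Shl32(e2, s)), i) as i(r1,r2,1<<s) for s in 1..3;
   Add32(e1, Shl32(e2, s)) as 0(r1,r2,1<<s), likewise for s in 1..3;
   Add32(e, i) as i(r); and anything else falls back to evaluating
   the whole expression into a register and using 0(r).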
*/ 1416static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e ) 1417{ 1418 IRType ty = typeOfIRExpr(env->type_env,e); 1419 vassert(ty == Ity_I32); 1420 1421 /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */ 1422 if (e->tag == Iex_Binop 1423 && e->Iex.Binop.op == Iop_Add32 1424 && e->Iex.Binop.arg2->tag == Iex_Const 1425 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32 1426 && e->Iex.Binop.arg1->tag == Iex_Binop 1427 && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32 1428 && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop 1429 && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32 1430 && e->Iex.Binop.arg1 1431 ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const 1432 && e->Iex.Binop.arg1 1433 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) { 1434 UInt shift = e->Iex.Binop.arg1 1435 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1436 UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 1437 if (shift == 1 || shift == 2 || shift == 3) { 1438 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1); 1439 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1 1440 ->Iex.Binop.arg2->Iex.Binop.arg1 ); 1441 return X86AMode_IRRS(imm32, r1, r2, shift); 1442 } 1443 } 1444 1445 /* Add32(expr1, Shl32(expr2, imm)) */ 1446 if (e->tag == Iex_Binop 1447 && e->Iex.Binop.op == Iop_Add32 1448 && e->Iex.Binop.arg2->tag == Iex_Binop 1449 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32 1450 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const 1451 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) { 1452 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1453 if (shift == 1 || shift == 2 || shift == 3) { 1454 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1455 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 ); 1456 return X86AMode_IRRS(0, r1, r2, shift); 1457 } 1458 } 1459 1460 /* Add32(expr,i) */ 1461 if (e->tag == Iex_Binop 1462 && e->Iex.Binop.op == Iop_Add32 1463 && e->Iex.Binop.arg2->tag == Iex_Const 1464 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) { 1465 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1466 return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1); 1467 } 1468 1469 /* Doesn't match anything in particular. Generate it into 1470 a register and use that. */ 1471 { 1472 HReg r1 = iselIntExpr_R(env, e); 1473 return X86AMode_IR(0, r1); 1474 } 1475} 1476 1477 1478/* --------------------- RMIs --------------------- */ 1479 1480/* Similarly, calculate an expression into an X86RMI operand. As with 1481 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */ 1482 1483static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e ) 1484{ 1485 X86RMI* rmi = iselIntExpr_RMI_wrk(env, e); 1486 /* sanity checks ... */ 1487 switch (rmi->tag) { 1488 case Xrmi_Imm: 1489 return rmi; 1490 case Xrmi_Reg: 1491 vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32); 1492 vassert(hregIsVirtual(rmi->Xrmi.Reg.reg)); 1493 return rmi; 1494 case Xrmi_Mem: 1495 vassert(sane_AMode(rmi->Xrmi.Mem.am)); 1496 return rmi; 1497 default: 1498 vpanic("iselIntExpr_RMI: unknown x86 RMI tag"); 1499 } 1500} 1501 1502/* DO NOT CALL THIS DIRECTLY ! 
*/
static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      UInt u;
      switch (e->Iex.Const.con->tag) {
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
         case Ico_U8:  u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
         default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
      }
      return X86RMI_Imm(u);
   }

   /* special case: 32-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I32) {
      return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
                                    hregX86_EBP()));
   }

   /* special case: 32-bit load from memory */
   if (e->tag == Iex_Load && ty == Ity_I32
       && e->Iex.Load.end == Iend_LE) {
      X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      return X86RMI_Mem(am);
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RMI_Reg(r);
   }
}


/* --------------------- RIs --------------------- */

/* Calculate an expression into an X86RI operand.  As with
   iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */

static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
{
   X86RI* ri = iselIntExpr_RI_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case Xri_Imm:
         return ri;
      case Xri_Reg:
         vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(ri->Xri.Reg.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RI: unknown x86 RI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      UInt u;
      switch (e->Iex.Const.con->tag) {
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
         case Ico_U8:  u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
         default: vpanic("iselIntExpr_RI.Iex_Const(x86h)");
      }
      return X86RI_Imm(u);
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RI_Reg(r);
   }
}


/* --------------------- RMs --------------------- */

/* Similarly, calculate an expression into an X86RM operand.  As with
   iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */

static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
{
   X86RM* rm = iselIntExpr_RM_wrk(env, e);
   /* sanity checks ... */
   switch (rm->tag) {
      case Xrm_Reg:
         vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(rm->Xrm.Reg.reg));
         return rm;
      case Xrm_Mem:
         vassert(sane_AMode(rm->Xrm.Mem.am));
         return rm;
      default:
         vpanic("iselIntExpr_RM: unknown x86 RM tag");
   }
}

/* DO NOT CALL THIS DIRECTLY !
*/
static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: 32-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I32) {
      return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
                                   hregX86_EBP()));
   }

   /* special case: load from memory */

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RM_Reg(r);
   }
}


/* --------------------- CONDCODE --------------------- */

/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */

static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
{
   /* Uh, there's nothing we can sanity check here, unfortunately. */
   return iselCondCode_wrk(env,e);
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;

   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      /* Test32 doesn't modify r32; so this is OK. */
      addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
      return Xcc_NZ;
   }

   /* Constant 1:Bit */
   if (e->tag == Iex_Const) {
      HReg r;
      vassert(e->Iex.Const.con->tag == Ico_U1);
      vassert(e->Iex.Const.con->Ico.U1 == True
              || e->Iex.Const.con->Ico.U1 == False);
      r = newVRegI(env);
      addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
      return e->Iex.Const.con->Ico.U1 ?
Xcc_Z : Xcc_NZ; 1670 } 1671 1672 /* Not1(e) */ 1673 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) { 1674 /* Generate code for the arg, and negate the test condition */ 1675 return 1 ^ iselCondCode(env, e->Iex.Unop.arg); 1676 } 1677 1678 /* --- patterns rooted at: 32to1 --- */ 1679 1680 if (e->tag == Iex_Unop 1681 && e->Iex.Unop.op == Iop_32to1) { 1682 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg); 1683 addInstr(env, X86Instr_Test32(1,rm)); 1684 return Xcc_NZ; 1685 } 1686 1687 /* --- patterns rooted at: CmpNEZ8 --- */ 1688 1689 /* CmpNEZ8(x) */ 1690 if (e->tag == Iex_Unop 1691 && e->Iex.Unop.op == Iop_CmpNEZ8) { 1692 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg); 1693 addInstr(env, X86Instr_Test32(0xFF,rm)); 1694 return Xcc_NZ; 1695 } 1696 1697 /* --- patterns rooted at: CmpNEZ16 --- */ 1698 1699 /* CmpNEZ16(x) */ 1700 if (e->tag == Iex_Unop 1701 && e->Iex.Unop.op == Iop_CmpNEZ16) { 1702 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg); 1703 addInstr(env, X86Instr_Test32(0xFFFF,rm)); 1704 return Xcc_NZ; 1705 } 1706 1707 /* --- patterns rooted at: CmpNEZ32 --- */ 1708 1709 /* CmpNEZ32(And32(x,y)) */ 1710 { 1711 DECLARE_PATTERN(p_CmpNEZ32_And32); 1712 DEFINE_PATTERN(p_CmpNEZ32_And32, 1713 unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1)))); 1714 if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) { 1715 HReg r0 = iselIntExpr_R(env, mi.bindee[0]); 1716 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]); 1717 HReg tmp = newVRegI(env); 1718 addInstr(env, mk_iMOVsd_RR(r0, tmp)); 1719 addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp)); 1720 return Xcc_NZ; 1721 } 1722 } 1723 1724 /* CmpNEZ32(Or32(x,y)) */ 1725 { 1726 DECLARE_PATTERN(p_CmpNEZ32_Or32); 1727 DEFINE_PATTERN(p_CmpNEZ32_Or32, 1728 unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1)))); 1729 if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) { 1730 HReg r0 = iselIntExpr_R(env, mi.bindee[0]); 1731 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]); 1732 HReg tmp = newVRegI(env); 1733 addInstr(env, mk_iMOVsd_RR(r0, tmp)); 1734 addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp)); 1735 return Xcc_NZ; 1736 } 1737 } 1738 1739 /* CmpNEZ32(GET(..):I32) */ 1740 if (e->tag == Iex_Unop 1741 && e->Iex.Unop.op == Iop_CmpNEZ32 1742 && e->Iex.Unop.arg->tag == Iex_Get) { 1743 X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1744 hregX86_EBP()); 1745 addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am)); 1746 return Xcc_NZ; 1747 } 1748 1749 /* CmpNEZ32(x) */ 1750 if (e->tag == Iex_Unop 1751 && e->Iex.Unop.op == Iop_CmpNEZ32) { 1752 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 1753 X86RMI* rmi2 = X86RMI_Imm(0); 1754 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1)); 1755 return Xcc_NZ; 1756 } 1757 1758 /* --- patterns rooted at: CmpNEZ64 --- */ 1759 1760 /* CmpNEZ64(Or64(x,y)) */ 1761 { 1762 DECLARE_PATTERN(p_CmpNEZ64_Or64); 1763 DEFINE_PATTERN(p_CmpNEZ64_Or64, 1764 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1)))); 1765 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) { 1766 HReg hi1, lo1, hi2, lo2; 1767 HReg tmp = newVRegI(env); 1768 iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] ); 1769 addInstr(env, mk_iMOVsd_RR(hi1, tmp)); 1770 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp)); 1771 iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] ); 1772 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp)); 1773 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp)); 1774 return Xcc_NZ; 1775 } 1776 } 1777 1778 /* CmpNEZ64(x) */ 1779 if (e->tag == Iex_Unop 1780 && e->Iex.Unop.op == Iop_CmpNEZ64) { 1781 HReg hi, lo; 1782 HReg tmp = 
newVRegI(env); 1783 iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg ); 1784 addInstr(env, mk_iMOVsd_RR(hi, tmp)); 1785 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp)); 1786 return Xcc_NZ; 1787 } 1788 1789 /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */ 1790 1791 /* CmpEQ8 / CmpNE8 */ 1792 if (e->tag == Iex_Binop 1793 && (e->Iex.Binop.op == Iop_CmpEQ8 1794 || e->Iex.Binop.op == Iop_CmpNE8 1795 || e->Iex.Binop.op == Iop_CasCmpEQ8 1796 || e->Iex.Binop.op == Iop_CasCmpNE8)) { 1797 if (isZeroU8(e->Iex.Binop.arg2)) { 1798 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1799 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1))); 1800 switch (e->Iex.Binop.op) { 1801 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z; 1802 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ; 1803 default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)"); 1804 } 1805 } else { 1806 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1807 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 1808 HReg r = newVRegI(env); 1809 addInstr(env, mk_iMOVsd_RR(r1,r)); 1810 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r)); 1811 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r))); 1812 switch (e->Iex.Binop.op) { 1813 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z; 1814 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ; 1815 default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)"); 1816 } 1817 } 1818 } 1819 1820 /* CmpEQ16 / CmpNE16 */ 1821 if (e->tag == Iex_Binop 1822 && (e->Iex.Binop.op == Iop_CmpEQ16 1823 || e->Iex.Binop.op == Iop_CmpNE16 1824 || e->Iex.Binop.op == Iop_CasCmpEQ16 1825 || e->Iex.Binop.op == Iop_CasCmpNE16)) { 1826 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1827 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 1828 HReg r = newVRegI(env); 1829 addInstr(env, mk_iMOVsd_RR(r1,r)); 1830 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r)); 1831 addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r))); 1832 switch (e->Iex.Binop.op) { 1833 case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Xcc_Z; 1834 case Iop_CmpNE16: case Iop_CasCmpNE16: return Xcc_NZ; 1835 default: vpanic("iselCondCode(x86): CmpXX16"); 1836 } 1837 } 1838 1839 /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation). 1840 Saves a "movl %eax, %tmp" compared to the default route. */ 1841 if (e->tag == Iex_Binop 1842 && e->Iex.Binop.op == Iop_CmpNE32 1843 && e->Iex.Binop.arg1->tag == Iex_CCall 1844 && e->Iex.Binop.arg2->tag == Iex_Const) { 1845 IRExpr* cal = e->Iex.Binop.arg1; 1846 IRExpr* con = e->Iex.Binop.arg2; 1847 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */ 1848 vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */ 1849 vassert(con->Iex.Const.con->tag == Ico_U32); 1850 /* Marshal args, do the call. 
*/ 1851 doHelperCall( env, False, NULL, cal->Iex.CCall.cee, cal->Iex.CCall.args ); 1852 addInstr(env, X86Instr_Alu32R(Xalu_CMP, 1853 X86RMI_Imm(con->Iex.Const.con->Ico.U32), 1854 hregX86_EAX())); 1855 return Xcc_NZ; 1856 } 1857 1858 /* Cmp*32*(x,y) */ 1859 if (e->tag == Iex_Binop 1860 && (e->Iex.Binop.op == Iop_CmpEQ32 1861 || e->Iex.Binop.op == Iop_CmpNE32 1862 || e->Iex.Binop.op == Iop_CmpLT32S 1863 || e->Iex.Binop.op == Iop_CmpLT32U 1864 || e->Iex.Binop.op == Iop_CmpLE32S 1865 || e->Iex.Binop.op == Iop_CmpLE32U 1866 || e->Iex.Binop.op == Iop_CasCmpEQ32 1867 || e->Iex.Binop.op == Iop_CasCmpNE32)) { 1868 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1869 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 1870 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1)); 1871 switch (e->Iex.Binop.op) { 1872 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z; 1873 case Iop_CmpNE32: case Iop_CasCmpNE32: return Xcc_NZ; 1874 case Iop_CmpLT32S: return Xcc_L; 1875 case Iop_CmpLT32U: return Xcc_B; 1876 case Iop_CmpLE32S: return Xcc_LE; 1877 case Iop_CmpLE32U: return Xcc_BE; 1878 default: vpanic("iselCondCode(x86): CmpXX32"); 1879 } 1880 } 1881 1882 /* CmpNE64 */ 1883 if (e->tag == Iex_Binop 1884 && (e->Iex.Binop.op == Iop_CmpNE64 1885 || e->Iex.Binop.op == Iop_CmpEQ64)) { 1886 HReg hi1, hi2, lo1, lo2; 1887 HReg tHi = newVRegI(env); 1888 HReg tLo = newVRegI(env); 1889 iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 ); 1890 iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 ); 1891 addInstr(env, mk_iMOVsd_RR(hi1, tHi)); 1892 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi)); 1893 addInstr(env, mk_iMOVsd_RR(lo1, tLo)); 1894 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo)); 1895 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo)); 1896 switch (e->Iex.Binop.op) { 1897 case Iop_CmpNE64: return Xcc_NZ; 1898 case Iop_CmpEQ64: return Xcc_Z; 1899 default: vpanic("iselCondCode(x86): CmpXX64"); 1900 } 1901 } 1902 1903 ppIRExpr(e); 1904 vpanic("iselCondCode"); 1905} 1906 1907 1908/*---------------------------------------------------------*/ 1909/*--- ISEL: Integer expressions (64 bit) ---*/ 1910/*---------------------------------------------------------*/ 1911 1912/* Compute a 64-bit value into a register pair, which is returned as 1913 the first two parameters. As with iselIntExpr_R, these may be 1914 either real or virtual regs; in any case they must not be changed 1915 by subsequent code emitted by the caller. */ 1916 1917static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) 1918{ 1919 iselInt64Expr_wrk(rHi, rLo, env, e); 1920# if 0 1921 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 1922# endif 1923 vassert(hregClass(*rHi) == HRcInt32); 1924 vassert(hregIsVirtual(*rHi)); 1925 vassert(hregClass(*rLo) == HRcInt32); 1926 vassert(hregIsVirtual(*rLo)); 1927} 1928 1929/* DO NOT CALL THIS DIRECTLY ! */ 1930static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) 1931{ 1932 MatchInfo mi; 1933 HWord fn = 0; /* helper fn for most SIMD64 stuff */ 1934 vassert(e); 1935 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64); 1936 1937 /* 64-bit literal */ 1938 if (e->tag == Iex_Const) { 1939 ULong w64 = e->Iex.Const.con->Ico.U64; 1940 UInt wHi = toUInt(w64 >> 32); 1941 UInt wLo = toUInt(w64); 1942 HReg tLo = newVRegI(env); 1943 HReg tHi = newVRegI(env); 1944 vassert(e->Iex.Const.con->tag == Ico_U64); 1945 if (wLo == wHi) { 1946 /* Save a precious Int register in this special case. 
*/ 1947 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo)); 1948 *rHi = tLo; 1949 *rLo = tLo; 1950 } else { 1951 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi)); 1952 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo)); 1953 *rHi = tHi; 1954 *rLo = tLo; 1955 } 1956 return; 1957 } 1958 1959 /* read 64-bit IRTemp */ 1960 if (e->tag == Iex_RdTmp) { 1961 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp); 1962 return; 1963 } 1964 1965 /* 64-bit load */ 1966 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 1967 HReg tLo, tHi; 1968 X86AMode *am0, *am4; 1969 vassert(e->Iex.Load.ty == Ity_I64); 1970 tLo = newVRegI(env); 1971 tHi = newVRegI(env); 1972 am0 = iselIntExpr_AMode(env, e->Iex.Load.addr); 1973 am4 = advance4(am0); 1974 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo )); 1975 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 1976 *rHi = tHi; 1977 *rLo = tLo; 1978 return; 1979 } 1980 1981 /* 64-bit GET */ 1982 if (e->tag == Iex_Get) { 1983 X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP()); 1984 X86AMode* am4 = advance4(am); 1985 HReg tLo = newVRegI(env); 1986 HReg tHi = newVRegI(env); 1987 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo )); 1988 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 1989 *rHi = tHi; 1990 *rLo = tLo; 1991 return; 1992 } 1993 1994 /* 64-bit GETI */ 1995 if (e->tag == Iex_GetI) { 1996 X86AMode* am 1997 = genGuestArrayOffset( env, e->Iex.GetI.descr, 1998 e->Iex.GetI.ix, e->Iex.GetI.bias ); 1999 X86AMode* am4 = advance4(am); 2000 HReg tLo = newVRegI(env); 2001 HReg tHi = newVRegI(env); 2002 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo )); 2003 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 2004 *rHi = tHi; 2005 *rLo = tLo; 2006 return; 2007 } 2008 2009 /* 64-bit Mux0X: Mux0X(g, expr, 0:I64) */ 2010 if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.exprX)) { 2011 X86RM* r8; 2012 HReg e0Lo, e0Hi; 2013 HReg tLo = newVRegI(env); 2014 HReg tHi = newVRegI(env); 2015 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 2016 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0); 2017 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 2018 addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) ); 2019 addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) ); 2020 addInstr(env, X86Instr_Push(X86RMI_Imm(0))); 2021 addInstr(env, X86Instr_Test32(0xFF, r8)); 2022 addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tHi)); 2023 addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tLo)); 2024 add_to_esp(env, 4); 2025 *rHi = tHi; 2026 *rLo = tLo; 2027 return; 2028 } 2029 /* 64-bit Mux0X: Mux0X(g, 0:I64, expr) */ 2030 if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.expr0)) { 2031 X86RM* r8; 2032 HReg e0Lo, e0Hi; 2033 HReg tLo = newVRegI(env); 2034 HReg tHi = newVRegI(env); 2035 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 2036 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.exprX); 2037 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 2038 addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) ); 2039 addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) ); 2040 addInstr(env, X86Instr_Push(X86RMI_Imm(0))); 2041 addInstr(env, X86Instr_Test32(0xFF, r8)); 2042 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tHi)); 2043 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tLo)); 2044 add_to_esp(env, 4); 2045 *rHi = tHi; 2046 *rLo = tLo; 2047 return; 2048 } 2049 2050 /* 64-bit Mux0X: Mux0X(g, expr, expr) */ 2051 if (e->tag == Iex_Mux0X) { 2052 X86RM* r8; 2053 HReg e0Lo, e0Hi, eXLo, eXHi; 
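         /* General case: evaluate both arms into register pairs, start
            with exprX in tHi:tLo, then test the low 8 bits of the
            condition and, where it is zero, overwrite both halves with
            expr0 via conditional moves (see the note below about the
            condition codes surviving the first cmov32). */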
2054 HReg tLo = newVRegI(env); 2055 HReg tHi = newVRegI(env); 2056 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0); 2057 iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX); 2058 addInstr(env, mk_iMOVsd_RR(eXHi, tHi)); 2059 addInstr(env, mk_iMOVsd_RR(eXLo, tLo)); 2060 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 2061 addInstr(env, X86Instr_Test32(0xFF, r8)); 2062 /* This assumes the first cmov32 doesn't trash the condition 2063 codes, so they are still available for the second cmov32 */ 2064 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi)); 2065 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo)); 2066 *rHi = tHi; 2067 *rLo = tLo; 2068 return; 2069 } 2070 2071 /* --------- BINARY ops --------- */ 2072 if (e->tag == Iex_Binop) { 2073 switch (e->Iex.Binop.op) { 2074 /* 32 x 32 -> 64 multiply */ 2075 case Iop_MullU32: 2076 case Iop_MullS32: { 2077 /* get one operand into %eax, and the other into a R/M. 2078 Need to make an educated guess about which is better in 2079 which. */ 2080 HReg tLo = newVRegI(env); 2081 HReg tHi = newVRegI(env); 2082 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32); 2083 X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1); 2084 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2); 2085 addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX())); 2086 addInstr(env, X86Instr_MulL(syned, rmLeft)); 2087 /* Result is now in EDX:EAX. Tell the caller. */ 2088 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2089 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2090 *rHi = tHi; 2091 *rLo = tLo; 2092 return; 2093 } 2094 2095 /* 64 x 32 -> (32(rem),32(div)) division */ 2096 case Iop_DivModU64to32: 2097 case Iop_DivModS64to32: { 2098 /* Get the 64-bit operand into edx:eax, and the other into 2099 any old R/M. */ 2100 HReg sHi, sLo; 2101 HReg tLo = newVRegI(env); 2102 HReg tHi = newVRegI(env); 2103 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32); 2104 X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2); 2105 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2106 addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX())); 2107 addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX())); 2108 addInstr(env, X86Instr_Div(syned, rmRight)); 2109 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2110 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2111 *rHi = tHi; 2112 *rLo = tLo; 2113 return; 2114 } 2115 2116 /* Or64/And64/Xor64 */ 2117 case Iop_Or64: 2118 case Iop_And64: 2119 case Iop_Xor64: { 2120 HReg xLo, xHi, yLo, yHi; 2121 HReg tLo = newVRegI(env); 2122 HReg tHi = newVRegI(env); 2123 X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR 2124 : e->Iex.Binop.op==Iop_And64 ? 
Xalu_AND 2125 : Xalu_XOR; 2126 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2127 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2128 addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2129 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi)); 2130 addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2131 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo)); 2132 *rHi = tHi; 2133 *rLo = tLo; 2134 return; 2135 } 2136 2137 /* Add64/Sub64 */ 2138 case Iop_Add64: 2139 if (e->Iex.Binop.arg2->tag == Iex_Const) { 2140 /* special case Add64(e, const) */ 2141 ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; 2142 UInt wHi = toUInt(w64 >> 32); 2143 UInt wLo = toUInt(w64); 2144 HReg tLo = newVRegI(env); 2145 HReg tHi = newVRegI(env); 2146 HReg xLo, xHi; 2147 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64); 2148 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2149 addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2150 addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2151 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo)); 2152 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi)); 2153 *rHi = tHi; 2154 *rLo = tLo; 2155 return; 2156 } 2157 /* else fall through to the generic case */ 2158 case Iop_Sub64: { 2159 HReg xLo, xHi, yLo, yHi; 2160 HReg tLo = newVRegI(env); 2161 HReg tHi = newVRegI(env); 2162 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2163 addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2164 addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2165 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2166 if (e->Iex.Binop.op==Iop_Add64) { 2167 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo)); 2168 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi)); 2169 } else { 2170 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo)); 2171 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi)); 2172 } 2173 *rHi = tHi; 2174 *rLo = tLo; 2175 return; 2176 } 2177 2178 /* 32HLto64(e1,e2) */ 2179 case Iop_32HLto64: 2180 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2181 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2182 return; 2183 2184 /* 64-bit shifts */ 2185 case Iop_Shl64: { 2186 /* We use the same ingenious scheme as gcc. Put the value 2187 to be shifted into %hi:%lo, and the shift amount into 2188 %cl. Then (dsts on right, a la ATT syntax): 2189 2190 shldl %cl, %lo, %hi -- make %hi be right for the 2191 -- shift amt %cl % 32 2192 shll %cl, %lo -- make %lo be right for the 2193 -- shift amt %cl % 32 2194 2195 Now, if (shift amount % 64) is in the range 32 .. 63, 2196 we have to do a fixup, which puts the result low half 2197 into the result high half, and zeroes the low half: 2198 2199 testl $32, %ecx 2200 2201 cmovnz %lo, %hi 2202 movl $0, %tmp -- sigh; need yet another reg 2203 cmovnz %tmp, %lo 2204 */ 2205 HReg rAmt, sHi, sLo, tHi, tLo, tTemp; 2206 tLo = newVRegI(env); 2207 tHi = newVRegI(env); 2208 tTemp = newVRegI(env); 2209 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2); 2210 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2211 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX())); 2212 addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2213 addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2214 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo 2215 and those regs are legitimately modifiable. 
*/
2216 addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2217 addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
2218 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2219 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2220 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2221 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2222 *rHi = tHi;
2223 *rLo = tLo;
2224 return;
2225 }
2226
2227 case Iop_Shr64: {
2228 /* We use the same ingenious scheme as gcc. Put the value
2229 to be shifted into %hi:%lo, and the shift amount into
2230 %cl. Then:
2231
2232 shrdl %cl, %hi, %lo -- make %lo be right for the
2233 -- shift amt %cl % 32
2234 shrl %cl, %hi -- make %hi be right for the
2235 -- shift amt %cl % 32
2236
2237 Now, if (shift amount % 64) is in the range 32 .. 63,
2238 we have to do a fixup, which puts the result high half
2239 into the result low half, and zeroes the high half:
2240
2241 testl $32, %ecx
2242
2243 cmovnz %hi, %lo
2244 movl $0, %tmp -- sigh; need yet another reg
2245 cmovnz %tmp, %hi
2246 */
2247 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2248 tLo = newVRegI(env);
2249 tHi = newVRegI(env);
2250 tTemp = newVRegI(env);
2251 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2252 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2253 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2254 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2255 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2256 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2257 and those regs are legitimately modifiable. */
2258 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2259 addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
2260 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2261 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2262 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2263 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2264 *rHi = tHi;
2265 *rLo = tLo;
2266 return;
2267 }
2268
2269 /* F64 -> I64 */
2270 /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2271 case. Unfortunately I see no easy way to avoid the
2272 duplication. */
2273 case Iop_F64toI64S: {
2274 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2275 HReg tLo = newVRegI(env);
2276 HReg tHi = newVRegI(env);
2277
2278 /* Used several times ... */
2279 /* Careful ... this sharing is only safe because
2280 zero_esp/four_esp do not hold any registers which the
2281 register allocator could attempt to swizzle later. */
2282 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2283 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2284
2285 /* rf now holds the value to be converted; the rounding
2286 mode, encoded as per the IRRoundingMode enum, is given by
2287 arg1. The first thing to do is set the FPU's rounding
2288 mode accordingly. */
2289
2290 /* Create a space for the format conversion. */
2291 /* subl $8, %esp */
2292 sub_from_esp(env, 8);
2293
2294 /* Set host rounding mode */
2295 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2296
2297 /* gistll %rf, 0(%esp) */
2298 addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2299
2300 /* movl 0(%esp), %dstLo */
2301 /* movl 4(%esp), %dstHi */
2302 addInstr(env, X86Instr_Alu32R(
2303 Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2304 addInstr(env, X86Instr_Alu32R(
2305 Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2306
2307 /* Restore default FPU rounding.
*/ 2308 set_FPU_rounding_default( env ); 2309 2310 /* addl $8, %esp */ 2311 add_to_esp(env, 8); 2312 2313 *rHi = tHi; 2314 *rLo = tLo; 2315 return; 2316 } 2317 2318 case Iop_Add8x8: 2319 fn = (HWord)h_generic_calc_Add8x8; goto binnish; 2320 case Iop_Add16x4: 2321 fn = (HWord)h_generic_calc_Add16x4; goto binnish; 2322 case Iop_Add32x2: 2323 fn = (HWord)h_generic_calc_Add32x2; goto binnish; 2324 2325 case Iop_Avg8Ux8: 2326 fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish; 2327 case Iop_Avg16Ux4: 2328 fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish; 2329 2330 case Iop_CmpEQ8x8: 2331 fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish; 2332 case Iop_CmpEQ16x4: 2333 fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish; 2334 case Iop_CmpEQ32x2: 2335 fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish; 2336 2337 case Iop_CmpGT8Sx8: 2338 fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish; 2339 case Iop_CmpGT16Sx4: 2340 fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish; 2341 case Iop_CmpGT32Sx2: 2342 fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish; 2343 2344 case Iop_InterleaveHI8x8: 2345 fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish; 2346 case Iop_InterleaveLO8x8: 2347 fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish; 2348 case Iop_InterleaveHI16x4: 2349 fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish; 2350 case Iop_InterleaveLO16x4: 2351 fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish; 2352 case Iop_InterleaveHI32x2: 2353 fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish; 2354 case Iop_InterleaveLO32x2: 2355 fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish; 2356 case Iop_CatOddLanes16x4: 2357 fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish; 2358 case Iop_CatEvenLanes16x4: 2359 fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish; 2360 case Iop_Perm8x8: 2361 fn = (HWord)h_generic_calc_Perm8x8; goto binnish; 2362 2363 case Iop_Max8Ux8: 2364 fn = (HWord)h_generic_calc_Max8Ux8; goto binnish; 2365 case Iop_Max16Sx4: 2366 fn = (HWord)h_generic_calc_Max16Sx4; goto binnish; 2367 case Iop_Min8Ux8: 2368 fn = (HWord)h_generic_calc_Min8Ux8; goto binnish; 2369 case Iop_Min16Sx4: 2370 fn = (HWord)h_generic_calc_Min16Sx4; goto binnish; 2371 2372 case Iop_Mul16x4: 2373 fn = (HWord)h_generic_calc_Mul16x4; goto binnish; 2374 case Iop_Mul32x2: 2375 fn = (HWord)h_generic_calc_Mul32x2; goto binnish; 2376 case Iop_MulHi16Sx4: 2377 fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish; 2378 case Iop_MulHi16Ux4: 2379 fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish; 2380 2381 case Iop_QAdd8Sx8: 2382 fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish; 2383 case Iop_QAdd16Sx4: 2384 fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish; 2385 case Iop_QAdd8Ux8: 2386 fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish; 2387 case Iop_QAdd16Ux4: 2388 fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish; 2389 2390 case Iop_QNarrowBin32Sto16Sx4: 2391 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish; 2392 case Iop_QNarrowBin16Sto8Sx8: 2393 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish; 2394 case Iop_QNarrowBin16Sto8Ux8: 2395 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish; 2396 case Iop_NarrowBin16to8x8: 2397 fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish; 2398 case Iop_NarrowBin32to16x4: 2399 fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish; 2400 2401 case Iop_QSub8Sx8: 2402 fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish; 2403 case Iop_QSub16Sx4: 2404 fn = (HWord)h_generic_calc_QSub16Sx4; goto 
binnish; 2405 case Iop_QSub8Ux8: 2406 fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish; 2407 case Iop_QSub16Ux4: 2408 fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish; 2409 2410 case Iop_Sub8x8: 2411 fn = (HWord)h_generic_calc_Sub8x8; goto binnish; 2412 case Iop_Sub16x4: 2413 fn = (HWord)h_generic_calc_Sub16x4; goto binnish; 2414 case Iop_Sub32x2: 2415 fn = (HWord)h_generic_calc_Sub32x2; goto binnish; 2416 2417 binnish: { 2418 /* Note: the following assumes all helpers are of 2419 signature 2420 ULong fn ( ULong, ULong ), and they are 2421 not marked as regparm functions. 2422 */ 2423 HReg xLo, xHi, yLo, yHi; 2424 HReg tLo = newVRegI(env); 2425 HReg tHi = newVRegI(env); 2426 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2427 addInstr(env, X86Instr_Push(X86RMI_Reg(yHi))); 2428 addInstr(env, X86Instr_Push(X86RMI_Reg(yLo))); 2429 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2430 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 2431 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 2432 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 )); 2433 add_to_esp(env, 4*4); 2434 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2435 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2436 *rHi = tHi; 2437 *rLo = tLo; 2438 return; 2439 } 2440 2441 case Iop_ShlN32x2: 2442 fn = (HWord)h_generic_calc_ShlN32x2; goto shifty; 2443 case Iop_ShlN16x4: 2444 fn = (HWord)h_generic_calc_ShlN16x4; goto shifty; 2445 case Iop_ShlN8x8: 2446 fn = (HWord)h_generic_calc_ShlN8x8; goto shifty; 2447 case Iop_ShrN32x2: 2448 fn = (HWord)h_generic_calc_ShrN32x2; goto shifty; 2449 case Iop_ShrN16x4: 2450 fn = (HWord)h_generic_calc_ShrN16x4; goto shifty; 2451 case Iop_SarN32x2: 2452 fn = (HWord)h_generic_calc_SarN32x2; goto shifty; 2453 case Iop_SarN16x4: 2454 fn = (HWord)h_generic_calc_SarN16x4; goto shifty; 2455 case Iop_SarN8x8: 2456 fn = (HWord)h_generic_calc_SarN8x8; goto shifty; 2457 shifty: { 2458 /* Note: the following assumes all helpers are of 2459 signature 2460 ULong fn ( ULong, UInt ), and they are 2461 not marked as regparm functions. 
2462 */ 2463 HReg xLo, xHi; 2464 HReg tLo = newVRegI(env); 2465 HReg tHi = newVRegI(env); 2466 X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2467 addInstr(env, X86Instr_Push(y)); 2468 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2469 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 2470 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 2471 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 )); 2472 add_to_esp(env, 3*4); 2473 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2474 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2475 *rHi = tHi; 2476 *rLo = tLo; 2477 return; 2478 } 2479 2480 default: 2481 break; 2482 } 2483 } /* if (e->tag == Iex_Binop) */ 2484 2485 2486 /* --------- UNARY ops --------- */ 2487 if (e->tag == Iex_Unop) { 2488 switch (e->Iex.Unop.op) { 2489 2490 /* 32Sto64(e) */ 2491 case Iop_32Sto64: { 2492 HReg tLo = newVRegI(env); 2493 HReg tHi = newVRegI(env); 2494 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2495 addInstr(env, mk_iMOVsd_RR(src,tHi)); 2496 addInstr(env, mk_iMOVsd_RR(src,tLo)); 2497 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi)); 2498 *rHi = tHi; 2499 *rLo = tLo; 2500 return; 2501 } 2502 2503 /* 32Uto64(e) */ 2504 case Iop_32Uto64: { 2505 HReg tLo = newVRegI(env); 2506 HReg tHi = newVRegI(env); 2507 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2508 addInstr(env, mk_iMOVsd_RR(src,tLo)); 2509 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2510 *rHi = tHi; 2511 *rLo = tLo; 2512 return; 2513 } 2514 2515 /* 16Uto64(e) */ 2516 case Iop_16Uto64: { 2517 HReg tLo = newVRegI(env); 2518 HReg tHi = newVRegI(env); 2519 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2520 addInstr(env, mk_iMOVsd_RR(src,tLo)); 2521 addInstr(env, X86Instr_Alu32R(Xalu_AND, 2522 X86RMI_Imm(0xFFFF), tLo)); 2523 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2524 *rHi = tHi; 2525 *rLo = tLo; 2526 return; 2527 } 2528 2529 /* V128{HI}to64 */ 2530 case Iop_V128HIto64: 2531 case Iop_V128to64: { 2532 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0; 2533 HReg tLo = newVRegI(env); 2534 HReg tHi = newVRegI(env); 2535 HReg vec = iselVecExpr(env, e->Iex.Unop.arg); 2536 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 2537 X86AMode* espLO = X86AMode_IR(off, hregX86_ESP()); 2538 X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP()); 2539 sub_from_esp(env, 16); 2540 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0)); 2541 addInstr(env, X86Instr_Alu32R( Xalu_MOV, 2542 X86RMI_Mem(espLO), tLo )); 2543 addInstr(env, X86Instr_Alu32R( Xalu_MOV, 2544 X86RMI_Mem(espHI), tHi )); 2545 add_to_esp(env, 16); 2546 *rHi = tHi; 2547 *rLo = tLo; 2548 return; 2549 } 2550 2551 /* could do better than this, but for now ... 
*/ 2552 case Iop_1Sto64: { 2553 HReg tLo = newVRegI(env); 2554 HReg tHi = newVRegI(env); 2555 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 2556 addInstr(env, X86Instr_Set32(cond,tLo)); 2557 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo)); 2558 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo)); 2559 addInstr(env, mk_iMOVsd_RR(tLo, tHi)); 2560 *rHi = tHi; 2561 *rLo = tLo; 2562 return; 2563 } 2564 2565 /* Not64(e) */ 2566 case Iop_Not64: { 2567 HReg tLo = newVRegI(env); 2568 HReg tHi = newVRegI(env); 2569 HReg sHi, sLo; 2570 iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg); 2571 addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2572 addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2573 addInstr(env, X86Instr_Unary32(Xun_NOT,tHi)); 2574 addInstr(env, X86Instr_Unary32(Xun_NOT,tLo)); 2575 *rHi = tHi; 2576 *rLo = tLo; 2577 return; 2578 } 2579 2580 /* Left64(e) */ 2581 case Iop_Left64: { 2582 HReg yLo, yHi; 2583 HReg tLo = newVRegI(env); 2584 HReg tHi = newVRegI(env); 2585 /* yHi:yLo = arg */ 2586 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg); 2587 /* tLo = 0 - yLo, and set carry */ 2588 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo)); 2589 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo)); 2590 /* tHi = 0 - yHi - carry */ 2591 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2592 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi)); 2593 /* So now we have tHi:tLo = -arg. To finish off, or 'arg' 2594 back in, so as to give the final result 2595 tHi:tLo = arg | -arg. */ 2596 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo)); 2597 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi)); 2598 *rHi = tHi; 2599 *rLo = tLo; 2600 return; 2601 } 2602 2603 /* --- patterns rooted at: CmpwNEZ64 --- */ 2604 2605 /* CmpwNEZ64(e) */ 2606 case Iop_CmpwNEZ64: { 2607 2608 DECLARE_PATTERN(p_CmpwNEZ64_Or64); 2609 DEFINE_PATTERN(p_CmpwNEZ64_Or64, 2610 unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1)))); 2611 if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) { 2612 /* CmpwNEZ64(Or64(x,y)) */ 2613 HReg xHi,xLo,yHi,yLo; 2614 HReg xBoth = newVRegI(env); 2615 HReg merged = newVRegI(env); 2616 HReg tmp2 = newVRegI(env); 2617 2618 iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]); 2619 addInstr(env, mk_iMOVsd_RR(xHi,xBoth)); 2620 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2621 X86RMI_Reg(xLo),xBoth)); 2622 2623 iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]); 2624 addInstr(env, mk_iMOVsd_RR(yHi,merged)); 2625 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2626 X86RMI_Reg(yLo),merged)); 2627 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2628 X86RMI_Reg(xBoth),merged)); 2629 2630 /* tmp2 = (merged | -merged) >>s 31 */ 2631 addInstr(env, mk_iMOVsd_RR(merged,tmp2)); 2632 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2)); 2633 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2634 X86RMI_Reg(merged), tmp2)); 2635 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2)); 2636 *rHi = tmp2; 2637 *rLo = tmp2; 2638 return; 2639 } else { 2640 /* CmpwNEZ64(e) */ 2641 HReg srcLo, srcHi; 2642 HReg tmp1 = newVRegI(env); 2643 HReg tmp2 = newVRegI(env); 2644 /* srcHi:srcLo = arg */ 2645 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg); 2646 /* tmp1 = srcHi | srcLo */ 2647 addInstr(env, mk_iMOVsd_RR(srcHi,tmp1)); 2648 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2649 X86RMI_Reg(srcLo), tmp1)); 2650 /* tmp2 = (tmp1 | -tmp1) >>s 31 */ 2651 addInstr(env, mk_iMOVsd_RR(tmp1,tmp2)); 2652 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2)); 2653 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2654 X86RMI_Reg(tmp1), tmp2)); 2655 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, 
tmp2)); 2656 *rHi = tmp2; 2657 *rLo = tmp2; 2658 return; 2659 } 2660 } 2661 2662 /* ReinterpF64asI64(e) */ 2663 /* Given an IEEE754 double, produce an I64 with the same bit 2664 pattern. */ 2665 case Iop_ReinterpF64asI64: { 2666 HReg rf = iselDblExpr(env, e->Iex.Unop.arg); 2667 HReg tLo = newVRegI(env); 2668 HReg tHi = newVRegI(env); 2669 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 2670 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP()); 2671 /* paranoia */ 2672 set_FPU_rounding_default(env); 2673 /* subl $8, %esp */ 2674 sub_from_esp(env, 8); 2675 /* gstD %rf, 0(%esp) */ 2676 addInstr(env, 2677 X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp)); 2678 /* movl 0(%esp), %tLo */ 2679 addInstr(env, 2680 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo)); 2681 /* movl 4(%esp), %tHi */ 2682 addInstr(env, 2683 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi)); 2684 /* addl $8, %esp */ 2685 add_to_esp(env, 8); 2686 *rHi = tHi; 2687 *rLo = tLo; 2688 return; 2689 } 2690 2691 case Iop_CmpNEZ32x2: 2692 fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish; 2693 case Iop_CmpNEZ16x4: 2694 fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish; 2695 case Iop_CmpNEZ8x8: 2696 fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish; 2697 unish: { 2698 /* Note: the following assumes all helpers are of 2699 signature 2700 ULong fn ( ULong ), and they are 2701 not marked as regparm functions. 2702 */ 2703 HReg xLo, xHi; 2704 HReg tLo = newVRegI(env); 2705 HReg tHi = newVRegI(env); 2706 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg); 2707 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 2708 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 2709 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 )); 2710 add_to_esp(env, 2*4); 2711 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2712 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2713 *rHi = tHi; 2714 *rLo = tLo; 2715 return; 2716 } 2717 2718 default: 2719 break; 2720 } 2721 } /* if (e->tag == Iex_Unop) */ 2722 2723 2724 /* --------- CCALL --------- */ 2725 if (e->tag == Iex_CCall) { 2726 HReg tLo = newVRegI(env); 2727 HReg tHi = newVRegI(env); 2728 2729 /* Marshal args, do the call, clear stack. */ 2730 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args ); 2731 2732 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2733 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2734 *rHi = tHi; 2735 *rLo = tLo; 2736 return; 2737 } 2738 2739 ppIRExpr(e); 2740 vpanic("iselInt64Expr"); 2741} 2742 2743 2744/*---------------------------------------------------------*/ 2745/*--- ISEL: Floating point expressions (32 bit) ---*/ 2746/*---------------------------------------------------------*/ 2747 2748/* Nothing interesting here; really just wrappers for 2749 64-bit stuff. 
*/ 2750 2751static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ) 2752{ 2753 HReg r = iselFltExpr_wrk( env, e ); 2754# if 0 2755 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2756# endif 2757 vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */ 2758 vassert(hregIsVirtual(r)); 2759 return r; 2760} 2761 2762/* DO NOT CALL THIS DIRECTLY */ 2763static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ) 2764{ 2765 IRType ty = typeOfIRExpr(env->type_env,e); 2766 vassert(ty == Ity_F32); 2767 2768 if (e->tag == Iex_RdTmp) { 2769 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2770 } 2771 2772 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2773 X86AMode* am; 2774 HReg res = newVRegF(env); 2775 vassert(e->Iex.Load.ty == Ity_F32); 2776 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2777 addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am)); 2778 return res; 2779 } 2780 2781 if (e->tag == Iex_Binop 2782 && e->Iex.Binop.op == Iop_F64toF32) { 2783 /* Although the result is still held in a standard FPU register, 2784 we need to round it to reflect the loss of accuracy/range 2785 entailed in casting it to a 32-bit float. */ 2786 HReg dst = newVRegF(env); 2787 HReg src = iselDblExpr(env, e->Iex.Binop.arg2); 2788 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2789 addInstr(env, X86Instr_Fp64to32(src,dst)); 2790 set_FPU_rounding_default( env ); 2791 return dst; 2792 } 2793 2794 if (e->tag == Iex_Get) { 2795 X86AMode* am = X86AMode_IR( e->Iex.Get.offset, 2796 hregX86_EBP() ); 2797 HReg res = newVRegF(env); 2798 addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am )); 2799 return res; 2800 } 2801 2802 if (e->tag == Iex_Unop 2803 && e->Iex.Unop.op == Iop_ReinterpI32asF32) { 2804 /* Given an I32, produce an IEEE754 float with the same bit 2805 pattern. */ 2806 HReg dst = newVRegF(env); 2807 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg); 2808 /* paranoia */ 2809 addInstr(env, X86Instr_Push(rmi)); 2810 addInstr(env, X86Instr_FpLdSt( 2811 True/*load*/, 4, dst, 2812 X86AMode_IR(0, hregX86_ESP()))); 2813 add_to_esp(env, 4); 2814 return dst; 2815 } 2816 2817 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) { 2818 HReg rf = iselFltExpr(env, e->Iex.Binop.arg2); 2819 HReg dst = newVRegF(env); 2820 2821 /* rf now holds the value to be rounded. The first thing to do 2822 is set the FPU's rounding mode accordingly. */ 2823 2824 /* Set host rounding mode */ 2825 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2826 2827 /* grndint %rf, %dst */ 2828 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst)); 2829 2830 /* Restore default FPU rounding. */ 2831 set_FPU_rounding_default( env ); 2832 2833 return dst; 2834 } 2835 2836 ppIRExpr(e); 2837 vpanic("iselFltExpr_wrk"); 2838} 2839 2840 2841/*---------------------------------------------------------*/ 2842/*--- ISEL: Floating point expressions (64 bit) ---*/ 2843/*---------------------------------------------------------*/ 2844 2845/* Compute a 64-bit floating point value into a register, the identity 2846 of which is returned. As with iselIntExpr_R, the reg may be either 2847 real or virtual; in any case it must not be changed by subsequent 2848 code emitted by the caller. */ 2849 2850/* IEEE 754 formats. 
From http://www.freesoft.org/CIE/RFC/1832/32.htm: 2851 2852 Type S (1 bit) E (11 bits) F (52 bits) 2853 ---- --------- ----------- ----------- 2854 signalling NaN u 2047 (max) .0uuuuu---u 2855 (with at least 2856 one 1 bit) 2857 quiet NaN u 2047 (max) .1uuuuu---u 2858 2859 negative infinity 1 2047 (max) .000000---0 2860 2861 positive infinity 0 2047 (max) .000000---0 2862 2863 negative zero 1 0 .000000---0 2864 2865 positive zero 0 0 .000000---0 2866*/ 2867 2868static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ) 2869{ 2870 HReg r = iselDblExpr_wrk( env, e ); 2871# if 0 2872 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2873# endif 2874 vassert(hregClass(r) == HRcFlt64); 2875 vassert(hregIsVirtual(r)); 2876 return r; 2877} 2878 2879/* DO NOT CALL THIS DIRECTLY */ 2880static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) 2881{ 2882 IRType ty = typeOfIRExpr(env->type_env,e); 2883 vassert(e); 2884 vassert(ty == Ity_F64); 2885 2886 if (e->tag == Iex_RdTmp) { 2887 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2888 } 2889 2890 if (e->tag == Iex_Const) { 2891 union { UInt u32x2[2]; ULong u64; Double f64; } u; 2892 HReg freg = newVRegF(env); 2893 vassert(sizeof(u) == 8); 2894 vassert(sizeof(u.u64) == 8); 2895 vassert(sizeof(u.f64) == 8); 2896 vassert(sizeof(u.u32x2) == 8); 2897 2898 if (e->Iex.Const.con->tag == Ico_F64) { 2899 u.f64 = e->Iex.Const.con->Ico.F64; 2900 } 2901 else if (e->Iex.Const.con->tag == Ico_F64i) { 2902 u.u64 = e->Iex.Const.con->Ico.F64i; 2903 } 2904 else 2905 vpanic("iselDblExpr(x86): const"); 2906 2907 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1]))); 2908 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0]))); 2909 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg, 2910 X86AMode_IR(0, hregX86_ESP()))); 2911 add_to_esp(env, 8); 2912 return freg; 2913 } 2914 2915 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2916 X86AMode* am; 2917 HReg res = newVRegF(env); 2918 vassert(e->Iex.Load.ty == Ity_F64); 2919 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2920 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am)); 2921 return res; 2922 } 2923 2924 if (e->tag == Iex_Get) { 2925 X86AMode* am = X86AMode_IR( e->Iex.Get.offset, 2926 hregX86_EBP() ); 2927 HReg res = newVRegF(env); 2928 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am )); 2929 return res; 2930 } 2931 2932 if (e->tag == Iex_GetI) { 2933 X86AMode* am 2934 = genGuestArrayOffset( 2935 env, e->Iex.GetI.descr, 2936 e->Iex.GetI.ix, e->Iex.GetI.bias ); 2937 HReg res = newVRegF(env); 2938 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am )); 2939 return res; 2940 } 2941 2942 if (e->tag == Iex_Triop) { 2943 X86FpOp fpop = Xfp_INVALID; 2944 switch (e->Iex.Triop.op) { 2945 case Iop_AddF64: fpop = Xfp_ADD; break; 2946 case Iop_SubF64: fpop = Xfp_SUB; break; 2947 case Iop_MulF64: fpop = Xfp_MUL; break; 2948 case Iop_DivF64: fpop = Xfp_DIV; break; 2949 case Iop_ScaleF64: fpop = Xfp_SCALE; break; 2950 case Iop_Yl2xF64: fpop = Xfp_YL2X; break; 2951 case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break; 2952 case Iop_AtanF64: fpop = Xfp_ATAN; break; 2953 case Iop_PRemF64: fpop = Xfp_PREM; break; 2954 case Iop_PRem1F64: fpop = Xfp_PREM1; break; 2955 default: break; 2956 } 2957 if (fpop != Xfp_INVALID) { 2958 HReg res = newVRegF(env); 2959 HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2); 2960 HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3); 2961 /* XXXROUNDINGFIXME */ 2962 /* set roundingmode here */ 2963 addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res)); 2964 if (fpop != Xfp_ADD && fpop != Xfp_SUB 2965 && fpop != 
Xfp_MUL && fpop != Xfp_DIV) 2966 roundToF64(env, res); 2967 return res; 2968 } 2969 } 2970 2971 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) { 2972 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2); 2973 HReg dst = newVRegF(env); 2974 2975 /* rf now holds the value to be rounded. The first thing to do 2976 is set the FPU's rounding mode accordingly. */ 2977 2978 /* Set host rounding mode */ 2979 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2980 2981 /* grndint %rf, %dst */ 2982 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst)); 2983 2984 /* Restore default FPU rounding. */ 2985 set_FPU_rounding_default( env ); 2986 2987 return dst; 2988 } 2989 2990 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) { 2991 HReg dst = newVRegF(env); 2992 HReg rHi,rLo; 2993 iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2); 2994 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 2995 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 2996 2997 /* Set host rounding mode */ 2998 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2999 3000 addInstr(env, X86Instr_FpLdStI( 3001 True/*load*/, 8, dst, 3002 X86AMode_IR(0, hregX86_ESP()))); 3003 3004 /* Restore default FPU rounding. */ 3005 set_FPU_rounding_default( env ); 3006 3007 add_to_esp(env, 8); 3008 return dst; 3009 } 3010 3011 if (e->tag == Iex_Binop) { 3012 X86FpOp fpop = Xfp_INVALID; 3013 switch (e->Iex.Binop.op) { 3014 case Iop_SinF64: fpop = Xfp_SIN; break; 3015 case Iop_CosF64: fpop = Xfp_COS; break; 3016 case Iop_TanF64: fpop = Xfp_TAN; break; 3017 case Iop_2xm1F64: fpop = Xfp_2XM1; break; 3018 case Iop_SqrtF64: fpop = Xfp_SQRT; break; 3019 default: break; 3020 } 3021 if (fpop != Xfp_INVALID) { 3022 HReg res = newVRegF(env); 3023 HReg src = iselDblExpr(env, e->Iex.Binop.arg2); 3024 /* XXXROUNDINGFIXME */ 3025 /* set roundingmode here */ 3026 addInstr(env, X86Instr_FpUnary(fpop,src,res)); 3027 if (fpop != Xfp_SQRT 3028 && fpop != Xfp_NEG && fpop != Xfp_ABS) 3029 roundToF64(env, res); 3030 return res; 3031 } 3032 } 3033 3034 if (e->tag == Iex_Unop) { 3035 X86FpOp fpop = Xfp_INVALID; 3036 switch (e->Iex.Unop.op) { 3037 case Iop_NegF64: fpop = Xfp_NEG; break; 3038 case Iop_AbsF64: fpop = Xfp_ABS; break; 3039 default: break; 3040 } 3041 if (fpop != Xfp_INVALID) { 3042 HReg res = newVRegF(env); 3043 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 3044 addInstr(env, X86Instr_FpUnary(fpop,src,res)); 3045 if (fpop != Xfp_NEG && fpop != Xfp_ABS) 3046 roundToF64(env, res); 3047 return res; 3048 } 3049 } 3050 3051 if (e->tag == Iex_Unop) { 3052 switch (e->Iex.Unop.op) { 3053 case Iop_I32StoF64: { 3054 HReg dst = newVRegF(env); 3055 HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg); 3056 addInstr(env, X86Instr_Push(X86RMI_Reg(ri))); 3057 set_FPU_rounding_default(env); 3058 addInstr(env, X86Instr_FpLdStI( 3059 True/*load*/, 4, dst, 3060 X86AMode_IR(0, hregX86_ESP()))); 3061 add_to_esp(env, 4); 3062 return dst; 3063 } 3064 case Iop_ReinterpI64asF64: { 3065 /* Given an I64, produce an IEEE754 double with the same 3066 bit pattern. 
*/ 3067 HReg dst = newVRegF(env); 3068 HReg rHi, rLo; 3069 iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg); 3070 /* paranoia */ 3071 set_FPU_rounding_default(env); 3072 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 3073 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 3074 addInstr(env, X86Instr_FpLdSt( 3075 True/*load*/, 8, dst, 3076 X86AMode_IR(0, hregX86_ESP()))); 3077 add_to_esp(env, 8); 3078 return dst; 3079 } 3080 case Iop_F32toF64: { 3081 /* this is a no-op */ 3082 HReg res = iselFltExpr(env, e->Iex.Unop.arg); 3083 return res; 3084 } 3085 default: 3086 break; 3087 } 3088 } 3089 3090 /* --------- MULTIPLEX --------- */ 3091 if (e->tag == Iex_Mux0X) { 3092 if (ty == Ity_F64 3093 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) { 3094 X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 3095 HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX); 3096 HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0); 3097 HReg dst = newVRegF(env); 3098 addInstr(env, X86Instr_FpUnary(Xfp_MOV,rX,dst)); 3099 addInstr(env, X86Instr_Test32(0xFF, r8)); 3100 addInstr(env, X86Instr_FpCMov(Xcc_Z,r0,dst)); 3101 return dst; 3102 } 3103 } 3104 3105 ppIRExpr(e); 3106 vpanic("iselDblExpr_wrk"); 3107} 3108 3109 3110/*---------------------------------------------------------*/ 3111/*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/ 3112/*---------------------------------------------------------*/ 3113 3114static HReg iselVecExpr ( ISelEnv* env, IRExpr* e ) 3115{ 3116 HReg r = iselVecExpr_wrk( env, e ); 3117# if 0 3118 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3119# endif 3120 vassert(hregClass(r) == HRcVec128); 3121 vassert(hregIsVirtual(r)); 3122 return r; 3123} 3124 3125 3126/* DO NOT CALL THIS DIRECTLY */ 3127static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) 3128{ 3129 3130# define REQUIRE_SSE1 \ 3131 do { if (env->hwcaps == 0/*baseline, no sse*/) \ 3132 goto vec_fail; \ 3133 } while (0) 3134 3135# define REQUIRE_SSE2 \ 3136 do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \ 3137 goto vec_fail; \ 3138 } while (0) 3139 3140# define SSE2_OR_ABOVE \ 3141 (env->hwcaps & VEX_HWCAPS_X86_SSE2) 3142 3143 HWord fn = 0; /* address of helper fn, if required */ 3144 MatchInfo mi; 3145 Bool arg1isEReg = False; 3146 X86SseOp op = Xsse_INVALID; 3147 IRType ty = typeOfIRExpr(env->type_env,e); 3148 vassert(e); 3149 vassert(ty == Ity_V128); 3150 3151 REQUIRE_SSE1; 3152 3153 if (e->tag == Iex_RdTmp) { 3154 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 3155 } 3156 3157 if (e->tag == Iex_Get) { 3158 HReg dst = newVRegV(env); 3159 addInstr(env, X86Instr_SseLdSt( 3160 True/*load*/, 3161 dst, 3162 X86AMode_IR(e->Iex.Get.offset, hregX86_EBP()) 3163 ) 3164 ); 3165 return dst; 3166 } 3167 3168 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 3169 HReg dst = newVRegV(env); 3170 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 3171 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am )); 3172 return dst; 3173 } 3174 3175 if (e->tag == Iex_Const) { 3176 HReg dst = newVRegV(env); 3177 vassert(e->Iex.Const.con->tag == Ico_V128); 3178 addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst)); 3179 return dst; 3180 } 3181 3182 if (e->tag == Iex_Unop) { 3183 3184 if (SSE2_OR_ABOVE) { 3185 /* 64UtoV128(LDle:I64(addr)) */ 3186 DECLARE_PATTERN(p_zwiden_load64); 3187 DEFINE_PATTERN(p_zwiden_load64, 3188 unop(Iop_64UtoV128, 3189 IRExpr_Load(Iend_LE,Ity_I64,bind(0)))); 3190 if (matchIRExpr(&mi, p_zwiden_load64, e)) { 3191 X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]); 3192 HReg dst = newVRegV(env); 3193 
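            /* Note: the 8-byte SseLdzLO load below fills only the low
               64 bits of dst from memory and zeroes the upper half, so
               it performs the zero-widening required by 64UtoV128 in a
               single instruction, with no separate masking step. */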
addInstr(env, X86Instr_SseLdzLO(8, dst, am)); 3194 return dst; 3195 } 3196 } 3197 3198 switch (e->Iex.Unop.op) { 3199 3200 case Iop_NotV128: { 3201 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3202 return do_sse_Not128(env, arg); 3203 } 3204 3205 case Iop_CmpNEZ64x2: { 3206 /* We can use SSE2 instructions for this. */ 3207 /* Ideally, we want to do a 64Ix2 comparison against zero of 3208 the operand. Problem is no such insn exists. Solution 3209 therefore is to do a 32Ix4 comparison instead, and bitwise- 3210 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and 3211 let the not'd result of this initial comparison be a:b:c:d. 3212 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use 3213 pshufd to create a value b:a:d:c, and OR that with a:b:c:d, 3214 giving the required result. 3215 3216 The required selection sequence is 2,3,0,1, which 3217 according to Intel's documentation means the pshufd 3218 literal value is 0xB1, that is, 3219 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0) 3220 */ 3221 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3222 HReg tmp = newVRegV(env); 3223 HReg dst = newVRegV(env); 3224 REQUIRE_SSE2; 3225 addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp)); 3226 addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp)); 3227 tmp = do_sse_Not128(env, tmp); 3228 addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst)); 3229 addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst)); 3230 return dst; 3231 } 3232 3233 case Iop_CmpNEZ32x4: { 3234 /* Sigh, we have to generate lousy code since this has to 3235 work on SSE1 hosts */ 3236 /* basically, the idea is: for each lane: 3237 movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1) 3238 sbbl %r, %r (now %r = 1Sto32(CF)) 3239 movl %r, lane 3240 */ 3241 Int i; 3242 X86AMode* am; 3243 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3244 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3245 HReg dst = newVRegV(env); 3246 HReg r32 = newVRegI(env); 3247 sub_from_esp(env, 16); 3248 addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0)); 3249 for (i = 0; i < 4; i++) { 3250 am = X86AMode_IR(i*4, hregX86_ESP()); 3251 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32)); 3252 addInstr(env, X86Instr_Unary32(Xun_NEG, r32)); 3253 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32)); 3254 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am)); 3255 } 3256 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3257 add_to_esp(env, 16); 3258 return dst; 3259 } 3260 3261 case Iop_CmpNEZ8x16: 3262 case Iop_CmpNEZ16x8: { 3263 /* We can use SSE2 instructions for this. */ 3264 HReg arg; 3265 HReg vec0 = newVRegV(env); 3266 HReg vec1 = newVRegV(env); 3267 HReg dst = newVRegV(env); 3268 X86SseOp cmpOp 3269 = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? 
Xsse_CMPEQ16 3270 : Xsse_CMPEQ8; 3271 REQUIRE_SSE2; 3272 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0)); 3273 addInstr(env, mk_vMOVsd_RR(vec0, vec1)); 3274 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1)); 3275 /* defer arg computation to here so as to give CMPEQF as long 3276 as possible to complete */ 3277 arg = iselVecExpr(env, e->Iex.Unop.arg); 3278 /* vec0 is all 0s; vec1 is all 1s */ 3279 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3280 /* 16x8 or 8x16 comparison == */ 3281 addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst)); 3282 /* invert result */ 3283 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst)); 3284 return dst; 3285 } 3286 3287 case Iop_Recip32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary; 3288 case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary; 3289 case Iop_Sqrt32Fx4: op = Xsse_SQRTF; goto do_32Fx4_unary; 3290 do_32Fx4_unary: 3291 { 3292 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3293 HReg dst = newVRegV(env); 3294 addInstr(env, X86Instr_Sse32Fx4(op, arg, dst)); 3295 return dst; 3296 } 3297 3298 case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary; 3299 case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary; 3300 case Iop_Sqrt64Fx2: op = Xsse_SQRTF; goto do_64Fx2_unary; 3301 do_64Fx2_unary: 3302 { 3303 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3304 HReg dst = newVRegV(env); 3305 REQUIRE_SSE2; 3306 addInstr(env, X86Instr_Sse64Fx2(op, arg, dst)); 3307 return dst; 3308 } 3309 3310 case Iop_Recip32F0x4: op = Xsse_RCPF; goto do_32F0x4_unary; 3311 case Iop_RSqrt32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary; 3312 case Iop_Sqrt32F0x4: op = Xsse_SQRTF; goto do_32F0x4_unary; 3313 do_32F0x4_unary: 3314 { 3315 /* A bit subtle. We have to copy the arg to the result 3316 register first, because actually doing the SSE scalar insn 3317 leaves the upper 3/4 of the destination register 3318 unchanged. Whereas the required semantics of these 3319 primops is that the upper 3/4 is simply copied in from the 3320 argument. */ 3321 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3322 HReg dst = newVRegV(env); 3323 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3324 addInstr(env, X86Instr_Sse32FLo(op, arg, dst)); 3325 return dst; 3326 } 3327 3328 case Iop_Recip64F0x2: op = Xsse_RCPF; goto do_64F0x2_unary; 3329 case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary; 3330 case Iop_Sqrt64F0x2: op = Xsse_SQRTF; goto do_64F0x2_unary; 3331 do_64F0x2_unary: 3332 { 3333 /* A bit subtle. We have to copy the arg to the result 3334 register first, because actually doing the SSE scalar insn 3335 leaves the upper half of the destination register 3336 unchanged. Whereas the required semantics of these 3337 primops is that the upper half is simply copied in from the 3338 argument. 
*/ 3339 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3340 HReg dst = newVRegV(env); 3341 REQUIRE_SSE2; 3342 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3343 addInstr(env, X86Instr_Sse64FLo(op, arg, dst)); 3344 return dst; 3345 } 3346 3347 case Iop_32UtoV128: { 3348 HReg dst = newVRegV(env); 3349 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3350 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg); 3351 addInstr(env, X86Instr_Push(rmi)); 3352 addInstr(env, X86Instr_SseLdzLO(4, dst, esp0)); 3353 add_to_esp(env, 4); 3354 return dst; 3355 } 3356 3357 case Iop_64UtoV128: { 3358 HReg rHi, rLo; 3359 HReg dst = newVRegV(env); 3360 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3361 iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg); 3362 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 3363 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 3364 addInstr(env, X86Instr_SseLdzLO(8, dst, esp0)); 3365 add_to_esp(env, 8); 3366 return dst; 3367 } 3368 3369 default: 3370 break; 3371 } /* switch (e->Iex.Unop.op) */ 3372 } /* if (e->tag == Iex_Unop) */ 3373 3374 if (e->tag == Iex_Binop) { 3375 switch (e->Iex.Binop.op) { 3376 3377 case Iop_SetV128lo32: { 3378 HReg dst = newVRegV(env); 3379 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3380 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); 3381 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3382 sub_from_esp(env, 16); 3383 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0)); 3384 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0)); 3385 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3386 add_to_esp(env, 16); 3387 return dst; 3388 } 3389 3390 case Iop_SetV128lo64: { 3391 HReg dst = newVRegV(env); 3392 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3393 HReg srcIhi, srcIlo; 3394 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3395 X86AMode* esp4 = advance4(esp0); 3396 iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2); 3397 sub_from_esp(env, 16); 3398 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0)); 3399 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0)); 3400 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4)); 3401 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3402 add_to_esp(env, 16); 3403 return dst; 3404 } 3405 3406 case Iop_64HLtoV128: { 3407 HReg r3, r2, r1, r0; 3408 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3409 X86AMode* esp4 = advance4(esp0); 3410 X86AMode* esp8 = advance4(esp4); 3411 X86AMode* esp12 = advance4(esp8); 3412 HReg dst = newVRegV(env); 3413 /* do this via the stack (easy, convenient, etc) */ 3414 sub_from_esp(env, 16); 3415 /* Do the less significant 64 bits */ 3416 iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2); 3417 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0)); 3418 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4)); 3419 /* Do the more significant 64 bits */ 3420 iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1); 3421 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8)); 3422 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12)); 3423 /* Fetch result back from stack. 
*/ 3424 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3425 add_to_esp(env, 16); 3426 return dst; 3427 } 3428 3429 case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4; 3430 case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4; 3431 case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4; 3432 case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4; 3433 case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4; 3434 case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4; 3435 case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4; 3436 case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4; 3437 case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4; 3438 case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4; 3439 do_32Fx4: 3440 { 3441 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3442 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3443 HReg dst = newVRegV(env); 3444 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3445 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst)); 3446 return dst; 3447 } 3448 3449 case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2; 3450 case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2; 3451 case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2; 3452 case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2; 3453 case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2; 3454 case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2; 3455 case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2; 3456 case Iop_Min64Fx2: op = Xsse_MINF; goto do_64Fx2; 3457 case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2; 3458 case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2; 3459 do_64Fx2: 3460 { 3461 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3462 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3463 HReg dst = newVRegV(env); 3464 REQUIRE_SSE2; 3465 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3466 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst)); 3467 return dst; 3468 } 3469 3470 case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4; 3471 case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4; 3472 case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4; 3473 case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4; 3474 case Iop_Add32F0x4: op = Xsse_ADDF; goto do_32F0x4; 3475 case Iop_Div32F0x4: op = Xsse_DIVF; goto do_32F0x4; 3476 case Iop_Max32F0x4: op = Xsse_MAXF; goto do_32F0x4; 3477 case Iop_Min32F0x4: op = Xsse_MINF; goto do_32F0x4; 3478 case Iop_Mul32F0x4: op = Xsse_MULF; goto do_32F0x4; 3479 case Iop_Sub32F0x4: op = Xsse_SUBF; goto do_32F0x4; 3480 do_32F0x4: { 3481 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3482 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3483 HReg dst = newVRegV(env); 3484 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3485 addInstr(env, X86Instr_Sse32FLo(op, argR, dst)); 3486 return dst; 3487 } 3488 3489 case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2; 3490 case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2; 3491 case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2; 3492 case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2; 3493 case Iop_Add64F0x2: op = Xsse_ADDF; goto do_64F0x2; 3494 case Iop_Div64F0x2: op = Xsse_DIVF; goto do_64F0x2; 3495 case Iop_Max64F0x2: op = Xsse_MAXF; goto do_64F0x2; 3496 case Iop_Min64F0x2: op = Xsse_MINF; goto do_64F0x2; 3497 case Iop_Mul64F0x2: op = Xsse_MULF; goto do_64F0x2; 3498 case Iop_Sub64F0x2: op = Xsse_SUBF; goto do_64F0x2; 3499 do_64F0x2: { 3500 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3501 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3502 HReg dst = newVRegV(env); 3503 REQUIRE_SSE2; 3504 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3505 addInstr(env, 
X86Instr_Sse64FLo(op, argR, dst)); 3506 return dst; 3507 } 3508 3509 case Iop_QNarrowBin32Sto16Sx8: 3510 op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg; 3511 case Iop_QNarrowBin16Sto8Sx16: 3512 op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg; 3513 case Iop_QNarrowBin16Sto8Ux16: 3514 op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg; 3515 3516 case Iop_InterleaveHI8x16: 3517 op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg; 3518 case Iop_InterleaveHI16x8: 3519 op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg; 3520 case Iop_InterleaveHI32x4: 3521 op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg; 3522 case Iop_InterleaveHI64x2: 3523 op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg; 3524 3525 case Iop_InterleaveLO8x16: 3526 op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg; 3527 case Iop_InterleaveLO16x8: 3528 op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg; 3529 case Iop_InterleaveLO32x4: 3530 op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg; 3531 case Iop_InterleaveLO64x2: 3532 op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg; 3533 3534 case Iop_AndV128: op = Xsse_AND; goto do_SseReRg; 3535 case Iop_OrV128: op = Xsse_OR; goto do_SseReRg; 3536 case Iop_XorV128: op = Xsse_XOR; goto do_SseReRg; 3537 case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg; 3538 case Iop_Add16x8: op = Xsse_ADD16; goto do_SseReRg; 3539 case Iop_Add32x4: op = Xsse_ADD32; goto do_SseReRg; 3540 case Iop_Add64x2: op = Xsse_ADD64; goto do_SseReRg; 3541 case Iop_QAdd8Sx16: op = Xsse_QADD8S; goto do_SseReRg; 3542 case Iop_QAdd16Sx8: op = Xsse_QADD16S; goto do_SseReRg; 3543 case Iop_QAdd8Ux16: op = Xsse_QADD8U; goto do_SseReRg; 3544 case Iop_QAdd16Ux8: op = Xsse_QADD16U; goto do_SseReRg; 3545 case Iop_Avg8Ux16: op = Xsse_AVG8U; goto do_SseReRg; 3546 case Iop_Avg16Ux8: op = Xsse_AVG16U; goto do_SseReRg; 3547 case Iop_CmpEQ8x16: op = Xsse_CMPEQ8; goto do_SseReRg; 3548 case Iop_CmpEQ16x8: op = Xsse_CMPEQ16; goto do_SseReRg; 3549 case Iop_CmpEQ32x4: op = Xsse_CMPEQ32; goto do_SseReRg; 3550 case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S; goto do_SseReRg; 3551 case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg; 3552 case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg; 3553 case Iop_Max16Sx8: op = Xsse_MAX16S; goto do_SseReRg; 3554 case Iop_Max8Ux16: op = Xsse_MAX8U; goto do_SseReRg; 3555 case Iop_Min16Sx8: op = Xsse_MIN16S; goto do_SseReRg; 3556 case Iop_Min8Ux16: op = Xsse_MIN8U; goto do_SseReRg; 3557 case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg; 3558 case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg; 3559 case Iop_Mul16x8: op = Xsse_MUL16; goto do_SseReRg; 3560 case Iop_Sub8x16: op = Xsse_SUB8; goto do_SseReRg; 3561 case Iop_Sub16x8: op = Xsse_SUB16; goto do_SseReRg; 3562 case Iop_Sub32x4: op = Xsse_SUB32; goto do_SseReRg; 3563 case Iop_Sub64x2: op = Xsse_SUB64; goto do_SseReRg; 3564 case Iop_QSub8Sx16: op = Xsse_QSUB8S; goto do_SseReRg; 3565 case Iop_QSub16Sx8: op = Xsse_QSUB16S; goto do_SseReRg; 3566 case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg; 3567 case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg; 3568 do_SseReRg: { 3569 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1); 3570 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2); 3571 HReg dst = newVRegV(env); 3572 if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR) 3573 REQUIRE_SSE2; 3574 if (arg1isEReg) { 3575 addInstr(env, mk_vMOVsd_RR(arg2, dst)); 3576 addInstr(env, X86Instr_SseReRg(op, arg1, dst)); 3577 } else { 3578 addInstr(env, mk_vMOVsd_RR(arg1, dst)); 3579 addInstr(env, 
      case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
      case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
      case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
      case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
      case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
      case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
      case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
      case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
      do_SseShift: {
         HReg      greg = iselVecExpr(env, e->Iex.Binop.arg1);
         X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
         HReg      ereg = newVRegV(env);
         HReg      dst  = newVRegV(env);
         REQUIRE_SSE2;
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(rmi));
         addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
         addInstr(env, mk_vMOVsd_RR(greg, dst));
         addInstr(env, X86Instr_SseReRg(op, ereg, dst));
         add_to_esp(env, 16);
         return dst;
      }

      case Iop_NarrowBin32to16x8:
         fn = (HWord)h_generic_calc_NarrowBin32to16x8;
         goto do_SseAssistedBinary;
      case Iop_NarrowBin16to8x16:
         fn = (HWord)h_generic_calc_NarrowBin16to8x16;
         goto do_SseAssistedBinary;
      do_SseAssistedBinary: {
         /* As with the amd64 case (where this is copied from) we
            generate pretty bad code. */
         vassert(fn != 0);
         HReg dst  = newVRegV(env);
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg argp = newVRegI(env);
         /* subl $112, %esp         -- make a space */
         sub_from_esp(env, 112);
         /* leal 48(%esp), %r_argp  -- point into it */
         addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
                                      argp));
         /* andl $-16, %r_argp      -- 16-align the pointer */
         addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                       X86RMI_Imm( ~(UInt)15 ),
                                       argp));
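         /* The helper is called with three pointer args in regparm-3
            style (%eax, %edx, %ecx): the result buffer and the two
            source operands, all placed in the 16-aligned scratch area
            just carved out of the stack.  Passing and returning the
            V128 values through memory sidesteps the question of how
            to return a 128-bit value from a C call. */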
         /* Prepare 3 arg regs:
            leal 0(%r_argp), %eax
            leal 16(%r_argp), %edx
            leal 32(%r_argp), %ecx
         */
         addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
                                      hregX86_EAX()));
         addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
                                      hregX86_EDX()));
         addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
                                      hregX86_ECX()));
         /* Store the two args, at (%edx) and (%ecx):
            movupd  %argL, 0(%edx)
            movupd  %argR, 0(%ecx)
         */
         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
                                        X86AMode_IR(0, hregX86_EDX())));
         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
                                        X86AMode_IR(0, hregX86_ECX())));
         /* call the helper */
         addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn, 3 ));
         /* fetch the result from memory, using %r_argp, which the
            register allocator will keep alive across the call. */
         addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
                                        X86AMode_IR(0, argp)));
         /* and finally, clear the space */
         add_to_esp(env, 112);
         return dst;
      }

      default:
         break;
   } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */
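
   /* Vector Mux0X: test the low 8 bits of the condition, copy exprX
      into the result, and then conditionally overwrite it with expr0
      (via a conditional SSE move) if the condition byte is zero. */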
   if (e->tag == Iex_Mux0X) {
      X86RM* r8  = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
      HReg   rX  = iselVecExpr(env, e->Iex.Mux0X.exprX);
      HReg   r0  = iselVecExpr(env, e->Iex.Mux0X.expr0);
      HReg   dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(rX,dst));
      addInstr(env, X86Instr_Test32(0xFF, r8));
      addInstr(env, X86Instr_SseCMov(Xcc_Z,r0,dst));
      return dst;
   }

   vec_fail:
   vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
   ppIRExpr(e);
   vpanic("iselVecExpr_wrk");

#  undef REQUIRE_SSE1
#  undef REQUIRE_SSE2
#  undef SSE2_OR_ABOVE
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {

   /* --------- STORE --------- */
   case Ist_Store: {
      IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
      IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
      IREndness end = stmt->Ist.Store.end;

      if (tya != Ity_I32 || end != Iend_LE)
         goto stmt_fail;

      if (tyd == Ity_I32) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
         return;
      }
      if (tyd == Ity_I8 || tyd == Ity_I16) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
                                       r,am ));
         return;
      }
      if (tyd == Ity_F64) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselDblExpr(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
         return;
      }
      if (tyd == Ity_F32) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselFltExpr(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
         return;
      }
      if (tyd == Ity_I64) {
         HReg vHi, vLo, rA;
         iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
         rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
         addInstr(env, X86Instr_Alu32M(
                          Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
         addInstr(env, X86Instr_Alu32M(
                          Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
         return;
      }
      if (tyd == Ity_V128) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselVecExpr(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
         return;
      }
      break;
   }

   /* --------- PUT --------- */
   case Ist_Put: {
      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
      if (ty == Ity_I32) {
         /* We're going to write to memory, so compute the RHS into an
            X86RI. */
         X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
         addInstr(env,
                  X86Instr_Alu32M(
                     Xalu_MOV,
                     ri,
                     X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
                 ));
         return;
      }
      if (ty == Ity_I8 || ty == Ity_I16) {
         HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
         addInstr(env, X86Instr_Store(
                          toUChar(ty==Ity_I8 ? 1 : 2),
                          r,
                          X86AMode_IR(stmt->Ist.Put.offset,
                                      hregX86_EBP())));
         return;
      }
      if (ty == Ity_I64) {
         HReg vHi, vLo;
         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         X86AMode* am4 = advance4(am);
         iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
         return;
      }
      if (ty == Ity_V128) {
         HReg      vec = iselVecExpr(env, stmt->Ist.Put.data);
         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
         return;
      }
      if (ty == Ity_F32) {
         HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
         X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         set_FPU_rounding_default(env); /* paranoia */
         addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
         return;
      }
      if (ty == Ity_F64) {
         HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
         X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         set_FPU_rounding_default(env); /* paranoia */
         addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
         return;
      }
      break;
   }

   /* --------- Indexed PUT --------- */
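   /* PutI: write to an element of a circularly-indexed guest state
      array (such as the x87 register stack).  genGuestArrayOffset
      emits the code to compute the element's offset from the variable
      index plus bias, and hands back an amode addressing it relative
      to the guest state pointer (%ebp). */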
   case Ist_PutI: {
      X86AMode* am
         = genGuestArrayOffset(
              env, stmt->Ist.PutI.descr,
              stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );

      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
      if (ty == Ity_F64) {
         HReg val = iselDblExpr(env, stmt->Ist.PutI.data);
         addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
         return;
      }
      if (ty == Ity_I8) {
         HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
         addInstr(env, X86Instr_Store( 1, r, am ));
         return;
      }
      if (ty == Ity_I32) {
         HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
         return;
      }
      if (ty == Ity_I64) {
         HReg rHi, rLo;
         X86AMode* am4 = advance4(am);
         iselInt64Expr(&rHi, &rLo, env, stmt->Ist.PutI.data);
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
         return;
      }
      break;
   }

   /* --------- TMP --------- */
   case Ist_WrTmp: {
      IRTemp tmp = stmt->Ist.WrTmp.tmp;
      IRType ty = typeOfIRTemp(env->type_env, tmp);

      /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
         compute it into an AMode and then use LEA.  This usually
         produces fewer instructions, often because (for
         memcheck-created IR) we get t = address-expression, with t
         later used twice, and so doing this naturally turns the
         address expression back into an X86 amode. */
      if (ty == Ity_I32
          && stmt->Ist.WrTmp.data->tag == Iex_Binop
          && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
            /* Hmm, iselIntExpr_AMode wimped out and just computed the
               value into a register.  Just emit a normal reg-reg move
               so reg-alloc can coalesce it away in the usual way. */
            HReg src = am->Xam.IR.reg;
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
         } else {
            addInstr(env, X86Instr_Lea32(am,dst));
         }
         return;
      }

      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
         X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
         return;
      }
      if (ty == Ity_I64) {
         HReg rHi, rLo, dstHi, dstLo;
         iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTemp64( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
         return;
      }
      if (ty == Ity_I1) {
         X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, X86Instr_Set32(cond, dst));
         return;
      }
      if (ty == Ity_F64) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
         return;
      }
      if (ty == Ity_F32) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
         return;
      }
      if (ty == Ity_V128) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src,dst));
         return;
      }
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   case Ist_Dirty: {
      IRType   retty;
      IRDirty* d = stmt->Ist.Dirty.details;
      Bool     passBBP = False;

      if (d->nFxState == 0)
         vassert(!d->needsBBP);

      passBBP = toBool(d->nFxState > 0 && d->needsBBP);

      /* Marshal args, do the call, clear stack. */
      doHelperCall( env, passBBP, d->guard, d->cee, d->args );

      /* Now figure out what to do with the returned value, if any. */
      if (d->tmp == IRTemp_INVALID)
         /* No return value.  Nothing to do. */
         return;

      retty = typeOfIRTemp(env->type_env, d->tmp);
      if (retty == Ity_I64) {
         HReg dstHi, dstLo;
         /* The returned value is in %edx:%eax.  Park it in the
            register-pair associated with tmp. */
         lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
         addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
         return;
      }
      if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
         /* The returned value is in %eax.  Park it in the register
            associated with tmp. */
         HReg dst = lookupIRTemp(env, d->tmp);
         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
         return;
      }
      break;
   }

   /* --------- MEM FENCE --------- */
   case Ist_MBE:
      switch (stmt->Ist.MBE.event) {
         case Imbe_Fence:
            addInstr(env, X86Instr_MFence(env->hwcaps));
            return;
         default:
            break;
      }
      break;

   /* --------- ACAS --------- */
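   /* For the singleton CAS, the expected value must be in %eax and the
      new value in %ebx (and %edx:%eax / %ecx:%ebx for the double-width
      case), as the CMPXCHG family requires.  Afterwards, if the
      compare failed (ZF clear), %eax (or %edx:%eax) holds the value
      actually observed in memory, so it is conditionally copied into
      the 'old' temporaries; on success they already hold the expected,
      and hence observed, value. */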
   case Ist_CAS:
      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
         /* "normal" singleton CAS */
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
         X86AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         vassert(cas->expdHi == NULL);
         vassert(cas->dataHi == NULL);
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
         switch (ty) {
            case Ity_I32: sz = 4; break;
            case Ity_I16: sz = 2; break;
            case Ity_I8:  sz = 1; break;
            default: goto unhandled_cas;
         }
         addInstr(env, X86Instr_ACAS(am, sz));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EAX()), rOldLo));
         return;
      } else {
         /* double CAS */
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* only 32-bit allowed in this case */
         /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
         /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
         X86AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         if (ty != Ity_I32)
            goto unhandled_cas;
         addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
         addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
         addInstr(env, X86Instr_DACAS(am));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EDX()), rOldHi));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EAX()), rOldLo));
         return;
      }
      unhandled_cas:
      break;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
      return;

   /* --------- NO-OP --------- */
   /* Fairly self-explanatory, wouldn't you say? */
   case Ist_NoOp:
      return;

   /* --------- EXIT --------- */
   case Ist_Exit: {
      X86RI*      dst;
      X86CondCode cc;
      if (stmt->Ist.Exit.dst->tag != Ico_U32)
         vpanic("isel_x86: Ist_Exit: dst is not a 32-bit value");
      dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
      cc  = iselCondCode(env,stmt->Ist.Exit.guard);
      addInstr(env, X86Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
      return;
   }

   default: break;
   }
   stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
{
   X86RI* ri;
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- goto {");
      ppIRJumpKind(jk);
      vex_printf("} ");
      ppIRExpr(next);
      vex_printf("\n");
   }
   ri = iselIntExpr_RI(env, next);
   addInstr(env, X86Instr_Goto(jk, Xcc_ALWAYS,ri));
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to x86 code. */

HInstrArray* iselSB_X86 ( IRSB* bb, VexArch      arch_host,
                                    VexArchInfo* archinfo_host,
                                    VexAbiInfo*  vbi/*UNUSED*/ )
{
   Int      i, j;
   HReg     hreg, hregHI;
   ISelEnv* env;
   UInt     hwcaps_host = archinfo_host->hwcaps;

   /* sanity ... */
   vassert(arch_host == VexArchX86);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_X86_SSE1
                     | VEX_HWCAPS_X86_SSE2
                     | VEX_HWCAPS_X86_SSE3
                     | VEX_HWCAPS_X86_LZCNT)));

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->hwcaps = hwcaps_host;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
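   /* The kind of vreg is chosen by IR type: I1/I8/I16/I32 each get a
      single 32-bit integer vreg; I64 gets a pair of 32-bit vregs (low
      half in vregmap, high half in vregmapHI); F32 and F64 both live
      in a 64-bit FP (HRcFlt64) vreg; V128 gets a 128-bit vector vreg. */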
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:  hreg   = mkHReg(j++, HRcInt32, True); break;
         case Ity_I64:  hreg   = mkHReg(j++, HRcInt32, True);
                        hregHI = mkHReg(j++, HRcInt32, True); break;
         case Ity_F32:
         case Ity_F64:  hreg   = mkHReg(j++, HRcFlt64, True); break;
         case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
         default: ppIRType(bb->tyenv->types[i]);
                  vpanic("iselBB: IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env,bb->stmts[i]);

   iselNext(env,bb->next,bb->jumpkind);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                     host_x86_isel.c ---*/
/*---------------------------------------------------------------*/