/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2011 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_amd64_defs.h"


/*---------------------------------------------------------*/
/*--- x87/SSE control word stuff                        ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged at exit.
*/

#define DEFAULT_FPUCW 0x027F

#define DEFAULT_MXCSR 0x1F80

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */


/*---------------------------------------------------------*/
/*--- misc helpers                                      ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-amd64/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}
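
/* Illustrative note: these helpers are used below to build matcher
   templates for ir_match.h.  For example, iselIntExpr_R_wrk constructs

      unop(Iop_1Uto8, unop(Iop_64to1, bind(0)))

   and hands it to matchIRExpr(); on a successful match the bound
   subexpression is available as MatchInfo.bindee[0]. */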


/*---------------------------------------------------------*/
/*--- ISelEnv                                           ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp temporary.
     This is computed before insn selection starts, and does not
     change.  We expect this mapping to map precisely the same set of
     IRTemps as the type mapping does.

     - vregmap   holds the primary register for the IRTemp.
     - vregmapHI is only used for 128-bit integer-typed
          IRTemps.  It holds the identity of a second
          64-bit virtual HReg, which holds the high half
          of the value.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   Note, this is all host-independent.  (JRS 20050201: well, kinda
   ... not completely.  Compare with ISelEnv for X86.)
*/

typedef
   struct {
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      HInstrArray* code;

      Int          vreg_ctr;

      UInt         hwcaps;
   }
   ISelEnv;


static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTemp128 ( HReg* vrHI, HReg* vrLO,
                              ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(env->vregmapHI[tmp] != INVALID_HREG);
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, AMD64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppAMD64Instr(instr, True);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

//.. static HReg newVRegF ( ISelEnv* env )
//.. {
//..    HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
//..    env->vreg_ctr++;
//..    return reg;
//.. }

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}
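
/* Illustrative note: an Ity_I128 IRTemp is carried in two 64-bit
   vregs -- vregmap[] gives the low half and vregmapHI[] the high half
   -- and lookupIRTemp128 above returns that pair.  All other
   integer-typed temps use vregmap[] alone. */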


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static AMD64RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static AMD64RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );

static AMD64RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RI*      iselIntExpr_RI      ( ISelEnv* env, IRExpr* e );

static AMD64RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RM*      iselIntExpr_RM      ( ISelEnv* env, IRExpr* e );

static HReg          iselIntExpr_R_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselIntExpr_R       ( ISelEnv* env, IRExpr* e );

static AMD64AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );

static void          iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );
static void          iselInt128Expr     ( HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );

static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64CondCode iselCondCode     ( ISelEnv* env, IRExpr* e );

static HReg          iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg          iselDblExpr     ( ISelEnv* env, IRExpr* e );

static HReg          iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg          iselFltExpr     ( ISelEnv* env, IRExpr* e );

static HReg          iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg          iselVecExpr     ( ISelEnv* env, IRExpr* e );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

static Bool sane_AMode ( AMD64AMode* am )
{
   switch (am->tag) {
      case Aam_IR:
         return
            toBool( hregClass(am->Aam.IR.reg) == HRcInt64
                    && (hregIsVirtual(am->Aam.IR.reg)
                        || am->Aam.IR.reg == hregAMD64_RBP()) );
      case Aam_IRRS:
         return
            toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.base)
                    && hregClass(am->Aam.IRRS.index) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown amd64 amode tag");
   }
}


/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y0 = (Long)x;
   Long y1 = y0;
   y1 <<= 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}
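
/* Illustrative examples, assuming the usual arithmetic-shift behaviour
   of the signed shifts above:
      fitsIn32Bits(0x000000007FFFFFFFULL) == True   (top 33 bits all 0)
      fitsIn32Bits(0xFFFFFFFF80000000ULL) == True   (top 33 bits all 1)
      fitsIn32Bits(0x0000000080000000ULL) == False  (would sign-extend
                                                     to 0xFFFFFFFF80000000) */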

/* Is this a 64-bit zero expression? */

static Bool isZeroU64 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U64
          && e->Iex.Const.con->Ico.U64 == 0ULL;
}

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}

/* Make an int reg-reg move. */

static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}

/* Make a vector reg-reg move. */

static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return AMD64Instr_SseReRg(Asse_MOV, src, dst);
}

/* Advance/retreat %rsp by n. */

static void add_to_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

static void sub_from_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

/* Push 64-bit constants on the stack. */
static void push_uimm64( ISelEnv* env, ULong uimm64 )
{
   /* If uimm64 can be expressed as the sign extension of its
      lower 32 bits, we can do it the easy way. */
   Long simm64 = (Long)uimm64;
   if ( simm64 == ((simm64 << 32) >> 32) ) {
      addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) );
   } else {
      HReg tmp = newVRegI(env);
      addInstr( env, AMD64Instr_Imm64(uimm64, tmp) );
      addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) );
   }
}

//.. /* Given an amode, return one which references 4 bytes further
//..    along. */
//..
//.. static X86AMode* advance4 ( X86AMode* am )
//.. {
//..    X86AMode* am4 = dopyX86AMode(am);
//..    switch (am4->tag) {
//..       case Xam_IRRS:
//..          am4->Xam.IRRS.imm += 4; break;
//..       case Xam_IR:
//..          am4->Xam.IR.imm += 4; break;
//..       default:
//..          vpanic("advance4(x86,host)");
//..    }
//..    return am4;
//.. }
//..
//..
//.. /* Push an arg onto the host stack, in preparation for a call to a
//..    helper function of some kind.  Returns the number of 32-bit words
//..    pushed. */
//..
//.. static Int pushArg ( ISelEnv* env, IRExpr* arg )
//.. {
//..    IRType arg_ty = typeOfIRExpr(env->type_env, arg);
//..    if (arg_ty == Ity_I32) {
//..       addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
//..       return 1;
//..    } else
//..    if (arg_ty == Ity_I64) {
//..       HReg rHi, rLo;
//..       iselInt64Expr(&rHi, &rLo, env, arg);
//..       addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
//..       addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
//..       return 2;
//..    }
//..    ppIRExpr(arg);
//..    vpanic("pushArg(x86): can't handle arg of this type");
//.. }


/* Used only in doHelperCall.  If possible, produce a single
   instruction which computes 'e' into 'dst'.  If not possible, return
   NULL. */

static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
                                                    HReg dst,
                                                    IRExpr* e )
{
   vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);

   if (e->tag == Iex_Const) {
      vassert(e->Iex.Const.con->tag == Ico_U64);
      if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
         return AMD64Instr_Alu64R(
                   Aalu_MOV,
                   AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
                   dst
                );
      } else {
         return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
      }
   }

   if (e->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      return mk_iMOVsd_RR(src, dst);
   }

   if (e->tag == Iex_Get) {
      vassert(e->Iex.Get.ty == Ity_I64);
      return AMD64Instr_Alu64R(
                Aalu_MOV,
                AMD64RMI_Mem(
                   AMD64AMode_IR(e->Iex.Get.offset,
                                 hregAMD64_RBP())),
                dst);
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32Uto64
       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
      return AMD64Instr_MovxLQ(False, src, dst);
   }

   if (0) { ppIRExpr(e); vex_printf("\n"); }

   return NULL;
}
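
/* Illustrative note: the "single instruction" forms recognised above
   are, roughly,

      Iex_Const, fits in 32 bits   ->  movq $imm32, dst  (sign-extended)
      Iex_Const, otherwise         ->  movabsq $imm64, dst
      Iex_RdTmp                    ->  movq tmpreg, dst
      Iex_Get:I64                  ->  movq offset(%rbp), dst
      32Uto64(Iex_RdTmp)           ->  movl tmpreg, dst  (zero-extends)

   Anything else returns NULL, which pushes doHelperCall below onto its
   slow scheme. */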


/* Do a complete function call.  guard is an Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional. */

static
void doHelperCall ( ISelEnv* env,
                    Bool passBBP,
                    IRExpr* guard, IRCallee* cee, IRExpr** args )
{
   AMD64CondCode cc;
   HReg          argregs[6];
   HReg          tmpregs[6];
   AMD64Instr*   fastinstrs[6];
   Int           n_args, i, argreg;

   /* Marshal args for a call and do the call.

      If passBBP is True, %rbp (the baseblock pointer) is to be passed
      as the first arg.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only 6x64 integer
      bits in total can be passed.  In fact the only supported arg
      type is I64.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */
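
   /* Illustrative note: a call such as h(t7, GET:I64(16), 0x2A:I64)
      qualifies for the fast scheme, since each argument is a Tmp, Get
      or Const and so can be computed straight into %rdi/%rsi/%rdx by
      iselIntExpr_single_instruction.  Something like h(Add64(t1,t2))
      does not, and goes via the slow scheme instead. */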

   /* Note that the cee->regparms field is meaningless on AMD64 host
      (since there is only one calling convention) and so we always
      ignore it. */

   n_args = 0;
   for (i = 0; args[i]; i++)
      n_args++;

   if (6 < n_args + (passBBP ? 1 : 0))
      vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");

   argregs[0] = hregAMD64_RDI();
   argregs[1] = hregAMD64_RSI();
   argregs[2] = hregAMD64_RDX();
   argregs[3] = hregAMD64_RCX();
   argregs[4] = hregAMD64_R8();
   argregs[5] = hregAMD64_R9();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;

   fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
   fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         goto slowscheme;
      }
   }

   /* Ok, let's try for the fast scheme.  If it doesn't pan out, we'll
      use the slow scheme.  Because this is tentative, we can't call
      addInstr (that is, commit to) any instructions until we've
      handled all the arguments.  So park the resulting instructions
      in a buffer and emit that if we're successful. */

   /* FAST SCHEME */
   argreg = 0;
   if (passBBP) {
      fastinstrs[argreg] = mk_iMOVsd_RR( hregAMD64_RBP(), argregs[argreg]);
      argreg++;
   }

   for (i = 0; i < n_args; i++) {
      vassert(argreg < 6);
      vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      fastinstrs[argreg]
         = iselIntExpr_single_instruction( env, argregs[argreg], args[i] );
      if (fastinstrs[argreg] == NULL)
         goto slowscheme;
      argreg++;
   }

   /* Looks like we're in luck.  Emit the accumulated instructions and
      move on to doing the call itself. */
   vassert(argreg <= 6);
   for (i = 0; i < argreg; i++)
      addInstr(env, fastinstrs[i]);

   /* Fast scheme only applies for unconditional calls.  Hence: */
   cc = Acc_ALWAYS;

   goto handle_call;


   /* SLOW SCHEME; move via temporaries */
  slowscheme:
#if 0
if (n_args > 0) {for (i = 0; args[i]; i++) {
ppIRExpr(args[i]); vex_printf(" "); }
vex_printf("\n");}
#endif
   argreg = 0;

   if (passBBP) {
      /* This is pretty stupid; better to move directly to rdi
         after the rest of the args are done. */
      tmpregs[argreg] = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[argreg]));
      argreg++;
   }

   for (i = 0; i < n_args; i++) {
      vassert(argreg < 6);
      vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      tmpregs[argreg] = iselIntExpr_R(env, args[i]);
      argreg++;
   }

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Acc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Move the args to their final destinations. */
   for (i = 0; i < argreg; i++) {
      /* None of these insns, including any spill code that might
         be generated, may alter the condition codes. */
      addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
   }


   /* Finally, the call itself. */
  handle_call:
   addInstr(env, AMD64Instr_Call(
                    cc,
                    Ptr_to_ULong(cee->addr),
                    n_args + (passBBP ? 1 : 0)
                 )
   );
}
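
/* Illustrative note: argregs[] above is the integer-argument register
   order of the AMD64 SysV ABI (%rdi, %rsi, %rdx, %rcx, %r8, %r9).  For
   an unconditional two-arg call taking the fast scheme, the emitted
   sequence is roughly

      <compute arg0 into %rdi>   -- one insn each, produced by
      <compute arg1 into %rsi>   -- iselIntExpr_single_instruction
      call <helper>              -- AMD64Instr_Call, cc = Acc_ALWAYS   */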


/* Given a guest-state array descriptor, an index expression and a
   bias, generate an AMD64AMode holding the relevant guest state
   offset. */

static
AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                  IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;

   /* Throw out any cases not generated by an amd64 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-amd64-guest on amd64 host. */

   if (nElems != 8 || (elemSz != 1 && elemSz != 8))
      vpanic("genGuestArrayOffset(amd64 host)");

   /* Compute off into a reg, %off.  Then return:

        movq %off, %tmp
        addq $bias, %tmp  (if bias != 0)
        andq $7, %tmp
        ... base(%rbp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      /* Make sure the bias is sane, in the sense that there are
         no significant bits above bit 30 in it. */
      vassert(-10000 < bias && bias < 10000);
      addInstr(env,
               AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
   }
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
   vassert(elemSz == 1 || elemSz == 8);
   return
      AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
                       elemSz==8 ? 3 : 0);
}


/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
static
void set_SSE_rounding_default ( ISelEnv* env )
{
   /* pushq $DEFAULT_MXCSR
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}

/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* movq $DEFAULT_FPUCW, -8(%rsp)
      fldcw -8(%rsp)
   */
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64M(
                    Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}


/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the SSE machinery to
   have the same rounding.
*/
static
void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* Note: this sequence only makes sense because DEFAULT_MXCSR has
      both rounding bits == 0.  If that wasn't the case, we couldn't
      create a new rounding field simply by ORing the new value into
      place. */

   /* movq $3, %reg
      andq [[mode]], %reg  -- shouldn't be needed; paranoia
      shlq $13, %reg
      orq $DEFAULT_MXCSR, %reg
      pushq %reg
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   HReg        reg      = newVRegI(env);
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                   iselIntExpr_RMI(env, mode), reg));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
   addInstr(env, AMD64Instr_Alu64R(
                    Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
   addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}
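
/* Illustrative note: the shift by 13 above works because the MXCSR
   rounding-control field occupies bits 13:14 and the IRRoundingMode
   encoding (0=nearest, 1=-inf, 2=+inf, 3=zero) matches the hardware
   encoding.  E.g. for mode==3 (round toward zero) the value pushed is
   (3 << 13) | 0x1F80 = 0x7F80. */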
735*/ 736static 737void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode ) 738{ 739 HReg rrm = iselIntExpr_R(env, mode); 740 HReg rrm2 = newVRegI(env); 741 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 742 743 /* movq %rrm, %rrm2 744 andq $3, %rrm2 -- shouldn't be needed; paranoia 745 shlq $10, %rrm2 746 orq $DEFAULT_FPUCW, %rrm2 747 movq %rrm2, -8(%rsp) 748 fldcw -8(%esp) 749 */ 750 addInstr(env, mk_iMOVsd_RR(rrm, rrm2)); 751 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2)); 752 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2)); 753 addInstr(env, AMD64Instr_Alu64R(Aalu_OR, 754 AMD64RMI_Imm(DEFAULT_FPUCW), rrm2)); 755 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, 756 AMD64RI_Reg(rrm2), m8_rsp)); 757 addInstr(env, AMD64Instr_A87LdCW(m8_rsp)); 758} 759 760 761/* Generate all-zeroes into a new vector register. 762*/ 763static HReg generate_zeroes_V128 ( ISelEnv* env ) 764{ 765 HReg dst = newVRegV(env); 766 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst)); 767 return dst; 768} 769 770/* Generate all-ones into a new vector register. 771*/ 772static HReg generate_ones_V128 ( ISelEnv* env ) 773{ 774 HReg dst = newVRegV(env); 775 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst)); 776 return dst; 777} 778 779 780/* Generate !src into a new vector register. Amazing that there isn't 781 a less crappy way to do this. 782*/ 783static HReg do_sse_NotV128 ( ISelEnv* env, HReg src ) 784{ 785 HReg dst = generate_ones_V128(env); 786 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst)); 787 return dst; 788} 789 790 791/* Expand the given byte into a 64-bit word, by cloning each bit 792 8 times. */ 793static ULong bitmask8_to_bytemask64 ( UShort w8 ) 794{ 795 vassert(w8 == (w8 & 0xFF)); 796 ULong w64 = 0; 797 Int i; 798 for (i = 0; i < 8; i++) { 799 if (w8 & (1<<i)) 800 w64 |= (0xFFULL << (8 * i)); 801 } 802 return w64; 803} 804 805 806//.. /* Round an x87 FPU value to 53-bit-mantissa precision, to be used 807//.. after most non-simple FPU operations (simple = +, -, *, / and 808//.. sqrt). 809//.. 810//.. This could be done a lot more efficiently if needed, by loading 811//.. zero and adding it to the value to be rounded (fldz ; faddp?). 812//.. */ 813//.. static void roundToF64 ( ISelEnv* env, HReg reg ) 814//.. { 815//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 816//.. sub_from_esp(env, 8); 817//.. addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp)); 818//.. addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp)); 819//.. add_to_esp(env, 8); 820//.. } 821 822 823/*---------------------------------------------------------*/ 824/*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/ 825/*---------------------------------------------------------*/ 826 827/* Select insns for an integer-typed expression, and add them to the 828 code list. Return a reg holding the result. This reg will be a 829 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you 830 want to modify it, ask for a new vreg, copy it in there, and modify 831 the copy. The register allocator will do its best to map both 832 vregs to the same real register, so the copies will often disappear 833 later in the game. 834 835 This should handle expressions of 64, 32, 16 and 8-bit type. All 836 results are returned in a 64-bit register. For 32-, 16- and 8-bit 837 expressions, the upper 32/16/24 bits are arbitrary, so you should 838 mask or sign extend partial values if necessary. 


//.. /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
//..    after most non-simple FPU operations (simple = +, -, *, / and
//..    sqrt).
//..
//..    This could be done a lot more efficiently if needed, by loading
//..    zero and adding it to the value to be rounded (fldz ; faddp?).
//.. */
//.. static void roundToF64 ( ISelEnv* env, HReg reg )
//.. {
//..    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
//..    sub_from_esp(env, 8);
//..    addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
//..    addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
//..    add_to_esp(env, 8);
//.. }


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32/16/8 bit)        ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64, 32, 16 and 8-bit type.  All
   results are returned in a 64-bit register.  For 32-, 16- and 8-bit
   expressions, the upper 32/48/56 bits are arbitrary, so you should
   mask or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
# if 0
   vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
# endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Used for unary/binary SIMD64 ops. */
   HWord fn = 0;
   Bool second_is_UInt;

   MatchInfo mi;
   DECLARE_PATTERN(p_1Uto8_64to1);
   DECLARE_PATTERN(p_LDle8_then_8Uto64);
   DECLARE_PATTERN(p_LDle16_then_16Uto64);

   IRType ty = typeOfIRExpr(env->type_env,e);
   switch (ty) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: break;
      default: vassert(0);
   }

   switch (e->tag) {

   /* --------- TEMP --------- */
   case Iex_RdTmp: {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* --------- LOAD --------- */
   case Iex_Load: {
      HReg dst = newVRegI(env);
      AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

      /* We can't handle big-endian loads, nor load-linked. */
      if (e->Iex.Load.end != Iend_LE)
         goto irreducible;

      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                         AMD64RMI_Mem(amode), dst) );
         return dst;
      }
      if (ty == Ity_I32) {
         addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I16) {
         addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
         return dst;
      }
      break;
   }

   /* --------- BINARY OP --------- */
   case Iex_Binop: {
      AMD64AluOp   aluOp;
      AMD64ShiftOp shOp;

      /* Pattern: Sub64(0,x) */
      /*     and: Sub32(0,x) */
      if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1))
          || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) {
         HReg dst = newVRegI(env);
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
         return dst;
      }

      /* Is it an addition or logical style op? */
      switch (e->Iex.Binop.op) {
         case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
            aluOp = Aalu_ADD; break;
         case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
            aluOp = Aalu_SUB; break;
         case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
            aluOp = Aalu_AND; break;
         case Iop_Or8: case Iop_Or16: case Iop_Or32: case Iop_Or64:
            aluOp = Aalu_OR; break;
         case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
            aluOp = Aalu_XOR; break;
         case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
            aluOp = Aalu_MUL; break;
         default:
            aluOp = Aalu_INVALID; break;
      }
      /* For commutative ops we assume any literal
         values are on the second operand. */
      if (aluOp != Aalu_INVALID) {
         HReg dst      = newVRegI(env);
         HReg reg      = iselIntExpr_R(env, e->Iex.Binop.arg1);
         AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
         return dst;
      }
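
      /* Illustrative note: e.g. Add64(t3, 0x10:I64) comes out roughly as

            movq <vreg of t3>, dst
            addq $0x10, dst

         since iselIntExpr_RMI turns a constant that fits in 32 bits
         into an immediate operand. */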

      /* Perhaps a shift op? */
      switch (e->Iex.Binop.op) {
         case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
            shOp = Ash_SHL; break;
         case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
            shOp = Ash_SHR; break;
         case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
            shOp = Ash_SAR; break;
         default:
            shOp = Ash_INVALID; break;
      }
      if (shOp != Ash_INVALID) {
         HReg dst = newVRegI(env);

         /* regL = the value to be shifted */
         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(regL,dst));

         /* Do any necessary widening for 32/16/8 bit operands */
         switch (e->Iex.Binop.op) {
            case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
               break;
            case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
               break;
            case Iop_Shr8:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFF), dst));
               break;
            case Iop_Shr16:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
               break;
            case Iop_Shr32:
               addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
               break;
            case Iop_Sar8:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
               break;
            case Iop_Sar16:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
               break;
            case Iop_Sar32:
               addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
               break;
            default:
               ppIROp(e->Iex.Binop.op);
               vassert(0);
         }

         /* Now consider the shift amount.  If it's a literal, we
            can do a much better job than the general case. */
         if (e->Iex.Binop.arg2->tag == Iex_Const) {
            /* assert that the IR is well-typed */
            Int nshift;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
            vassert(nshift >= 0);
            if (nshift > 0)
               /* Can't allow nshift==0 since that means %cl */
               addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
         } else {
            /* General case; we have to force the amount into %cl. */
            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
            addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
         }
         return dst;
      }
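
      /* Illustrative note: the widening step above matters because
         sub-64-bit values live in 64-bit vregs with junk in the unused
         upper bits.  E.g. Shr16 first does "andq $0xFFFF, dst" so the
         junk cannot be shifted down into the 16-bit result, and Sar8
         re-creates the sign with shlq $56 / sarq $56 before the actual
         shift is applied. */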

      /* Deal with 64-bit SIMD binary ops */
      second_is_UInt = False;
      switch (e->Iex.Binop.op) {
         case Iop_Add8x8:
            fn = (HWord)h_generic_calc_Add8x8; break;
         case Iop_Add16x4:
            fn = (HWord)h_generic_calc_Add16x4; break;
         case Iop_Add32x2:
            fn = (HWord)h_generic_calc_Add32x2; break;

         case Iop_Avg8Ux8:
            fn = (HWord)h_generic_calc_Avg8Ux8; break;
         case Iop_Avg16Ux4:
            fn = (HWord)h_generic_calc_Avg16Ux4; break;

         case Iop_CmpEQ8x8:
            fn = (HWord)h_generic_calc_CmpEQ8x8; break;
         case Iop_CmpEQ16x4:
            fn = (HWord)h_generic_calc_CmpEQ16x4; break;
         case Iop_CmpEQ32x2:
            fn = (HWord)h_generic_calc_CmpEQ32x2; break;

         case Iop_CmpGT8Sx8:
            fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
         case Iop_CmpGT16Sx4:
            fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
         case Iop_CmpGT32Sx2:
            fn = (HWord)h_generic_calc_CmpGT32Sx2; break;

         case Iop_InterleaveHI8x8:
            fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
         case Iop_InterleaveLO8x8:
            fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
         case Iop_InterleaveHI16x4:
            fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
         case Iop_InterleaveLO16x4:
            fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
         case Iop_InterleaveHI32x2:
            fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
         case Iop_InterleaveLO32x2:
            fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
         case Iop_CatOddLanes16x4:
            fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
         case Iop_CatEvenLanes16x4:
            fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
         case Iop_Perm8x8:
            fn = (HWord)h_generic_calc_Perm8x8; break;

         case Iop_Max8Ux8:
            fn = (HWord)h_generic_calc_Max8Ux8; break;
         case Iop_Max16Sx4:
            fn = (HWord)h_generic_calc_Max16Sx4; break;
         case Iop_Min8Ux8:
            fn = (HWord)h_generic_calc_Min8Ux8; break;
         case Iop_Min16Sx4:
            fn = (HWord)h_generic_calc_Min16Sx4; break;

         case Iop_Mul16x4:
            fn = (HWord)h_generic_calc_Mul16x4; break;
         case Iop_Mul32x2:
            fn = (HWord)h_generic_calc_Mul32x2; break;
         case Iop_MulHi16Sx4:
            fn = (HWord)h_generic_calc_MulHi16Sx4; break;
         case Iop_MulHi16Ux4:
            fn = (HWord)h_generic_calc_MulHi16Ux4; break;

         case Iop_QAdd8Sx8:
            fn = (HWord)h_generic_calc_QAdd8Sx8; break;
         case Iop_QAdd16Sx4:
            fn = (HWord)h_generic_calc_QAdd16Sx4; break;
         case Iop_QAdd8Ux8:
            fn = (HWord)h_generic_calc_QAdd8Ux8; break;
         case Iop_QAdd16Ux4:
            fn = (HWord)h_generic_calc_QAdd16Ux4; break;

         case Iop_QNarrowBin32Sto16Sx4:
            fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; break;
         case Iop_QNarrowBin16Sto8Sx8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; break;
         case Iop_QNarrowBin16Sto8Ux8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; break;
         case Iop_NarrowBin16to8x8:
            fn = (HWord)h_generic_calc_NarrowBin16to8x8; break;
         case Iop_NarrowBin32to16x4:
            fn = (HWord)h_generic_calc_NarrowBin32to16x4; break;

         case Iop_QSub8Sx8:
            fn = (HWord)h_generic_calc_QSub8Sx8; break;
         case Iop_QSub16Sx4:
            fn = (HWord)h_generic_calc_QSub16Sx4; break;
         case Iop_QSub8Ux8:
            fn = (HWord)h_generic_calc_QSub8Ux8; break;
         case Iop_QSub16Ux4:
            fn = (HWord)h_generic_calc_QSub16Ux4; break;

         case Iop_Sub8x8:
            fn = (HWord)h_generic_calc_Sub8x8; break;
         case Iop_Sub16x4:
            fn = (HWord)h_generic_calc_Sub16x4; break;
         case Iop_Sub32x2:
            fn = (HWord)h_generic_calc_Sub32x2; break;

         case Iop_ShlN32x2:
            fn = (HWord)h_generic_calc_ShlN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShlN16x4:
            fn = (HWord)h_generic_calc_ShlN16x4;
            second_is_UInt = True;
            break;
         case Iop_ShlN8x8:
            fn = (HWord)h_generic_calc_ShlN8x8;
            second_is_UInt = True;
            break;
         case Iop_ShrN32x2:
            fn = (HWord)h_generic_calc_ShrN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShrN16x4:
            fn = (HWord)h_generic_calc_ShrN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN32x2:
            fn = (HWord)h_generic_calc_SarN32x2;
            second_is_UInt = True;
            break;
         case Iop_SarN16x4:
            fn = (HWord)h_generic_calc_SarN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN8x8:
            fn = (HWord)h_generic_calc_SarN8x8;
            second_is_UInt = True;
            break;

         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of signature
               ULong fn ( ULong, ULong ), and they are
            not marked as regparm functions.
         */
         HReg dst  = newVRegI(env);
         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         if (second_is_UInt)
            addInstr(env, AMD64Instr_MovxLQ(False, argR, argR));
         addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
         addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2 ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }
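
      /* Illustrative note: e.g. Iop_Add16x4(t1,t2) becomes, roughly,

            movq <t1>, %rdi
            movq <t2>, %rsi
            call h_generic_calc_Add16x4
            movq %rax, dst

         i.e. these 64-bit SIMD ops are not inlined but handed to the
         generic helpers declared in host_generic_simd64.h. */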

      /* Handle misc other ops. */

      if (e->Iex.Binop.op == Iop_Max32U) {
         HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg dst  = newVRegI(env);
         HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(src1, dst));
         addInstr(env, AMD64Instr_Alu32R(Aalu_CMP, AMD64RMI_Reg(src2), dst));
         addInstr(env, AMD64Instr_CMov64(Acc_B, AMD64RM_Reg(src2), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_DivModS64to32
          || e->Iex.Binop.op == Iop_DivModU64to32) {
         /* 64 x 32 -> (32(rem),32(div)) division */
         /* Get the 64-bit operand into edx:eax, and the other into
            any old R/M. */
         HReg rax   = hregAMD64_RAX();
         HReg rdx   = hregAMD64_RDX();
         HReg dst   = newVRegI(env);
         Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
         AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
         /* Compute the left operand into a reg, and then
            put the top half in edx and the bottom in eax. */
         HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(left64, rdx));
         addInstr(env, mk_iMOVsd_RR(left64, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
         addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
         addInstr(env, AMD64Instr_MovxLQ(False, rdx, rdx));
         addInstr(env, AMD64Instr_MovxLQ(False, rax, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
         addInstr(env, mk_iMOVsd_RR(rax, dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
         return dst;
      }
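
      /* Illustrative note: after the divide, %eax holds the 32-bit
         quotient and %edx the 32-bit remainder; the MovxLQ/shift/or
         sequence repacks them as (remainder << 32) | quotient.  E.g.
         DivModU64to32(7, 2) yields 0x0000000100000003 -- remainder 1
         in the high half, quotient 3 in the low half. */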

      if (e->Iex.Binop.op == Iop_32HLto64) {
         HReg hi32  = newVRegI(env);
         HReg lo32  = newVRegI(env);
         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
         addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
         addInstr(env, AMD64Instr_MovxLQ(False, lo32, lo32));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo32), hi32));
         return hi32;
      }

      if (e->Iex.Binop.op == Iop_16HLto32) {
         HReg hi16  = newVRegI(env);
         HReg lo16  = newVRegI(env);
         HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
         addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo16), hi16));
         return hi16;
      }

      if (e->Iex.Binop.op == Iop_8HLto16) {
         HReg hi8  = newVRegI(env);
         HReg lo8  = newVRegI(env);
         HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
         addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo8), hi8));
         return hi8;
      }

      if (e->Iex.Binop.op == Iop_MullS32
          || e->Iex.Binop.op == Iop_MullS16
          || e->Iex.Binop.op == Iop_MullS8
          || e->Iex.Binop.op == Iop_MullU32
          || e->Iex.Binop.op == Iop_MullU16
          || e->Iex.Binop.op == Iop_MullU8) {
         HReg a32   = newVRegI(env);
         HReg b32   = newVRegI(env);
         HReg a32s  = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg b32s  = iselIntExpr_R(env, e->Iex.Binop.arg2);
         Int          shift  = 0;
         AMD64ShiftOp shr_op = Ash_SHR;
         switch (e->Iex.Binop.op) {
            case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
            case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
            case Iop_MullS8:  shr_op = Ash_SAR; shift = 56; break;
            case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
            case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
            case Iop_MullU8:  shr_op = Ash_SHR; shift = 56; break;
            default: vassert(0);
         }

         addInstr(env, mk_iMOVsd_RR(a32s, a32));
         addInstr(env, mk_iMOVsd_RR(b32s, b32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, a32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, b32));
         addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
         return b32;
      }
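
      /* Illustrative note: the shl/sar (or shl/shr) pairs above merely
         sign- or zero-extend the narrow operands into clean 64-bit
         values, after which one 64-bit multiply yields the full
         widened product.  E.g. MullS32(-2, 3) gives
         0xFFFFFFFFFFFFFFFA, i.e. -6 as an I64. */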

      if (e->Iex.Binop.op == Iop_CmpF64) {
         HReg fL  = iselDblExpr(env, e->Iex.Binop.arg1);
         HReg fR  = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
         /* Mask out irrelevant parts of the result so as to conform
            to the CmpF64 definition. */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
         return dst;
      }
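
      /* Illustrative note: ucomisd sets ZF, PF and CF according to the
         comparison, and the 0x45 mask (bits 0, 2 and 6) keeps exactly
         those three bits of the flags image that SseUComIS deposits in
         dst, discarding everything else. */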

      if (e->Iex.Binop.op == Iop_F64toI32S
          || e->Iex.Binop.op == Iop_F64toI64S) {
         Int  szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8;
         HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
         addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
         set_SSE_rounding_default(env);
         return dst;
      }

//..       if (e->Iex.Binop.op == Iop_F64toI32 || e->Iex.Binop.op == Iop_F64toI16) {
//..          Int  sz  = e->Iex.Binop.op == Iop_F64toI16 ? 2 : 4;
//..          HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
//..          HReg dst = newVRegI(env);
//..
//..          /* Used several times ... */
//..          X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
//..
//..          /* rf now holds the value to be converted, and rrm holds the
//..             rounding mode value, encoded as per the IRRoundingMode
//..             enum.  The first thing to do is set the FPU's rounding
//..             mode accordingly. */
//..
//..          /* Create a space for the format conversion. */
//..          /* subl $4, %esp */
//..          sub_from_esp(env, 4);
//..
//..          /* Set host rounding mode */
//..          set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
//..
//..          /* gistw/l %rf, 0(%esp) */
//..          addInstr(env, X86Instr_FpLdStI(False/*store*/, sz, rf, zero_esp));
//..
//..          if (sz == 2) {
//..             /* movzwl 0(%esp), %dst */
//..             addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
//..          } else {
//..             /* movl 0(%esp), %dst */
//..             vassert(sz == 4);
//..             addInstr(env, X86Instr_Alu32R(
//..                              Xalu_MOV, X86RMI_Mem(zero_esp), dst));
//..          }
//..
//..          /* Restore default FPU rounding. */
//..          set_FPU_rounding_default( env );
//..
//..          /* addl $4, %esp */
//..          add_to_esp(env, 4);
//..          return dst;
//..       }
//..
//..       /* C3210 flags following FPU partial remainder (fprem), both
//..          IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
//..       if (e->Iex.Binop.op == Iop_PRemC3210F64
//..           || e->Iex.Binop.op == Iop_PRem1C3210F64) {
//..          HReg junk = newVRegF(env);
//..          HReg dst  = newVRegI(env);
//..          HReg srcL = iselDblExpr(env, e->Iex.Binop.arg1);
//..          HReg srcR = iselDblExpr(env, e->Iex.Binop.arg2);
//..          addInstr(env, X86Instr_FpBinary(
//..                           e->Iex.Binop.op==Iop_PRemC3210F64
//..                              ? Xfp_PREM : Xfp_PREM1,
//..                           srcL,srcR,junk
//..                  ));
//..          /* The previous pseudo-insn will have left the FPU's C3210
//..             flags set correctly.  So bag them. */
//..          addInstr(env, X86Instr_FpStSW_AX());
//..          addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
//..          addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
//..          return dst;
//..       }

      break;
   }

   /* --------- UNARY OP --------- */
   case Iex_Unop: {

      /* 1Uto8(64to1(expr64)) */
      {
         DEFINE_PATTERN( p_1Uto8_64to1,
                         unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
         if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
            IRExpr* expr64 = mi.bindee[0];
            HReg    dst    = newVRegI(env);
            HReg    src    = iselIntExpr_R(env, expr64);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm(1), dst));
            return dst;
         }
      }

      /* 8Uto64(LDle(expr64)) */
      {
         DEFINE_PATTERN(p_LDle8_then_8Uto64,
                        unop(Iop_8Uto64,
                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
         if (matchIRExpr(&mi,p_LDle8_then_8Uto64,e)) {
            HReg dst = newVRegI(env);
            AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
            return dst;
         }
      }

      /* 16Uto64(LDle(expr64)) */
      {
         DEFINE_PATTERN(p_LDle16_then_16Uto64,
                        unop(Iop_16Uto64,
                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
         if (matchIRExpr(&mi,p_LDle16_then_16Uto64,e)) {
            HReg dst = newVRegI(env);
            AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
            return dst;
         }
      }

      /* 32Uto64( Add32/Sub32/And32/Or32/Xor32(expr32, expr32) )
         Use 32 bit arithmetic and let the default zero-extend rule
         do the 32Uto64 for free. */
      if (e->Iex.Unop.op == Iop_32Uto64 && e->Iex.Unop.arg->tag == Iex_Binop) {
         IROp    opi  = e->Iex.Unop.arg->Iex.Binop.op; /* inner op */
         IRExpr* argL = e->Iex.Unop.arg->Iex.Binop.arg1;
         IRExpr* argR = e->Iex.Unop.arg->Iex.Binop.arg2;
         AMD64AluOp aluOp = Aalu_INVALID;
         switch (opi) {
            case Iop_Add32: aluOp = Aalu_ADD; break;
            case Iop_Sub32: aluOp = Aalu_SUB; break;
            case Iop_And32: aluOp = Aalu_AND; break;
            case Iop_Or32:  aluOp = Aalu_OR;  break;
            case Iop_Xor32: aluOp = Aalu_XOR; break;
            default: break;
         }
         if (aluOp != Aalu_INVALID) {
            /* For commutative ops we assume any literal values are on
               the second operand. */
            HReg dst      = newVRegI(env);
            HReg reg      = iselIntExpr_R(env, argL);
            AMD64RMI* rmi = iselIntExpr_RMI(env, argR);
            addInstr(env, mk_iMOVsd_RR(reg,dst));
            addInstr(env, AMD64Instr_Alu32R(aluOp, rmi, dst));
            return dst;
         }
         /* just fall through to normal handling for Iop_32Uto64 */
      }

      /* Fallback cases */
      switch (e->Iex.Unop.op) {
         case Iop_32Uto64:
         case Iop_32Sto64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_MovxLQ(e->Iex.Unop.op == Iop_32Sto64,
                                            src, dst) );
            return dst;
         }
         case Iop_128HIto64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rHi; /* and abandon rLo */
         }
         case Iop_128to64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rLo; /* and abandon rHi */
         }
         case Iop_8Uto16:
         case Iop_8Uto32:
         case Iop_8Uto64:
         case Iop_16Uto64:
         case Iop_16Uto32: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
                                   || e->Iex.Unop.op==Iop_16Uto64 );
            UInt mask    = srcIs16 ? 0xFFFF : 0xFF;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm(mask), dst));
            return dst;
         }
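
         /* Illustrative note: e.g. 16Uto64(t) becomes
            "movq <t>, dst ; andq $0xFFFF, dst", which suffices because
            only the low 16 bits of a 16-bit temp are defined (see the
            comment above iselIntExpr_R). */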
         case Iop_8Sto16:
         case Iop_8Sto64:
         case Iop_8Sto32:
         case Iop_16Sto32:
         case Iop_16Sto64: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
                                   || e->Iex.Unop.op==Iop_16Sto64 );
            UInt amt     = srcIs16 ? 48 : 56;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
            return dst;
         }
         case Iop_Not8:
         case Iop_Not16:
         case Iop_Not32:
         case Iop_Not64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
            return dst;
         }
//..          case Iop_64HIto32: {
//..             HReg rHi, rLo;
//..             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
//..             return rHi; /* and abandon rLo .. poor wee thing :-) */
//..          }
//..          case Iop_64to32: {
//..             HReg rHi, rLo;
//..             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
//..             return rLo; /* similar stupid comment to the above ... */
//..          }
         case Iop_16HIto8:
         case Iop_32HIto16:
         case Iop_64HIto32: {
            HReg dst  = newVRegI(env);
            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
            Int shift = 0;
            switch (e->Iex.Unop.op) {
               case Iop_16HIto8:  shift = 8;  break;
               case Iop_32HIto16: shift = 16; break;
               case Iop_64HIto32: shift = 32; break;
               default: vassert(0);
            }
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
            return dst;
         }
         case Iop_1Uto64:
         case Iop_1Uto32:
         case Iop_1Uto8: {
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            return dst;
         }
         case Iop_1Sto8:
         case Iop_1Sto16:
         case Iop_1Sto32:
         case Iop_1Sto64: {
            /* could do better than this, but for now ... */
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }
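
         /* Illustrative note: for the 1Sto* cases, Set64 leaves 0 or 1
            in dst, and the shlq $63 / sarq $63 pair smears that single
            bit across the register, giving 0 or 0xFFFFFFFFFFFFFFFF. */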
         case Iop_Ctz64: {
            /* Count trailing zeroes, implemented by amd64 'bsfq' */
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
            return dst;
         }
         case Iop_Clz64: {
            /* Count leading zeroes.  Do 'bsrq' to establish the index
               of the highest set bit, and subtract that value from
               63. */
            HReg tmp = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
            addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                            AMD64RMI_Imm(63), dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
                                            AMD64RMI_Reg(tmp), dst));
            return dst;
         }

         case Iop_CmpwNEZ64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                            AMD64RMI_Reg(src), dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }

         case Iop_CmpwNEZ32: {
            HReg src = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(pre,src));
            addInstr(env, AMD64Instr_MovxLQ(False, src, src));
            addInstr(env, mk_iMOVsd_RR(src,dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                            AMD64RMI_Reg(src), dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }
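
         /* Illustrative note: the CmpwNEZ trick works because
            (-x | x) has its top bit set exactly when x != 0, so the
            final sarq $63 produces all-ones (x != 0) or all-zeroes
            (x == 0). */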

         case Iop_Left8:
         case Iop_Left16:
         case Iop_Left32:
         case Iop_Left64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src, dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst));
            return dst;
         }

         case Iop_V128to32: {
            HReg        dst     = newVRegI(env);
            HReg        vec     = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
            addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
            return dst;
         }

         /* V128{HI}to64 */
         case Iop_V128HIto64:
         case Iop_V128to64: {
            Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
            HReg dst = newVRegI(env);
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* rsp0 = AMD64AMode_IR(0,   hregAMD64_RSP());
            AMD64AMode* rspN = AMD64AMode_IR(off, hregAMD64_RSP());
            sub_from_rsp(env, 16);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp0));
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(rspN), dst ));
            add_to_rsp(env, 16);
            return dst;
         }

         /* ReinterpF64asI64(e) */
         /* Given an IEEE754 double, produce an I64 with the same bit
            pattern. */
         case Iop_ReinterpF64asI64: {
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegI(env);
            HReg        src    = iselDblExpr(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
            addInstr(env, AMD64Instr_Alu64R(
                             Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
            return dst;
         }

         /* ReinterpF32asI32(e) */
         /* Given an IEEE754 single, produce an I64 with the same bit
            pattern in the lower half. */
         case Iop_ReinterpF32asI32: {
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegI(env);
            HReg        src    = iselFltExpr(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp));
            addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst ));
            return dst;
         }

         case Iop_16to8:
         case Iop_32to8:
         case Iop_64to8:
         case Iop_32to16:
         case Iop_64to16:
         case Iop_64to32:
            /* These are no-ops. */
            return iselIntExpr_R(env, e->Iex.Unop.arg);

         default:
            break;
      }

      /* Deal with unary 64-bit SIMD ops. */
      switch (e->Iex.Unop.op) {
         case Iop_CmpNEZ32x2:
            fn = (HWord)h_generic_calc_CmpNEZ32x2; break;
         case Iop_CmpNEZ16x4:
            fn = (HWord)h_generic_calc_CmpNEZ16x4; break;
         case Iop_CmpNEZ8x8:
            fn = (HWord)h_generic_calc_CmpNEZ8x8; break;
         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of
            signature
               ULong fn ( ULong ), and they are
            not marked as regparm functions.
         */
         HReg dst = newVRegI(env);
         HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
         addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1 ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }

      break;
   }

   /* --------- GET --------- */
   case Iex_Get: {
      if (ty == Ity_I64) {
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_MOV,
                          AMD64RMI_Mem(
                             AMD64AMode_IR(e->Iex.Get.offset,
                                           hregAMD64_RBP())),
                          dst));
         return dst;
      }
      if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_LoadEX(
                          toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
                          False,
                          AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
                          dst));
         return dst;
      }
      break;
   }

   case Iex_GetI: {
      AMD64AMode* am
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr,
                   e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg dst = newVRegI(env);
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
         return dst;
      }
      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
         return dst;
      }
      break;
   }
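
   /* Illustrative note: in Vex-generated code %rbp holds the guest
      state (baseblock) pointer, so e.g. GET:I64(16) above becomes
      simply "movq 16(%rbp), dst", and GetI goes through
      genGuestArrayOffset to build an %rbp-relative scaled amode. */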
*/ 1766 if (e->Iex.CCall.retty == Ity_I64) 1767 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst)); 1768 else 1769 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst)); 1770 1771 return dst; 1772 } 1773 1774 /* --------- LITERAL --------- */ 1775 /* 64/32/16/8-bit literals */ 1776 case Iex_Const: 1777 if (ty == Ity_I64) { 1778 HReg r = newVRegI(env); 1779 addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r)); 1780 return r; 1781 } else { 1782 AMD64RMI* rmi = iselIntExpr_RMI ( env, e ); 1783 HReg r = newVRegI(env); 1784 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r)); 1785 return r; 1786 } 1787 1788 /* --------- MULTIPLEX --------- */ 1789 case Iex_Mux0X: { 1790 if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) 1791 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) { 1792 HReg r8; 1793 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX); 1794 AMD64RM* r0 = iselIntExpr_RM(env, e->Iex.Mux0X.expr0); 1795 HReg dst = newVRegI(env); 1796 addInstr(env, mk_iMOVsd_RR(rX,dst)); 1797 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond); 1798 addInstr(env, AMD64Instr_Test64(0xFF, r8)); 1799 addInstr(env, AMD64Instr_CMov64(Acc_Z,r0,dst)); 1800 return dst; 1801 } 1802 break; 1803 } 1804 1805 /* --------- TERNARY OP --------- */ 1806 case Iex_Triop: { 1807 /* C3210 flags following FPU partial remainder (fprem), both 1808 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */ 1809 if (e->Iex.Triop.op == Iop_PRemC3210F64 1810 || e->Iex.Triop.op == Iop_PRem1C3210F64) { 1811 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 1812 HReg arg1 = iselDblExpr(env, e->Iex.Triop.arg2); 1813 HReg arg2 = iselDblExpr(env, e->Iex.Triop.arg3); 1814 HReg dst = newVRegI(env); 1815 addInstr(env, AMD64Instr_A87Free(2)); 1816 1817 /* one arg -> top of x87 stack */ 1818 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp)); 1819 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 1820 1821 /* other arg -> top of x87 stack */ 1822 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp)); 1823 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 1824 1825 switch (e->Iex.Triop.op) { 1826 case Iop_PRemC3210F64: 1827 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM)); 1828 break; 1829 case Iop_PRem1C3210F64: 1830 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1)); 1831 break; 1832 default: 1833 vassert(0); 1834 } 1835 /* Ignore the result, and instead make off with the FPU's 1836 C3210 flags (in the status word). */ 1837 addInstr(env, AMD64Instr_A87StSW(m8_rsp)); 1838 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst)); 1839 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst)); 1840 return dst; 1841 } 1842 break; 1843 } 1844 1845 default: 1846 break; 1847 } /* switch (e->tag) */ 1848 1849 /* We get here if no pattern matched. */ 1850 irreducible: 1851 ppIRExpr(e); 1852 vpanic("iselIntExpr_R(amd64): cannot reduce tree"); 1853} 1854 1855 1856/*---------------------------------------------------------*/ 1857/*--- ISEL: Integer expression auxiliaries ---*/ 1858/*---------------------------------------------------------*/ 1859 1860/* --------------------- AMODEs --------------------- */ 1861 1862/* Return an AMode which computes the value of the specified 1863 expression, possibly also adding insns to the code list as a 1864 result. The expression may only be a 32-bit one. 
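      (The "32-bit" above is a leftover from the x86 version; on amd64
      the expression must be a 64-bit one, and the _wrk routine below
      asserts Ity_I64.)  The result is one of the two amode forms this
      backend uses,

         imm32(%reg)                        -- AMD64AMode_IR
         imm32(%base, %index, 1 << shift)   -- AMD64AMode_IRRS

      so that, for example, Add64(Add64(t1, Shl64(t2, 3)), 40:I64) can
      be folded in its entirety into a single amode of the second
      form, with offset 40 and scale 8.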
1865*/ 1866 1867static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e ) 1868{ 1869 AMD64AMode* am = iselIntExpr_AMode_wrk(env, e); 1870 vassert(sane_AMode(am)); 1871 return am; 1872} 1873 1874/* DO NOT CALL THIS DIRECTLY ! */ 1875static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e ) 1876{ 1877 MatchInfo mi; 1878 DECLARE_PATTERN(p_complex); 1879 IRType ty = typeOfIRExpr(env->type_env,e); 1880 vassert(ty == Ity_I64); 1881 1882 /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */ 1883 /* bind0 bind1 bind2 bind3 */ 1884 DEFINE_PATTERN(p_complex, 1885 binop( Iop_Add64, 1886 binop( Iop_Add64, 1887 bind(0), 1888 binop(Iop_Shl64, bind(1), bind(2)) 1889 ), 1890 bind(3) 1891 ) 1892 ); 1893 if (matchIRExpr(&mi, p_complex, e)) { 1894 IRExpr* expr1 = mi.bindee[0]; 1895 IRExpr* expr2 = mi.bindee[1]; 1896 IRExpr* imm8 = mi.bindee[2]; 1897 IRExpr* simm32 = mi.bindee[3]; 1898 if (imm8->tag == Iex_Const 1899 && imm8->Iex.Const.con->tag == Ico_U8 1900 && imm8->Iex.Const.con->Ico.U8 < 4 1901 /* imm8 is OK, now check simm32 */ 1902 && simm32->tag == Iex_Const 1903 && simm32->Iex.Const.con->tag == Ico_U64 1904 && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) { 1905 UInt shift = imm8->Iex.Const.con->Ico.U8; 1906 UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64); 1907 HReg r1 = iselIntExpr_R(env, expr1); 1908 HReg r2 = iselIntExpr_R(env, expr2); 1909 vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3); 1910 return AMD64AMode_IRRS(offset, r1, r2, shift); 1911 } 1912 } 1913 1914 /* Add64(expr1, Shl64(expr2, imm)) */ 1915 if (e->tag == Iex_Binop 1916 && e->Iex.Binop.op == Iop_Add64 1917 && e->Iex.Binop.arg2->tag == Iex_Binop 1918 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64 1919 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const 1920 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) { 1921 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1922 if (shift == 1 || shift == 2 || shift == 3) { 1923 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1924 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 ); 1925 return AMD64AMode_IRRS(0, r1, r2, shift); 1926 } 1927 } 1928 1929 /* Add64(expr,i) */ 1930 if (e->tag == Iex_Binop 1931 && e->Iex.Binop.op == Iop_Add64 1932 && e->Iex.Binop.arg2->tag == Iex_Const 1933 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64 1934 && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) { 1935 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1936 return AMD64AMode_IR( 1937 toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64), 1938 r1 1939 ); 1940 } 1941 1942 /* Doesn't match anything in particular. Generate it into 1943 a register and use that. */ 1944 { 1945 HReg r1 = iselIntExpr_R(env, e); 1946 return AMD64AMode_IR(0, r1); 1947 } 1948} 1949 1950 1951/* --------------------- RMIs --------------------- */ 1952 1953/* Similarly, calculate an expression into an X86RMI operand. As with 1954 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */ 1955 1956static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e ) 1957{ 1958 AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e); 1959 /* sanity checks ... 
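      an RMI is an immediate, a register or a memory reference.  For
      the register form we insist on a virtual 64-bit integer register,
      and for the memory form on a sane amode; immediates need no
      checking.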
*/ 1960 switch (rmi->tag) { 1961 case Armi_Imm: 1962 return rmi; 1963 case Armi_Reg: 1964 vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64); 1965 vassert(hregIsVirtual(rmi->Armi.Reg.reg)); 1966 return rmi; 1967 case Armi_Mem: 1968 vassert(sane_AMode(rmi->Armi.Mem.am)); 1969 return rmi; 1970 default: 1971 vpanic("iselIntExpr_RMI: unknown amd64 RMI tag"); 1972 } 1973} 1974 1975/* DO NOT CALL THIS DIRECTLY ! */ 1976static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e ) 1977{ 1978 IRType ty = typeOfIRExpr(env->type_env,e); 1979 vassert(ty == Ity_I64 || ty == Ity_I32 1980 || ty == Ity_I16 || ty == Ity_I8); 1981 1982 /* special case: immediate 64/32/16/8 */ 1983 if (e->tag == Iex_Const) { 1984 switch (e->Iex.Const.con->tag) { 1985 case Ico_U64: 1986 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) { 1987 return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)); 1988 } 1989 break; 1990 case Ico_U32: 1991 return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32); break; 1992 case Ico_U16: 1993 return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16); break; 1994 case Ico_U8: 1995 return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8); break; 1996 default: 1997 vpanic("iselIntExpr_RMI.Iex_Const(amd64)"); 1998 } 1999 } 2000 2001 /* special case: 64-bit GET */ 2002 if (e->tag == Iex_Get && ty == Ity_I64) { 2003 return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset, 2004 hregAMD64_RBP())); 2005 } 2006 2007 /* special case: 64-bit load from memory */ 2008 if (e->tag == Iex_Load && ty == Ity_I64 2009 && e->Iex.Load.end == Iend_LE) { 2010 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2011 return AMD64RMI_Mem(am); 2012 } 2013 2014 /* default case: calculate into a register and return that */ 2015 { 2016 HReg r = iselIntExpr_R ( env, e ); 2017 return AMD64RMI_Reg(r); 2018 } 2019} 2020 2021 2022/* --------------------- RIs --------------------- */ 2023 2024/* Calculate an expression into an AMD64RI operand. As with 2025 iselIntExpr_R, the expression can have type 64, 32, 16 or 8 2026 bits. */ 2027 2028static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e ) 2029{ 2030 AMD64RI* ri = iselIntExpr_RI_wrk(env, e); 2031 /* sanity checks ... */ 2032 switch (ri->tag) { 2033 case Ari_Imm: 2034 return ri; 2035 case Ari_Reg: 2036 vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64); 2037 vassert(hregIsVirtual(ri->Ari.Reg.reg)); 2038 return ri; 2039 default: 2040 vpanic("iselIntExpr_RI: unknown amd64 RI tag"); 2041 } 2042} 2043 2044/* DO NOT CALL THIS DIRECTLY ! */ 2045static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e ) 2046{ 2047 IRType ty = typeOfIRExpr(env->type_env,e); 2048 vassert(ty == Ity_I64 || ty == Ity_I32 2049 || ty == Ity_I16 || ty == Ity_I8); 2050 2051 /* special case: immediate */ 2052 if (e->tag == Iex_Const) { 2053 switch (e->Iex.Const.con->tag) { 2054 case Ico_U64: 2055 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) { 2056 return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64)); 2057 } 2058 break; 2059 case Ico_U32: 2060 return AMD64RI_Imm(e->Iex.Const.con->Ico.U32); 2061 case Ico_U16: 2062 return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16); 2063 case Ico_U8: 2064 return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8); 2065 default: 2066 vpanic("iselIntExpr_RMI.Iex_Const(amd64)"); 2067 } 2068 } 2069 2070 /* default case: calculate into a register and return that */ 2071 { 2072 HReg r = iselIntExpr_R ( env, e ); 2073 return AMD64RI_Reg(r); 2074 } 2075} 2076 2077 2078/* --------------------- RMs --------------------- */ 2079 2080/* Similarly, calculate an expression into an AMD64RM operand. 
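   An AMD64RM is either a register or a memory reference -- the operand
   forms accepted by instructions which take no immediate, such as the
   one-operand multiplies and divides used below.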
As 2081 with iselIntExpr_R, the expression can have type 64, 32, 16 or 8 2082 bits. */ 2083 2084static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e ) 2085{ 2086 AMD64RM* rm = iselIntExpr_RM_wrk(env, e); 2087 /* sanity checks ... */ 2088 switch (rm->tag) { 2089 case Arm_Reg: 2090 vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64); 2091 vassert(hregIsVirtual(rm->Arm.Reg.reg)); 2092 return rm; 2093 case Arm_Mem: 2094 vassert(sane_AMode(rm->Arm.Mem.am)); 2095 return rm; 2096 default: 2097 vpanic("iselIntExpr_RM: unknown amd64 RM tag"); 2098 } 2099} 2100 2101/* DO NOT CALL THIS DIRECTLY ! */ 2102static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e ) 2103{ 2104 IRType ty = typeOfIRExpr(env->type_env,e); 2105 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 2106 2107 /* special case: 64-bit GET */ 2108 if (e->tag == Iex_Get && ty == Ity_I64) { 2109 return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset, 2110 hregAMD64_RBP())); 2111 } 2112 2113 /* special case: load from memory */ 2114 2115 /* default case: calculate into a register and return that */ 2116 { 2117 HReg r = iselIntExpr_R ( env, e ); 2118 return AMD64RM_Reg(r); 2119 } 2120} 2121 2122 2123/* --------------------- CONDCODE --------------------- */ 2124 2125/* Generate code to evaluated a bit-typed expression, returning the 2126 condition code which would correspond when the expression would 2127 notionally have returned 1. */ 2128 2129static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e ) 2130{ 2131 /* Uh, there's nothing we can sanity check here, unfortunately. */ 2132 return iselCondCode_wrk(env,e); 2133} 2134 2135/* DO NOT CALL THIS DIRECTLY ! */ 2136static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) 2137{ 2138 MatchInfo mi; 2139 2140 vassert(e); 2141 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1); 2142 2143 /* var */ 2144 if (e->tag == Iex_RdTmp) { 2145 HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp); 2146 HReg dst = newVRegI(env); 2147 addInstr(env, mk_iMOVsd_RR(r64,dst)); 2148 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst)); 2149 return Acc_NZ; 2150 } 2151 2152 /* Constant 1:Bit */ 2153 if (e->tag == Iex_Const) { 2154 HReg r; 2155 vassert(e->Iex.Const.con->tag == Ico_U1); 2156 vassert(e->Iex.Const.con->Ico.U1 == True 2157 || e->Iex.Const.con->Ico.U1 == False); 2158 r = newVRegI(env); 2159 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r)); 2160 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r)); 2161 return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ; 2162 } 2163 2164 /* Not1(...) 
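      -- evaluate the argument to a condition code and return that
         code with its sense inverted.  The Acc_ encoding pairs each
         condition with its negation (Acc_Z and Acc_NZ, for instance),
         so xor-ing the code with 1 flips the test.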
*/ 2165 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) { 2166 /* Generate code for the arg, and negate the test condition */ 2167 return 1 ^ iselCondCode(env, e->Iex.Unop.arg); 2168 } 2169 2170 /* --- patterns rooted at: 64to1 --- */ 2171 2172 /* 64to1 */ 2173 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) { 2174 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg); 2175 addInstr(env, AMD64Instr_Test64(1,reg)); 2176 return Acc_NZ; 2177 } 2178 2179 /* --- patterns rooted at: CmpNEZ8 --- */ 2180 2181 /* CmpNEZ8(x) */ 2182 if (e->tag == Iex_Unop 2183 && e->Iex.Unop.op == Iop_CmpNEZ8) { 2184 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg); 2185 addInstr(env, AMD64Instr_Test64(0xFF,r)); 2186 return Acc_NZ; 2187 } 2188 2189 /* --- patterns rooted at: CmpNEZ16 --- */ 2190 2191 /* CmpNEZ16(x) */ 2192 if (e->tag == Iex_Unop 2193 && e->Iex.Unop.op == Iop_CmpNEZ16) { 2194 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg); 2195 addInstr(env, AMD64Instr_Test64(0xFFFF,r)); 2196 return Acc_NZ; 2197 } 2198 2199 /* --- patterns rooted at: CmpNEZ32 --- */ 2200 2201 /* CmpNEZ32(x) */ 2202 if (e->tag == Iex_Unop 2203 && e->Iex.Unop.op == Iop_CmpNEZ32) { 2204 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 2205 AMD64RMI* rmi2 = AMD64RMI_Imm(0); 2206 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1)); 2207 return Acc_NZ; 2208 } 2209 2210 /* --- patterns rooted at: CmpNEZ64 --- */ 2211 2212 /* CmpNEZ64(Or64(x,y)) */ 2213 { 2214 DECLARE_PATTERN(p_CmpNEZ64_Or64); 2215 DEFINE_PATTERN(p_CmpNEZ64_Or64, 2216 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1)))); 2217 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) { 2218 HReg r0 = iselIntExpr_R(env, mi.bindee[0]); 2219 AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]); 2220 HReg tmp = newVRegI(env); 2221 addInstr(env, mk_iMOVsd_RR(r0, tmp)); 2222 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp)); 2223 return Acc_NZ; 2224 } 2225 } 2226 2227 /* CmpNEZ64(x) */ 2228 if (e->tag == Iex_Unop 2229 && e->Iex.Unop.op == Iop_CmpNEZ64) { 2230 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 2231 AMD64RMI* rmi2 = AMD64RMI_Imm(0); 2232 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1)); 2233 return Acc_NZ; 2234 } 2235 2236 /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */ 2237 2238 /* CmpEQ8 / CmpNE8 */ 2239 if (e->tag == Iex_Binop 2240 && (e->Iex.Binop.op == Iop_CmpEQ8 2241 || e->Iex.Binop.op == Iop_CmpNE8 2242 || e->Iex.Binop.op == Iop_CasCmpEQ8 2243 || e->Iex.Binop.op == Iop_CasCmpNE8)) { 2244 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2245 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2246 HReg r = newVRegI(env); 2247 addInstr(env, mk_iMOVsd_RR(r1,r)); 2248 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r)); 2249 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r)); 2250 switch (e->Iex.Binop.op) { 2251 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z; 2252 case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ; 2253 default: vpanic("iselCondCode(amd64): CmpXX8"); 2254 } 2255 } 2256 2257 /* CmpEQ16 / CmpNE16 */ 2258 if (e->tag == Iex_Binop 2259 && (e->Iex.Binop.op == Iop_CmpEQ16 2260 || e->Iex.Binop.op == Iop_CmpNE16 2261 || e->Iex.Binop.op == Iop_CasCmpEQ16 2262 || e->Iex.Binop.op == Iop_CasCmpNE16)) { 2263 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2264 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2265 HReg r = newVRegI(env); 2266 addInstr(env, mk_iMOVsd_RR(r1,r)); 2267 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r)); 2268 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r)); 2269 switch (e->Iex.Binop.op) { 2270 case 
Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z; 2271 case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ; 2272 default: vpanic("iselCondCode(amd64): CmpXX16"); 2273 } 2274 } 2275 2276 /* CmpNE64(ccall, 64-bit constant) (--smc-check=all optimisation). 2277 Saves a "movq %rax, %tmp" compared to the default route. */ 2278 if (e->tag == Iex_Binop 2279 && e->Iex.Binop.op == Iop_CmpNE64 2280 && e->Iex.Binop.arg1->tag == Iex_CCall 2281 && e->Iex.Binop.arg2->tag == Iex_Const) { 2282 IRExpr* cal = e->Iex.Binop.arg1; 2283 IRExpr* con = e->Iex.Binop.arg2; 2284 HReg tmp = newVRegI(env); 2285 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */ 2286 vassert(cal->Iex.CCall.retty == Ity_I64); /* else ill-typed IR */ 2287 vassert(con->Iex.Const.con->tag == Ico_U64); 2288 /* Marshal args, do the call. */ 2289 doHelperCall( env, False, NULL, cal->Iex.CCall.cee, cal->Iex.CCall.args ); 2290 addInstr(env, AMD64Instr_Imm64(con->Iex.Const.con->Ico.U64, tmp)); 2291 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP, 2292 AMD64RMI_Reg(hregAMD64_RAX()), tmp)); 2293 return Acc_NZ; 2294 } 2295 2296 /* Cmp*64*(x,y) */ 2297 if (e->tag == Iex_Binop 2298 && (e->Iex.Binop.op == Iop_CmpEQ64 2299 || e->Iex.Binop.op == Iop_CmpNE64 2300 || e->Iex.Binop.op == Iop_CmpLT64S 2301 || e->Iex.Binop.op == Iop_CmpLT64U 2302 || e->Iex.Binop.op == Iop_CmpLE64S 2303 || e->Iex.Binop.op == Iop_CmpLE64U 2304 || e->Iex.Binop.op == Iop_CasCmpEQ64 2305 || e->Iex.Binop.op == Iop_CasCmpNE64)) { 2306 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2307 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2308 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1)); 2309 switch (e->Iex.Binop.op) { 2310 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z; 2311 case Iop_CmpNE64: case Iop_CasCmpNE64: return Acc_NZ; 2312 case Iop_CmpLT64S: return Acc_L; 2313 case Iop_CmpLT64U: return Acc_B; 2314 case Iop_CmpLE64S: return Acc_LE; 2315 case Iop_CmpLE64U: return Acc_BE; 2316 default: vpanic("iselCondCode(amd64): CmpXX64"); 2317 } 2318 } 2319 2320 /* Cmp*32*(x,y) */ 2321 if (e->tag == Iex_Binop 2322 && (e->Iex.Binop.op == Iop_CmpEQ32 2323 || e->Iex.Binop.op == Iop_CmpNE32 2324 || e->Iex.Binop.op == Iop_CmpLT32S 2325 || e->Iex.Binop.op == Iop_CmpLT32U 2326 || e->Iex.Binop.op == Iop_CmpLE32S 2327 || e->Iex.Binop.op == Iop_CmpLE32U 2328 || e->Iex.Binop.op == Iop_CasCmpEQ32 2329 || e->Iex.Binop.op == Iop_CasCmpNE32)) { 2330 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2331 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2332 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1)); 2333 switch (e->Iex.Binop.op) { 2334 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z; 2335 case Iop_CmpNE32: case Iop_CasCmpNE32: return Acc_NZ; 2336 case Iop_CmpLT32S: return Acc_L; 2337 case Iop_CmpLT32U: return Acc_B; 2338 case Iop_CmpLE32S: return Acc_LE; 2339 case Iop_CmpLE32U: return Acc_BE; 2340 default: vpanic("iselCondCode(amd64): CmpXX32"); 2341 } 2342 } 2343 2344 ppIRExpr(e); 2345 vpanic("iselCondCode(amd64)"); 2346} 2347 2348 2349/*---------------------------------------------------------*/ 2350/*--- ISEL: Integer expressions (128 bit) ---*/ 2351/*---------------------------------------------------------*/ 2352 2353/* Compute a 128-bit value into a register pair, which is returned as 2354 the first two parameters. As with iselIntExpr_R, these may be 2355 either real or virtual regs; in any case they must not be changed 2356 by subsequent code emitted by the caller. 
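      The high half is returned in *rHi and the low half in *rLo.  The
      only 128-bit producers handled here are reads of 128-bit
      temporaries, the 64x64->128 multiplies and 128/64 divides (both
      of which leave their results in %rdx:%rax), and 64HLto128.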
*/ 2357 2358static void iselInt128Expr ( HReg* rHi, HReg* rLo, 2359 ISelEnv* env, IRExpr* e ) 2360{ 2361 iselInt128Expr_wrk(rHi, rLo, env, e); 2362# if 0 2363 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2364# endif 2365 vassert(hregClass(*rHi) == HRcInt64); 2366 vassert(hregIsVirtual(*rHi)); 2367 vassert(hregClass(*rLo) == HRcInt64); 2368 vassert(hregIsVirtual(*rLo)); 2369} 2370 2371/* DO NOT CALL THIS DIRECTLY ! */ 2372static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, 2373 ISelEnv* env, IRExpr* e ) 2374{ 2375//.. HWord fn = 0; /* helper fn for most SIMD64 stuff */ 2376 vassert(e); 2377 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128); 2378 2379//.. /* 64-bit literal */ 2380//.. if (e->tag == Iex_Const) { 2381//.. ULong w64 = e->Iex.Const.con->Ico.U64; 2382//.. UInt wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF; 2383//.. UInt wLo = ((UInt)w64) & 0xFFFFFFFF; 2384//.. HReg tLo = newVRegI(env); 2385//.. HReg tHi = newVRegI(env); 2386//.. vassert(e->Iex.Const.con->tag == Ico_U64); 2387//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi)); 2388//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo)); 2389//.. *rHi = tHi; 2390//.. *rLo = tLo; 2391//.. return; 2392//.. } 2393 2394 /* read 128-bit IRTemp */ 2395 if (e->tag == Iex_RdTmp) { 2396 lookupIRTemp128( rHi, rLo, env, e->Iex.RdTmp.tmp); 2397 return; 2398 } 2399 2400//.. /* 64-bit load */ 2401//.. if (e->tag == Iex_LDle) { 2402//.. HReg tLo, tHi; 2403//.. X86AMode *am0, *am4; 2404//.. vassert(e->Iex.LDle.ty == Ity_I64); 2405//.. tLo = newVRegI(env); 2406//.. tHi = newVRegI(env); 2407//.. am0 = iselIntExpr_AMode(env, e->Iex.LDle.addr); 2408//.. am4 = advance4(am0); 2409//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo )); 2410//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 2411//.. *rHi = tHi; 2412//.. *rLo = tLo; 2413//.. return; 2414//.. } 2415//.. 2416//.. /* 64-bit GET */ 2417//.. if (e->tag == Iex_Get) { 2418//.. X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP()); 2419//.. X86AMode* am4 = advance4(am); 2420//.. HReg tLo = newVRegI(env); 2421//.. HReg tHi = newVRegI(env); 2422//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo )); 2423//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 2424//.. *rHi = tHi; 2425//.. *rLo = tLo; 2426//.. return; 2427//.. } 2428//.. 2429//.. /* 64-bit GETI */ 2430//.. if (e->tag == Iex_GetI) { 2431//.. X86AMode* am 2432//.. = genGuestArrayOffset( env, e->Iex.GetI.descr, 2433//.. e->Iex.GetI.ix, e->Iex.GetI.bias ); 2434//.. X86AMode* am4 = advance4(am); 2435//.. HReg tLo = newVRegI(env); 2436//.. HReg tHi = newVRegI(env); 2437//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo )); 2438//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 2439//.. *rHi = tHi; 2440//.. *rLo = tLo; 2441//.. return; 2442//.. } 2443//.. 2444//.. /* 64-bit Mux0X */ 2445//.. if (e->tag == Iex_Mux0X) { 2446//.. HReg e0Lo, e0Hi, eXLo, eXHi, r8; 2447//.. HReg tLo = newVRegI(env); 2448//.. HReg tHi = newVRegI(env); 2449//.. iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0); 2450//.. iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX); 2451//.. addInstr(env, mk_iMOVsd_RR(eXHi, tHi)); 2452//.. addInstr(env, mk_iMOVsd_RR(eXLo, tLo)); 2453//.. r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond); 2454//.. addInstr(env, X86Instr_Test32(X86RI_Imm(0xFF), X86RM_Reg(r8))); 2455//.. /* This assumes the first cmov32 doesn't trash the condition 2456//.. 
codes, so they are still available for the second cmov32 */ 2457//.. addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi)); 2458//.. addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo)); 2459//.. *rHi = tHi; 2460//.. *rLo = tLo; 2461//.. return; 2462//.. } 2463 2464 /* --------- BINARY ops --------- */ 2465 if (e->tag == Iex_Binop) { 2466 switch (e->Iex.Binop.op) { 2467 /* 64 x 64 -> 128 multiply */ 2468 case Iop_MullU64: 2469 case Iop_MullS64: { 2470 /* get one operand into %rax, and the other into a R/M. 2471 Need to make an educated guess about which is better in 2472 which. */ 2473 HReg tLo = newVRegI(env); 2474 HReg tHi = newVRegI(env); 2475 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64); 2476 AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1); 2477 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2); 2478 addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX())); 2479 addInstr(env, AMD64Instr_MulL(syned, rmLeft)); 2480 /* Result is now in RDX:RAX. Tell the caller. */ 2481 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi)); 2482 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo)); 2483 *rHi = tHi; 2484 *rLo = tLo; 2485 return; 2486 } 2487 2488 /* 128 x 64 -> (64(rem),64(div)) division */ 2489 case Iop_DivModU128to64: 2490 case Iop_DivModS128to64: { 2491 /* Get the 128-bit operand into rdx:rax, and the other into 2492 any old R/M. */ 2493 HReg sHi, sLo; 2494 HReg tLo = newVRegI(env); 2495 HReg tHi = newVRegI(env); 2496 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS128to64); 2497 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2); 2498 iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2499 addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX())); 2500 addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX())); 2501 addInstr(env, AMD64Instr_Div(syned, 8, rmRight)); 2502 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi)); 2503 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo)); 2504 *rHi = tHi; 2505 *rLo = tLo; 2506 return; 2507 } 2508 2509 /* 64HLto128(e1,e2) */ 2510 case Iop_64HLto128: 2511 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2512 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2513 return; 2514 2515//.. /* Or64/And64/Xor64 */ 2516//.. case Iop_Or64: 2517//.. case Iop_And64: 2518//.. case Iop_Xor64: { 2519//.. HReg xLo, xHi, yLo, yHi; 2520//.. HReg tLo = newVRegI(env); 2521//.. HReg tHi = newVRegI(env); 2522//.. X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR 2523//.. : e->Iex.Binop.op==Iop_And64 ? Xalu_AND 2524//.. : Xalu_XOR; 2525//.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2526//.. addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2527//.. addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2528//.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2529//.. addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi)); 2530//.. addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo)); 2531//.. *rHi = tHi; 2532//.. *rLo = tLo; 2533//.. return; 2534//.. } 2535//.. 2536//.. /* Add64/Sub64 */ 2537//.. case Iop_Add64: 2538//.. case Iop_Sub64: { 2539//.. HReg xLo, xHi, yLo, yHi; 2540//.. HReg tLo = newVRegI(env); 2541//.. HReg tHi = newVRegI(env); 2542//.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2543//.. addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2544//.. addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2545//.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2546//.. if (e->Iex.Binop.op==Iop_Add64) { 2547//.. addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo)); 2548//.. addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi)); 2549//.. } else { 2550//.. 
addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo)); 2551//.. addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi)); 2552//.. } 2553//.. *rHi = tHi; 2554//.. *rLo = tLo; 2555//.. return; 2556//.. } 2557//.. 2558//.. /* 32HLto64(e1,e2) */ 2559//.. case Iop_32HLto64: 2560//.. *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2561//.. *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2562//.. return; 2563//.. 2564//.. /* 64-bit shifts */ 2565//.. case Iop_Shl64: { 2566//.. /* We use the same ingenious scheme as gcc. Put the value 2567//.. to be shifted into %hi:%lo, and the shift amount into 2568//.. %cl. Then (dsts on right, a la ATT syntax): 2569//.. 2570//.. shldl %cl, %lo, %hi -- make %hi be right for the 2571//.. -- shift amt %cl % 32 2572//.. shll %cl, %lo -- make %lo be right for the 2573//.. -- shift amt %cl % 32 2574//.. 2575//.. Now, if (shift amount % 64) is in the range 32 .. 63, 2576//.. we have to do a fixup, which puts the result low half 2577//.. into the result high half, and zeroes the low half: 2578//.. 2579//.. testl $32, %ecx 2580//.. 2581//.. cmovnz %lo, %hi 2582//.. movl $0, %tmp -- sigh; need yet another reg 2583//.. cmovnz %tmp, %lo 2584//.. */ 2585//.. HReg rAmt, sHi, sLo, tHi, tLo, tTemp; 2586//.. tLo = newVRegI(env); 2587//.. tHi = newVRegI(env); 2588//.. tTemp = newVRegI(env); 2589//.. rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2); 2590//.. iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2591//.. addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX())); 2592//.. addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2593//.. addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2594//.. /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo 2595//.. and those regs are legitimately modifiable. */ 2596//.. addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi)); 2597//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, X86RM_Reg(tLo))); 2598//.. addInstr(env, X86Instr_Test32(X86RI_Imm(32), 2599//.. X86RM_Reg(hregX86_ECX()))); 2600//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi)); 2601//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp)); 2602//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo)); 2603//.. *rHi = tHi; 2604//.. *rLo = tLo; 2605//.. return; 2606//.. } 2607//.. 2608//.. case Iop_Shr64: { 2609//.. /* We use the same ingenious scheme as gcc. Put the value 2610//.. to be shifted into %hi:%lo, and the shift amount into 2611//.. %cl. Then: 2612//.. 2613//.. shrdl %cl, %hi, %lo -- make %lo be right for the 2614//.. -- shift amt %cl % 32 2615//.. shrl %cl, %hi -- make %hi be right for the 2616//.. -- shift amt %cl % 32 2617//.. 2618//.. Now, if (shift amount % 64) is in the range 32 .. 63, 2619//.. we have to do a fixup, which puts the result high half 2620//.. into the result low half, and zeroes the high half: 2621//.. 2622//.. testl $32, %ecx 2623//.. 2624//.. cmovnz %hi, %lo 2625//.. movl $0, %tmp -- sigh; need yet another reg 2626//.. cmovnz %tmp, %hi 2627//.. */ 2628//.. HReg rAmt, sHi, sLo, tHi, tLo, tTemp; 2629//.. tLo = newVRegI(env); 2630//.. tHi = newVRegI(env); 2631//.. tTemp = newVRegI(env); 2632//.. rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2); 2633//.. iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2634//.. addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX())); 2635//.. addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2636//.. addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2637//.. /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo 2638//.. and those regs are legitimately modifiable. */ 2639//.. 
addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo)); 2640//.. addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, X86RM_Reg(tHi))); 2641//.. addInstr(env, X86Instr_Test32(X86RI_Imm(32), 2642//.. X86RM_Reg(hregX86_ECX()))); 2643//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo)); 2644//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp)); 2645//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi)); 2646//.. *rHi = tHi; 2647//.. *rLo = tLo; 2648//.. return; 2649//.. } 2650//.. 2651//.. /* F64 -> I64 */ 2652//.. /* Sigh, this is an almost exact copy of the F64 -> I32/I16 2653//.. case. Unfortunately I see no easy way to avoid the 2654//.. duplication. */ 2655//.. case Iop_F64toI64: { 2656//.. HReg rf = iselDblExpr(env, e->Iex.Binop.arg2); 2657//.. HReg tLo = newVRegI(env); 2658//.. HReg tHi = newVRegI(env); 2659//.. 2660//.. /* Used several times ... */ 2661//.. /* Careful ... this sharing is only safe because 2662//.. zero_esp/four_esp do not hold any registers which the 2663//.. register allocator could attempt to swizzle later. */ 2664//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 2665//.. X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP()); 2666//.. 2667//.. /* rf now holds the value to be converted, and rrm holds 2668//.. the rounding mode value, encoded as per the 2669//.. IRRoundingMode enum. The first thing to do is set the 2670//.. FPU's rounding mode accordingly. */ 2671//.. 2672//.. /* Create a space for the format conversion. */ 2673//.. /* subl $8, %esp */ 2674//.. sub_from_esp(env, 8); 2675//.. 2676//.. /* Set host rounding mode */ 2677//.. set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2678//.. 2679//.. /* gistll %rf, 0(%esp) */ 2680//.. addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp)); 2681//.. 2682//.. /* movl 0(%esp), %dstLo */ 2683//.. /* movl 4(%esp), %dstHi */ 2684//.. addInstr(env, X86Instr_Alu32R( 2685//.. Xalu_MOV, X86RMI_Mem(zero_esp), tLo)); 2686//.. addInstr(env, X86Instr_Alu32R( 2687//.. Xalu_MOV, X86RMI_Mem(four_esp), tHi)); 2688//.. 2689//.. /* Restore default FPU rounding. */ 2690//.. set_FPU_rounding_default( env ); 2691//.. 2692//.. /* addl $8, %esp */ 2693//.. add_to_esp(env, 8); 2694//.. 2695//.. *rHi = tHi; 2696//.. *rLo = tLo; 2697//.. return; 2698//.. } 2699//.. 2700 default: 2701 break; 2702 } 2703 } /* if (e->tag == Iex_Binop) */ 2704 2705 2706//.. /* --------- UNARY ops --------- */ 2707//.. if (e->tag == Iex_Unop) { 2708//.. switch (e->Iex.Unop.op) { 2709//.. 2710//.. /* 32Sto64(e) */ 2711//.. case Iop_32Sto64: { 2712//.. HReg tLo = newVRegI(env); 2713//.. HReg tHi = newVRegI(env); 2714//.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2715//.. addInstr(env, mk_iMOVsd_RR(src,tHi)); 2716//.. addInstr(env, mk_iMOVsd_RR(src,tLo)); 2717//.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tHi))); 2718//.. *rHi = tHi; 2719//.. *rLo = tLo; 2720//.. return; 2721//.. } 2722//.. 2723//.. /* 32Uto64(e) */ 2724//.. case Iop_32Uto64: { 2725//.. HReg tLo = newVRegI(env); 2726//.. HReg tHi = newVRegI(env); 2727//.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2728//.. addInstr(env, mk_iMOVsd_RR(src,tLo)); 2729//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2730//.. *rHi = tHi; 2731//.. *rLo = tLo; 2732//.. return; 2733//.. } 2734 2735//.. /* could do better than this, but for now ... */ 2736//.. case Iop_1Sto64: { 2737//.. HReg tLo = newVRegI(env); 2738//.. HReg tHi = newVRegI(env); 2739//.. X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 2740//.. 
addInstr(env, X86Instr_Set32(cond,tLo)); 2741//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, X86RM_Reg(tLo))); 2742//.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tLo))); 2743//.. addInstr(env, mk_iMOVsd_RR(tLo, tHi)); 2744//.. *rHi = tHi; 2745//.. *rLo = tLo; 2746//.. return; 2747//.. } 2748//.. 2749//.. /* Not64(e) */ 2750//.. case Iop_Not64: { 2751//.. HReg tLo = newVRegI(env); 2752//.. HReg tHi = newVRegI(env); 2753//.. HReg sHi, sLo; 2754//.. iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg); 2755//.. addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2756//.. addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2757//.. addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tHi))); 2758//.. addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tLo))); 2759//.. *rHi = tHi; 2760//.. *rLo = tLo; 2761//.. return; 2762//.. } 2763//.. 2764//.. default: 2765//.. break; 2766//.. } 2767//.. } /* if (e->tag == Iex_Unop) */ 2768//.. 2769//.. 2770//.. /* --------- CCALL --------- */ 2771//.. if (e->tag == Iex_CCall) { 2772//.. HReg tLo = newVRegI(env); 2773//.. HReg tHi = newVRegI(env); 2774//.. 2775//.. /* Marshal args, do the call, clear stack. */ 2776//.. doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args ); 2777//.. 2778//.. addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2779//.. addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2780//.. *rHi = tHi; 2781//.. *rLo = tLo; 2782//.. return; 2783//.. } 2784 2785 ppIRExpr(e); 2786 vpanic("iselInt128Expr"); 2787} 2788 2789 2790/*---------------------------------------------------------*/ 2791/*--- ISEL: Floating point expressions (32 bit) ---*/ 2792/*---------------------------------------------------------*/ 2793 2794/* Nothing interesting here; really just wrappers for 2795 64-bit stuff. */ 2796 2797static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ) 2798{ 2799 HReg r = iselFltExpr_wrk( env, e ); 2800# if 0 2801 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2802# endif 2803 vassert(hregClass(r) == HRcVec128); 2804 vassert(hregIsVirtual(r)); 2805 return r; 2806} 2807 2808/* DO NOT CALL THIS DIRECTLY */ 2809static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ) 2810{ 2811 IRType ty = typeOfIRExpr(env->type_env,e); 2812 vassert(ty == Ity_F32); 2813 2814 if (e->tag == Iex_RdTmp) { 2815 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2816 } 2817 2818 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2819 AMD64AMode* am; 2820 HReg res = newVRegV(env); 2821 vassert(e->Iex.Load.ty == Ity_F32); 2822 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2823 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am)); 2824 return res; 2825 } 2826 2827 if (e->tag == Iex_Binop 2828 && e->Iex.Binop.op == Iop_F64toF32) { 2829 /* Although the result is still held in a standard SSE register, 2830 we need to round it to reflect the loss of accuracy/range 2831 entailed in casting it to a 32-bit float. */ 2832 HReg dst = newVRegV(env); 2833 HReg src = iselDblExpr(env, e->Iex.Binop.arg2); 2834 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 ); 2835 addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst)); 2836 set_SSE_rounding_default( env ); 2837 return dst; 2838 } 2839 2840 if (e->tag == Iex_Get) { 2841 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset, 2842 hregAMD64_RBP() ); 2843 HReg res = newVRegV(env); 2844 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am )); 2845 return res; 2846 } 2847 2848 if (e->tag == Iex_Unop 2849 && e->Iex.Unop.op == Iop_ReinterpI32asF32) { 2850 /* Given an I32, produce an IEEE754 float with the same bit 2851 pattern. 
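      No conversion instruction is involved: the I32 is stored into the
      stack redzone at -4(%rsp) and the same four bytes are reloaded
      into the low lane of an xmm register.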
*/ 2852 HReg dst = newVRegV(env); 2853 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2854 AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP()); 2855 addInstr(env, AMD64Instr_Store(4, src, m4_rsp)); 2856 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp )); 2857 return dst; 2858 } 2859 2860 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) { 2861 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 2862 HReg arg = iselFltExpr(env, e->Iex.Binop.arg2); 2863 HReg dst = newVRegV(env); 2864 2865 /* rf now holds the value to be rounded. The first thing to do 2866 is set the FPU's rounding mode accordingly. */ 2867 2868 /* Set host x87 rounding mode */ 2869 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2870 2871 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp)); 2872 addInstr(env, AMD64Instr_A87Free(1)); 2873 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4)); 2874 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND)); 2875 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4)); 2876 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp)); 2877 2878 /* Restore default x87 rounding. */ 2879 set_FPU_rounding_default( env ); 2880 2881 return dst; 2882 } 2883 2884 ppIRExpr(e); 2885 vpanic("iselFltExpr_wrk"); 2886} 2887 2888 2889/*---------------------------------------------------------*/ 2890/*--- ISEL: Floating point expressions (64 bit) ---*/ 2891/*---------------------------------------------------------*/ 2892 2893/* Compute a 64-bit floating point value into the lower half of an xmm 2894 register, the identity of which is returned. As with 2895 iselIntExpr_R, the returned reg will be virtual, and it must not be 2896 changed by subsequent code emitted by the caller. 2897*/ 2898 2899/* IEEE 754 formats. 
From http://www.freesoft.org/CIE/RFC/1832/32.htm: 2900 2901 Type S (1 bit) E (11 bits) F (52 bits) 2902 ---- --------- ----------- ----------- 2903 signalling NaN u 2047 (max) .0uuuuu---u 2904 (with at least 2905 one 1 bit) 2906 quiet NaN u 2047 (max) .1uuuuu---u 2907 2908 negative infinity 1 2047 (max) .000000---0 2909 2910 positive infinity 0 2047 (max) .000000---0 2911 2912 negative zero 1 0 .000000---0 2913 2914 positive zero 0 0 .000000---0 2915*/ 2916 2917static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ) 2918{ 2919 HReg r = iselDblExpr_wrk( env, e ); 2920# if 0 2921 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2922# endif 2923 vassert(hregClass(r) == HRcVec128); 2924 vassert(hregIsVirtual(r)); 2925 return r; 2926} 2927 2928/* DO NOT CALL THIS DIRECTLY */ 2929static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) 2930{ 2931 IRType ty = typeOfIRExpr(env->type_env,e); 2932 vassert(e); 2933 vassert(ty == Ity_F64); 2934 2935 if (e->tag == Iex_RdTmp) { 2936 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2937 } 2938 2939 if (e->tag == Iex_Const) { 2940 union { ULong u64; Double f64; } u; 2941 HReg res = newVRegV(env); 2942 HReg tmp = newVRegI(env); 2943 vassert(sizeof(u) == 8); 2944 vassert(sizeof(u.u64) == 8); 2945 vassert(sizeof(u.f64) == 8); 2946 2947 if (e->Iex.Const.con->tag == Ico_F64) { 2948 u.f64 = e->Iex.Const.con->Ico.F64; 2949 } 2950 else if (e->Iex.Const.con->tag == Ico_F64i) { 2951 u.u64 = e->Iex.Const.con->Ico.F64i; 2952 } 2953 else 2954 vpanic("iselDblExpr(amd64): const"); 2955 2956 addInstr(env, AMD64Instr_Imm64(u.u64, tmp)); 2957 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp))); 2958 addInstr(env, AMD64Instr_SseLdSt( 2959 True/*load*/, 8, res, 2960 AMD64AMode_IR(0, hregAMD64_RSP()) 2961 )); 2962 add_to_rsp(env, 8); 2963 return res; 2964 } 2965 2966 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2967 AMD64AMode* am; 2968 HReg res = newVRegV(env); 2969 vassert(e->Iex.Load.ty == Ity_F64); 2970 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2971 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am )); 2972 return res; 2973 } 2974 2975 if (e->tag == Iex_Get) { 2976 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset, 2977 hregAMD64_RBP() ); 2978 HReg res = newVRegV(env); 2979 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am )); 2980 return res; 2981 } 2982 2983 if (e->tag == Iex_GetI) { 2984 AMD64AMode* am 2985 = genGuestArrayOffset( 2986 env, e->Iex.GetI.descr, 2987 e->Iex.GetI.ix, e->Iex.GetI.bias ); 2988 HReg res = newVRegV(env); 2989 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am )); 2990 return res; 2991 } 2992 2993 if (e->tag == Iex_Triop) { 2994 AMD64SseOp op = Asse_INVALID; 2995 switch (e->Iex.Triop.op) { 2996 case Iop_AddF64: op = Asse_ADDF; break; 2997 case Iop_SubF64: op = Asse_SUBF; break; 2998 case Iop_MulF64: op = Asse_MULF; break; 2999 case Iop_DivF64: op = Asse_DIVF; break; 3000 default: break; 3001 } 3002 if (op != Asse_INVALID) { 3003 HReg dst = newVRegV(env); 3004 HReg argL = iselDblExpr(env, e->Iex.Triop.arg2); 3005 HReg argR = iselDblExpr(env, e->Iex.Triop.arg3); 3006 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3007 /* XXXROUNDINGFIXME */ 3008 /* set roundingmode here */ 3009 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst)); 3010 return dst; 3011 } 3012 } 3013 3014 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) { 3015 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 3016 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2); 3017 HReg dst = newVRegV(env); 3018 3019 /* rf now holds the value to be 
rounded. The first thing to do 3020 is set the FPU's rounding mode accordingly. */ 3021 3022 /* Set host x87 rounding mode */ 3023 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 3024 3025 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp)); 3026 addInstr(env, AMD64Instr_A87Free(1)); 3027 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 3028 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND)); 3029 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); 3030 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 3031 3032 /* Restore default x87 rounding. */ 3033 set_FPU_rounding_default( env ); 3034 3035 return dst; 3036 } 3037 3038 if (e->tag == Iex_Triop 3039 && (e->Iex.Triop.op == Iop_ScaleF64 3040 || e->Iex.Triop.op == Iop_AtanF64 3041 || e->Iex.Triop.op == Iop_Yl2xF64 3042 || e->Iex.Triop.op == Iop_Yl2xp1F64 3043 || e->Iex.Triop.op == Iop_PRemF64 3044 || e->Iex.Triop.op == Iop_PRem1F64) 3045 ) { 3046 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 3047 HReg arg1 = iselDblExpr(env, e->Iex.Triop.arg2); 3048 HReg arg2 = iselDblExpr(env, e->Iex.Triop.arg3); 3049 HReg dst = newVRegV(env); 3050 Bool arg2first = toBool(e->Iex.Triop.op == Iop_ScaleF64 3051 || e->Iex.Triop.op == Iop_PRemF64 3052 || e->Iex.Triop.op == Iop_PRem1F64); 3053 addInstr(env, AMD64Instr_A87Free(2)); 3054 3055 /* one arg -> top of x87 stack */ 3056 addInstr(env, AMD64Instr_SseLdSt( 3057 False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp)); 3058 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 3059 3060 /* other arg -> top of x87 stack */ 3061 addInstr(env, AMD64Instr_SseLdSt( 3062 False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp)); 3063 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 3064 3065 /* do it */ 3066 /* XXXROUNDINGFIXME */ 3067 /* set roundingmode here */ 3068 switch (e->Iex.Triop.op) { 3069 case Iop_ScaleF64: 3070 addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE)); 3071 break; 3072 case Iop_AtanF64: 3073 addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN)); 3074 break; 3075 case Iop_Yl2xF64: 3076 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X)); 3077 break; 3078 case Iop_Yl2xp1F64: 3079 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1)); 3080 break; 3081 case Iop_PRemF64: 3082 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM)); 3083 break; 3084 case Iop_PRem1F64: 3085 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1)); 3086 break; 3087 default: 3088 vassert(0); 3089 } 3090 3091 /* save result */ 3092 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); 3093 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 3094 return dst; 3095 } 3096 3097 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) { 3098 HReg dst = newVRegV(env); 3099 HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2); 3100 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 ); 3101 addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst )); 3102 set_SSE_rounding_default( env ); 3103 return dst; 3104 } 3105 3106 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32StoF64) { 3107 HReg dst = newVRegV(env); 3108 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 3109 set_SSE_rounding_default( env ); 3110 addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst )); 3111 return dst; 3112 } 3113 3114 if (e->tag == Iex_Unop 3115 && (e->Iex.Unop.op == Iop_NegF64 3116 || e->Iex.Unop.op == Iop_AbsF64)) { 3117 /* Sigh ... very rough code. Could do much better. */ 3118 /* Get the 128-bit literal 00---0 10---0 into a register 3119 and xor/nand it with the value to be negated. 
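      The literal has only bit 63 set, so xor-ing with it flips the
      sign bit (NegF64) and the and-not form clears it (AbsF64).  It is
      built on the stack: push a zero quadword for the upper half, push
      1 << 63 for the lower half, then do a 16-byte SSE load from
      0(%rsp).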
*/ 3120 HReg r1 = newVRegI(env); 3121 HReg dst = newVRegV(env); 3122 HReg tmp = newVRegV(env); 3123 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 3124 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 3125 addInstr(env, mk_vMOVsd_RR(src,tmp)); 3126 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0))); 3127 addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 )); 3128 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1))); 3129 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0)); 3130 3131 if (e->Iex.Unop.op == Iop_NegF64) 3132 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst)); 3133 else 3134 addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst)); 3135 3136 add_to_rsp(env, 16); 3137 return dst; 3138 } 3139 3140 if (e->tag == Iex_Binop) { 3141 A87FpOp fpop = Afp_INVALID; 3142 switch (e->Iex.Binop.op) { 3143 case Iop_SqrtF64: fpop = Afp_SQRT; break; 3144 case Iop_SinF64: fpop = Afp_SIN; break; 3145 case Iop_CosF64: fpop = Afp_COS; break; 3146 case Iop_TanF64: fpop = Afp_TAN; break; 3147 case Iop_2xm1F64: fpop = Afp_2XM1; break; 3148 default: break; 3149 } 3150 if (fpop != Afp_INVALID) { 3151 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 3152 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2); 3153 HReg dst = newVRegV(env); 3154 Int nNeeded = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1; 3155 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp)); 3156 addInstr(env, AMD64Instr_A87Free(nNeeded)); 3157 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 3158 /* XXXROUNDINGFIXME */ 3159 /* set roundingmode here */ 3160 addInstr(env, AMD64Instr_A87FpOp(fpop)); 3161 if (e->Iex.Binop.op==Iop_TanF64) { 3162 /* get rid of the extra 1.0 that fptan pushes */ 3163 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); 3164 } 3165 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); 3166 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 3167 return dst; 3168 } 3169 } 3170 3171 if (e->tag == Iex_Unop) { 3172 switch (e->Iex.Unop.op) { 3173//.. case Iop_I32toF64: { 3174//.. HReg dst = newVRegF(env); 3175//.. HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg); 3176//.. addInstr(env, X86Instr_Push(X86RMI_Reg(ri))); 3177//.. set_FPU_rounding_default(env); 3178//.. addInstr(env, X86Instr_FpLdStI( 3179//.. True/*load*/, 4, dst, 3180//.. X86AMode_IR(0, hregX86_ESP()))); 3181//.. add_to_esp(env, 4); 3182//.. return dst; 3183//.. } 3184 case Iop_ReinterpI64asF64: { 3185 /* Given an I64, produce an IEEE754 double with the same 3186 bit pattern. */ 3187 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 3188 HReg dst = newVRegV(env); 3189 AMD64RI* src = iselIntExpr_RI(env, e->Iex.Unop.arg); 3190 /* paranoia */ 3191 set_SSE_rounding_default(env); 3192 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp)); 3193 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 3194 return dst; 3195 } 3196 case Iop_F32toF64: { 3197 HReg f32; 3198 HReg f64 = newVRegV(env); 3199 /* this shouldn't be necessary, but be paranoid ... 
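        -- widening F32 to F64 is exact, so no rounding can occur and
        the rounding mode ought not to matter.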
*/ 3200 set_SSE_rounding_default(env); 3201 f32 = iselFltExpr(env, e->Iex.Unop.arg); 3202 addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64)); 3203 return f64; 3204 } 3205 default: 3206 break; 3207 } 3208 } 3209 3210 /* --------- MULTIPLEX --------- */ 3211 if (e->tag == Iex_Mux0X) { 3212 HReg r8, rX, r0, dst; 3213 vassert(ty == Ity_F64); 3214 vassert(typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8); 3215 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond); 3216 rX = iselDblExpr(env, e->Iex.Mux0X.exprX); 3217 r0 = iselDblExpr(env, e->Iex.Mux0X.expr0); 3218 dst = newVRegV(env); 3219 addInstr(env, mk_vMOVsd_RR(rX,dst)); 3220 addInstr(env, AMD64Instr_Test64(0xFF, r8)); 3221 addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst)); 3222 return dst; 3223 } 3224 3225 ppIRExpr(e); 3226 vpanic("iselDblExpr_wrk"); 3227} 3228 3229 3230/*---------------------------------------------------------*/ 3231/*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/ 3232/*---------------------------------------------------------*/ 3233 3234static HReg iselVecExpr ( ISelEnv* env, IRExpr* e ) 3235{ 3236 HReg r = iselVecExpr_wrk( env, e ); 3237# if 0 3238 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3239# endif 3240 vassert(hregClass(r) == HRcVec128); 3241 vassert(hregIsVirtual(r)); 3242 return r; 3243} 3244 3245 3246/* DO NOT CALL THIS DIRECTLY */ 3247static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) 3248{ 3249 HWord fn = 0; /* address of helper fn, if required */ 3250 Bool arg1isEReg = False; 3251 AMD64SseOp op = Asse_INVALID; 3252 IRType ty = typeOfIRExpr(env->type_env,e); 3253 vassert(e); 3254 vassert(ty == Ity_V128); 3255 3256 if (e->tag == Iex_RdTmp) { 3257 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 3258 } 3259 3260 if (e->tag == Iex_Get) { 3261 HReg dst = newVRegV(env); 3262 addInstr(env, AMD64Instr_SseLdSt( 3263 True/*load*/, 3264 16, 3265 dst, 3266 AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP()) 3267 ) 3268 ); 3269 return dst; 3270 } 3271 3272 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 3273 HReg dst = newVRegV(env); 3274 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 3275 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am )); 3276 return dst; 3277 } 3278 3279 if (e->tag == Iex_Const) { 3280 HReg dst = newVRegV(env); 3281 vassert(e->Iex.Const.con->tag == Ico_V128); 3282 switch (e->Iex.Const.con->Ico.V128) { 3283 case 0x0000: 3284 dst = generate_zeroes_V128(env); 3285 break; 3286 case 0xFFFF: 3287 dst = generate_ones_V128(env); 3288 break; 3289 default: { 3290 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 3291 /* do push_uimm64 twice, first time for the high-order half. */ 3292 push_uimm64(env, bitmask8_to_bytemask64( 3293 (e->Iex.Const.con->Ico.V128 >> 8) & 0xFF 3294 )); 3295 push_uimm64(env, bitmask8_to_bytemask64( 3296 (e->Iex.Const.con->Ico.V128 >> 0) & 0xFF 3297 )); 3298 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 )); 3299 add_to_rsp(env, 16); 3300 break; 3301 } 3302 } 3303 return dst; 3304 } 3305 3306 if (e->tag == Iex_Unop) { 3307 switch (e->Iex.Unop.op) { 3308 3309 case Iop_NotV128: { 3310 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3311 return do_sse_NotV128(env, arg); 3312 } 3313 3314 case Iop_CmpNEZ64x2: { 3315 /* We can use SSE2 instructions for this. */ 3316 /* Ideally, we want to do a 64Ix2 comparison against zero of 3317 the operand. Problem is no such insn exists. Solution 3318 therefore is to do a 32Ix4 comparison instead, and bitwise- 3319 negate (NOT) the result. 
Let a,b,c,d be 32-bit lanes, and 3320 let the not'd result of this initial comparison be a:b:c:d. 3321 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use 3322 pshufd to create a value b:a:d:c, and OR that with a:b:c:d, 3323 giving the required result. 3324 3325 The required selection sequence is 2,3,0,1, which 3326 according to Intel's documentation means the pshufd 3327 literal value is 0xB1, that is, 3328 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0) 3329 */ 3330 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3331 HReg tmp = generate_zeroes_V128(env); 3332 HReg dst = newVRegV(env); 3333 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp)); 3334 tmp = do_sse_NotV128(env, tmp); 3335 addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst)); 3336 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst)); 3337 return dst; 3338 } 3339 3340 case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector; 3341 case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector; 3342 case Iop_CmpNEZ8x16: op = Asse_CMPEQ8; goto do_CmpNEZ_vector; 3343 do_CmpNEZ_vector: 3344 { 3345 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3346 HReg tmp = newVRegV(env); 3347 HReg zero = generate_zeroes_V128(env); 3348 HReg dst; 3349 addInstr(env, mk_vMOVsd_RR(arg, tmp)); 3350 addInstr(env, AMD64Instr_SseReRg(op, zero, tmp)); 3351 dst = do_sse_NotV128(env, tmp); 3352 return dst; 3353 } 3354 3355 case Iop_Recip32Fx4: op = Asse_RCPF; goto do_32Fx4_unary; 3356 case Iop_RSqrt32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary; 3357 case Iop_Sqrt32Fx4: op = Asse_SQRTF; goto do_32Fx4_unary; 3358 do_32Fx4_unary: 3359 { 3360 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3361 HReg dst = newVRegV(env); 3362 addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst)); 3363 return dst; 3364 } 3365 3366//.. case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary; 3367//.. case Iop_RSqrt64Fx2: op = Asse_RSQRTF; goto do_64Fx2_unary; 3368 case Iop_Sqrt64Fx2: op = Asse_SQRTF; goto do_64Fx2_unary; 3369 do_64Fx2_unary: 3370 { 3371 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3372 HReg dst = newVRegV(env); 3373 addInstr(env, AMD64Instr_Sse64Fx2(op, arg, dst)); 3374 return dst; 3375 } 3376 3377 case Iop_Recip32F0x4: op = Asse_RCPF; goto do_32F0x4_unary; 3378 case Iop_RSqrt32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary; 3379 case Iop_Sqrt32F0x4: op = Asse_SQRTF; goto do_32F0x4_unary; 3380 do_32F0x4_unary: 3381 { 3382 /* A bit subtle. We have to copy the arg to the result 3383 register first, because actually doing the SSE scalar insn 3384 leaves the upper 3/4 of the destination register 3385 unchanged. Whereas the required semantics of these 3386 primops is that the upper 3/4 is simply copied in from the 3387 argument. */ 3388 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3389 HReg dst = newVRegV(env); 3390 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3391 addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst)); 3392 return dst; 3393 } 3394 3395//.. case Iop_Recip64F0x2: op = Xsse_RCPF; goto do_64F0x2_unary; 3396//.. case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary; 3397 case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary; 3398 do_64F0x2_unary: 3399 { 3400 /* A bit subtle. We have to copy the arg to the result 3401 register first, because actually doing the SSE scalar insn 3402 leaves the upper half of the destination register 3403 unchanged. Whereas the required semantics of these 3404 primops is that the upper half is simply copied in from the 3405 argument. 
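      Hence the full-register move into dst first; the scalar op then
      overwrites only the low 64 bits of dst, leaving the copied upper
      half intact.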
*/ 3406 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3407 HReg dst = newVRegV(env); 3408 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3409 addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst)); 3410 return dst; 3411 } 3412 3413 case Iop_32UtoV128: { 3414 HReg dst = newVRegV(env); 3415 AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP()); 3416 AMD64RI* ri = iselIntExpr_RI(env, e->Iex.Unop.arg); 3417 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32)); 3418 addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32)); 3419 return dst; 3420 } 3421 3422 case Iop_64UtoV128: { 3423 HReg dst = newVRegV(env); 3424 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 3425 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg); 3426 addInstr(env, AMD64Instr_Push(rmi)); 3427 addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0)); 3428 add_to_rsp(env, 8); 3429 return dst; 3430 } 3431 3432 default: 3433 break; 3434 } /* switch (e->Iex.Unop.op) */ 3435 } /* if (e->tag == Iex_Unop) */ 3436 3437 if (e->tag == Iex_Binop) { 3438 switch (e->Iex.Binop.op) { 3439 3440 case Iop_SetV128lo64: { 3441 HReg dst = newVRegV(env); 3442 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3443 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); 3444 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP()); 3445 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16)); 3446 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16)); 3447 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16)); 3448 return dst; 3449 } 3450 3451 case Iop_SetV128lo32: { 3452 HReg dst = newVRegV(env); 3453 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3454 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); 3455 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP()); 3456 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16)); 3457 addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16)); 3458 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16)); 3459 return dst; 3460 } 3461 3462 case Iop_64HLtoV128: { 3463 AMD64AMode* rsp = AMD64AMode_IR(0, hregAMD64_RSP()); 3464 HReg dst = newVRegV(env); 3465 /* do this via the stack (easy, convenient, etc) */ 3466 addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg1))); 3467 addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg2))); 3468 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp)); 3469 add_to_rsp(env, 16); 3470 return dst; 3471 } 3472 3473 case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4; 3474 case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4; 3475 case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4; 3476 case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4; 3477 case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4; 3478 case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4; 3479 case Iop_Max32Fx4: op = Asse_MAXF; goto do_32Fx4; 3480 case Iop_Min32Fx4: op = Asse_MINF; goto do_32Fx4; 3481 case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4; 3482 case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4; 3483 do_32Fx4: 3484 { 3485 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3486 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3487 HReg dst = newVRegV(env); 3488 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3489 addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst)); 3490 return dst; 3491 } 3492 3493 case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2; 3494 case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2; 3495 case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2; 3496 case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2; 3497 case 
Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2; 3498 case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2; 3499 case Iop_Max64Fx2: op = Asse_MAXF; goto do_64Fx2; 3500 case Iop_Min64Fx2: op = Asse_MINF; goto do_64Fx2; 3501 case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2; 3502 case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2; 3503 do_64Fx2: 3504 { 3505 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3506 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3507 HReg dst = newVRegV(env); 3508 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3509 addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst)); 3510 return dst; 3511 } 3512 3513 case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4; 3514 case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4; 3515 case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4; 3516 case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4; 3517 case Iop_Add32F0x4: op = Asse_ADDF; goto do_32F0x4; 3518 case Iop_Div32F0x4: op = Asse_DIVF; goto do_32F0x4; 3519 case Iop_Max32F0x4: op = Asse_MAXF; goto do_32F0x4; 3520 case Iop_Min32F0x4: op = Asse_MINF; goto do_32F0x4; 3521 case Iop_Mul32F0x4: op = Asse_MULF; goto do_32F0x4; 3522 case Iop_Sub32F0x4: op = Asse_SUBF; goto do_32F0x4; 3523 do_32F0x4: { 3524 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3525 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3526 HReg dst = newVRegV(env); 3527 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3528 addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst)); 3529 return dst; 3530 } 3531 3532 case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2; 3533 case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2; 3534 case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2; 3535 case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2; 3536 case Iop_Add64F0x2: op = Asse_ADDF; goto do_64F0x2; 3537 case Iop_Div64F0x2: op = Asse_DIVF; goto do_64F0x2; 3538 case Iop_Max64F0x2: op = Asse_MAXF; goto do_64F0x2; 3539 case Iop_Min64F0x2: op = Asse_MINF; goto do_64F0x2; 3540 case Iop_Mul64F0x2: op = Asse_MULF; goto do_64F0x2; 3541 case Iop_Sub64F0x2: op = Asse_SUBF; goto do_64F0x2; 3542 do_64F0x2: { 3543 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3544 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3545 HReg dst = newVRegV(env); 3546 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3547 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst)); 3548 return dst; 3549 } 3550 3551 case Iop_QNarrowBin32Sto16Sx8: 3552 op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg; 3553 case Iop_QNarrowBin16Sto8Sx16: 3554 op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg; 3555 case Iop_QNarrowBin16Sto8Ux16: 3556 op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg; 3557 3558 case Iop_InterleaveHI8x16: 3559 op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg; 3560 case Iop_InterleaveHI16x8: 3561 op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg; 3562 case Iop_InterleaveHI32x4: 3563 op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg; 3564 case Iop_InterleaveHI64x2: 3565 op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg; 3566 3567 case Iop_InterleaveLO8x16: 3568 op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg; 3569 case Iop_InterleaveLO16x8: 3570 op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg; 3571 case Iop_InterleaveLO32x4: 3572 op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg; 3573 case Iop_InterleaveLO64x2: 3574 op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg; 3575 3576 case Iop_AndV128: op = Asse_AND; goto do_SseReRg; 3577 case Iop_OrV128: op = Asse_OR; goto do_SseReRg; 3578 case Iop_XorV128: op = Asse_XOR; goto 
do_SseReRg; 3579 case Iop_Add8x16: op = Asse_ADD8; goto do_SseReRg; 3580 case Iop_Add16x8: op = Asse_ADD16; goto do_SseReRg; 3581 case Iop_Add32x4: op = Asse_ADD32; goto do_SseReRg; 3582 case Iop_Add64x2: op = Asse_ADD64; goto do_SseReRg; 3583 case Iop_QAdd8Sx16: op = Asse_QADD8S; goto do_SseReRg; 3584 case Iop_QAdd16Sx8: op = Asse_QADD16S; goto do_SseReRg; 3585 case Iop_QAdd8Ux16: op = Asse_QADD8U; goto do_SseReRg; 3586 case Iop_QAdd16Ux8: op = Asse_QADD16U; goto do_SseReRg; 3587 case Iop_Avg8Ux16: op = Asse_AVG8U; goto do_SseReRg; 3588 case Iop_Avg16Ux8: op = Asse_AVG16U; goto do_SseReRg; 3589 case Iop_CmpEQ8x16: op = Asse_CMPEQ8; goto do_SseReRg; 3590 case Iop_CmpEQ16x8: op = Asse_CMPEQ16; goto do_SseReRg; 3591 case Iop_CmpEQ32x4: op = Asse_CMPEQ32; goto do_SseReRg; 3592 case Iop_CmpGT8Sx16: op = Asse_CMPGT8S; goto do_SseReRg; 3593 case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg; 3594 case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg; 3595 case Iop_Max16Sx8: op = Asse_MAX16S; goto do_SseReRg; 3596 case Iop_Max8Ux16: op = Asse_MAX8U; goto do_SseReRg; 3597 case Iop_Min16Sx8: op = Asse_MIN16S; goto do_SseReRg; 3598 case Iop_Min8Ux16: op = Asse_MIN8U; goto do_SseReRg; 3599 case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg; 3600 case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg; 3601 case Iop_Mul16x8: op = Asse_MUL16; goto do_SseReRg; 3602 case Iop_Sub8x16: op = Asse_SUB8; goto do_SseReRg; 3603 case Iop_Sub16x8: op = Asse_SUB16; goto do_SseReRg; 3604 case Iop_Sub32x4: op = Asse_SUB32; goto do_SseReRg; 3605 case Iop_Sub64x2: op = Asse_SUB64; goto do_SseReRg; 3606 case Iop_QSub8Sx16: op = Asse_QSUB8S; goto do_SseReRg; 3607 case Iop_QSub16Sx8: op = Asse_QSUB16S; goto do_SseReRg; 3608 case Iop_QSub8Ux16: op = Asse_QSUB8U; goto do_SseReRg; 3609 case Iop_QSub16Ux8: op = Asse_QSUB16U; goto do_SseReRg; 3610 do_SseReRg: { 3611 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1); 3612 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2); 3613 HReg dst = newVRegV(env); 3614 if (arg1isEReg) { 3615 addInstr(env, mk_vMOVsd_RR(arg2, dst)); 3616 addInstr(env, AMD64Instr_SseReRg(op, arg1, dst)); 3617 } else { 3618 addInstr(env, mk_vMOVsd_RR(arg1, dst)); 3619 addInstr(env, AMD64Instr_SseReRg(op, arg2, dst)); 3620 } 3621 return dst; 3622 } 3623 3624 case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift; 3625 case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift; 3626 case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift; 3627 case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift; 3628 case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift; 3629 case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift; 3630 case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift; 3631 case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift; 3632 do_SseShift: { 3633 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1); 3634 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 3635 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 3636 HReg ereg = newVRegV(env); 3637 HReg dst = newVRegV(env); 3638 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0))); 3639 addInstr(env, AMD64Instr_Push(rmi)); 3640 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0)); 3641 addInstr(env, mk_vMOVsd_RR(greg, dst)); 3642 addInstr(env, AMD64Instr_SseReRg(op, ereg, dst)); 3643 add_to_rsp(env, 16); 3644 return dst; 3645 } 3646 3647 case Iop_Mul32x4: fn = (HWord)h_generic_calc_Mul32x4; 3648 goto do_SseAssistedBinary; 3649 case Iop_Max32Sx4: fn = (HWord)h_generic_calc_Max32Sx4; 3650 goto do_SseAssistedBinary; 3651 case Iop_Min32Sx4: fn = 
(HWord)h_generic_calc_Min32Sx4; 3652 goto do_SseAssistedBinary; 3653 case Iop_Max32Ux4: fn = (HWord)h_generic_calc_Max32Ux4; 3654 goto do_SseAssistedBinary; 3655 case Iop_Min32Ux4: fn = (HWord)h_generic_calc_Min32Ux4; 3656 goto do_SseAssistedBinary; 3657 case Iop_Max16Ux8: fn = (HWord)h_generic_calc_Max16Ux8; 3658 goto do_SseAssistedBinary; 3659 case Iop_Min16Ux8: fn = (HWord)h_generic_calc_Min16Ux8; 3660 goto do_SseAssistedBinary; 3661 case Iop_Max8Sx16: fn = (HWord)h_generic_calc_Max8Sx16; 3662 goto do_SseAssistedBinary; 3663 case Iop_Min8Sx16: fn = (HWord)h_generic_calc_Min8Sx16; 3664 goto do_SseAssistedBinary; 3665 case Iop_CmpEQ64x2: fn = (HWord)h_generic_calc_CmpEQ64x2; 3666 goto do_SseAssistedBinary; 3667 case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2; 3668 goto do_SseAssistedBinary; 3669 case Iop_QNarrowBin32Sto16Ux8: 3670 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Ux8; 3671 goto do_SseAssistedBinary; 3672 case Iop_NarrowBin16to8x16: 3673 fn = (HWord)h_generic_calc_NarrowBin16to8x16; 3674 goto do_SseAssistedBinary; 3675 case Iop_NarrowBin32to16x8: 3676 fn = (HWord)h_generic_calc_NarrowBin32to16x8; 3677 goto do_SseAssistedBinary; 3678 do_SseAssistedBinary: { 3679 /* RRRufff! RRRufff code is what we're generating here. Oh 3680 well. */ 3681 vassert(fn != 0); 3682 HReg dst = newVRegV(env); 3683 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3684 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3685 HReg argp = newVRegI(env); 3686 /* subq $112, %rsp -- make a space*/ 3687 sub_from_rsp(env, 112); 3688 /* leaq 48(%rsp), %r_argp -- point into it */ 3689 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()), 3690 argp)); 3691 /* andq $-16, %r_argp -- 16-align the pointer */ 3692 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, 3693 AMD64RMI_Imm( ~(UInt)15 ), 3694 argp)); 3695 /* Prepare 3 arg regs: 3696 leaq 0(%r_argp), %rdi 3697 leaq 16(%r_argp), %rsi 3698 leaq 32(%r_argp), %rdx 3699 */ 3700 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp), 3701 hregAMD64_RDI())); 3702 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp), 3703 hregAMD64_RSI())); 3704 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp), 3705 hregAMD64_RDX())); 3706 /* Store the two args, at (%rsi) and (%rdx): 3707 movupd %argL, 0(%rsi) 3708 movupd %argR, 0(%rdx) 3709 */ 3710 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL, 3711 AMD64AMode_IR(0, hregAMD64_RSI()))); 3712 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argR, 3713 AMD64AMode_IR(0, hregAMD64_RDX()))); 3714 /* call the helper */ 3715 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3 )); 3716 /* fetch the result from memory, using %r_argp, which the 3717 register allocator will keep alive across the call. */ 3718 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst, 3719 AMD64AMode_IR(0, argp))); 3720 /* and finally, clear the space */ 3721 add_to_rsp(env, 112); 3722 return dst; 3723 } 3724 3725 case Iop_SarN64x2: fn = (HWord)h_generic_calc_SarN64x2; 3726 goto do_SseAssistedVectorAndScalar; 3727 case Iop_SarN8x16: fn = (HWord)h_generic_calc_SarN8x16; 3728 goto do_SseAssistedVectorAndScalar; 3729 do_SseAssistedVectorAndScalar: { 3730 /* RRRufff! RRRufff code is what we're generating here. Oh 3731 well. 
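            As with do_SseAssistedBinary above, the helper is handed
            pointers into a 16-aligned scratch area on the stack: %rdi
            points at the 16-byte result slot and %rsi at a copy of the
            vector argument, while the scalar amount travels in %rdx --
            conceptually fn(&res, &argL, amt), assuming a prototype along
            the lines of void fn(V128* res, V128* argL, UInt amt) as
            declared via host_generic_simd128.h.  The 112 bytes reserved
            below are enough for the 48-byte offset, up to 15 bytes lost
            to re-alignment, and (in the binary case) three 16-byte
            slots: 48 + 15 + 48 <= 112.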
*/ 3732 vassert(fn != 0); 3733 HReg dst = newVRegV(env); 3734 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3735 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 3736 HReg argp = newVRegI(env); 3737 /* subq $112, %rsp -- make a space*/ 3738 sub_from_rsp(env, 112); 3739 /* leaq 48(%rsp), %r_argp -- point into it */ 3740 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()), 3741 argp)); 3742 /* andq $-16, %r_argp -- 16-align the pointer */ 3743 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, 3744 AMD64RMI_Imm( ~(UInt)15 ), 3745 argp)); 3746 /* Prepare 2 vector arg regs: 3747 leaq 0(%r_argp), %rdi 3748 leaq 16(%r_argp), %rsi 3749 */ 3750 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp), 3751 hregAMD64_RDI())); 3752 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp), 3753 hregAMD64_RSI())); 3754 /* Store the vector arg, at (%rsi): 3755 movupd %argL, 0(%rsi) 3756 */ 3757 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL, 3758 AMD64AMode_IR(0, hregAMD64_RSI()))); 3759 /* And get the scalar value into rdx */ 3760 addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RDX())); 3761 3762 /* call the helper */ 3763 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3 )); 3764 /* fetch the result from memory, using %r_argp, which the 3765 register allocator will keep alive across the call. */ 3766 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst, 3767 AMD64AMode_IR(0, argp))); 3768 /* and finally, clear the space */ 3769 add_to_rsp(env, 112); 3770 return dst; 3771 } 3772 3773 default: 3774 break; 3775 } /* switch (e->Iex.Binop.op) */ 3776 } /* if (e->tag == Iex_Binop) */ 3777 3778 if (e->tag == Iex_Mux0X) { 3779 HReg r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond); 3780 HReg rX = iselVecExpr(env, e->Iex.Mux0X.exprX); 3781 HReg r0 = iselVecExpr(env, e->Iex.Mux0X.expr0); 3782 HReg dst = newVRegV(env); 3783 addInstr(env, mk_vMOVsd_RR(rX,dst)); 3784 addInstr(env, AMD64Instr_Test64(0xFF, r8)); 3785 addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst)); 3786 return dst; 3787 } 3788 3789 //vec_fail: 3790 vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n", 3791 LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps)); 3792 ppIRExpr(e); 3793 vpanic("iselVecExpr_wrk"); 3794} 3795 3796 3797/*---------------------------------------------------------*/ 3798/*--- ISEL: Statements ---*/ 3799/*---------------------------------------------------------*/ 3800 3801static void iselStmt ( ISelEnv* env, IRStmt* stmt ) 3802{ 3803 if (vex_traceflags & VEX_TRACE_VCODE) { 3804 vex_printf("\n-- "); 3805 ppIRStmt(stmt); 3806 vex_printf("\n"); 3807 } 3808 3809 switch (stmt->tag) { 3810 3811 /* --------- STORE --------- */ 3812 case Ist_Store: { 3813 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr); 3814 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); 3815 IREndness end = stmt->Ist.Store.end; 3816 3817 if (tya != Ity_I64 || end != Iend_LE) 3818 goto stmt_fail; 3819 3820 if (tyd == Ity_I64) { 3821 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3822 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data); 3823 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am)); 3824 return; 3825 } 3826 if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) { 3827 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3828 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data); 3829 addInstr(env, AMD64Instr_Store( 3830 toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 
2 : 4)), 3831 r,am)); 3832 return; 3833 } 3834 if (tyd == Ity_F64) { 3835 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3836 HReg r = iselDblExpr(env, stmt->Ist.Store.data); 3837 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am)); 3838 return; 3839 } 3840 if (tyd == Ity_F32) { 3841 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3842 HReg r = iselFltExpr(env, stmt->Ist.Store.data); 3843 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am)); 3844 return; 3845 } 3846 if (tyd == Ity_V128) { 3847 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3848 HReg r = iselVecExpr(env, stmt->Ist.Store.data); 3849 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am)); 3850 return; 3851 } 3852 break; 3853 } 3854 3855 /* --------- PUT --------- */ 3856 case Ist_Put: { 3857 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); 3858 if (ty == Ity_I64) { 3859 /* We're going to write to memory, so compute the RHS into an 3860 AMD64RI. */ 3861 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data); 3862 addInstr(env, 3863 AMD64Instr_Alu64M( 3864 Aalu_MOV, 3865 ri, 3866 AMD64AMode_IR(stmt->Ist.Put.offset, 3867 hregAMD64_RBP()) 3868 )); 3869 return; 3870 } 3871 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) { 3872 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data); 3873 addInstr(env, AMD64Instr_Store( 3874 toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)), 3875 r, 3876 AMD64AMode_IR(stmt->Ist.Put.offset, 3877 hregAMD64_RBP()))); 3878 return; 3879 } 3880 if (ty == Ity_V128) { 3881 HReg vec = iselVecExpr(env, stmt->Ist.Put.data); 3882 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, 3883 hregAMD64_RBP()); 3884 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am)); 3885 return; 3886 } 3887 if (ty == Ity_F32) { 3888 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data); 3889 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP()); 3890 set_SSE_rounding_default(env); /* paranoia */ 3891 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am )); 3892 return; 3893 } 3894 if (ty == Ity_F64) { 3895 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data); 3896 AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset, 3897 hregAMD64_RBP() ); 3898 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am )); 3899 return; 3900 } 3901 break; 3902 } 3903 3904 /* --------- Indexed PUT --------- */ 3905 case Ist_PutI: { 3906 AMD64AMode* am 3907 = genGuestArrayOffset( 3908 env, stmt->Ist.PutI.descr, 3909 stmt->Ist.PutI.ix, stmt->Ist.PutI.bias ); 3910 3911 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data); 3912 if (ty == Ity_F64) { 3913 HReg val = iselDblExpr(env, stmt->Ist.PutI.data); 3914 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am )); 3915 return; 3916 } 3917 if (ty == Ity_I8) { 3918 HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data); 3919 addInstr(env, AMD64Instr_Store( 1, r, am )); 3920 return; 3921 } 3922 if (ty == Ity_I64) { 3923 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.PutI.data); 3924 addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am )); 3925 return; 3926 } 3927 break; 3928 } 3929 3930 /* --------- TMP --------- */ 3931 case Ist_WrTmp: { 3932 IRTemp tmp = stmt->Ist.WrTmp.tmp; 3933 IRType ty = typeOfIRTemp(env->type_env, tmp); 3934 3935 /* optimisation: if stmt->Ist.WrTmp.data is Add64(..,..), 3936 compute it into an AMode and then use LEA. 
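         (For instance, a 64-bit temp defined as Add64(t1, Shl64(t2, 3))
         can typically be folded into a single
            leaq 0(%r_t1,%r_t2,8), %r_dst
         rather than a shift followed by an add; register names here are
         placeholders.)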
This usually 3937 produces fewer instructions, often because (for memcheck 3938 created IR) we get t = address-expression, (t is later used 3939 twice) and so doing this naturally turns address-expression 3940 back into an AMD64 amode. */ 3941 if (ty == Ity_I64 3942 && stmt->Ist.WrTmp.data->tag == Iex_Binop 3943 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add64) { 3944 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data); 3945 HReg dst = lookupIRTemp(env, tmp); 3946 if (am->tag == Aam_IR && am->Aam.IR.imm == 0) { 3947 /* Hmm, iselIntExpr_AMode wimped out and just computed the 3948 value into a register. Just emit a normal reg-reg move 3949 so reg-alloc can coalesce it away in the usual way. */ 3950 HReg src = am->Aam.IR.reg; 3951 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst)); 3952 } else { 3953 addInstr(env, AMD64Instr_Lea64(am,dst)); 3954 } 3955 return; 3956 } 3957 3958 if (ty == Ity_I64 || ty == Ity_I32 3959 || ty == Ity_I16 || ty == Ity_I8) { 3960 AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data); 3961 HReg dst = lookupIRTemp(env, tmp); 3962 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst)); 3963 return; 3964 } 3965 if (ty == Ity_I128) { 3966 HReg rHi, rLo, dstHi, dstLo; 3967 iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data); 3968 lookupIRTemp128( &dstHi, &dstLo, env, tmp); 3969 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) ); 3970 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) ); 3971 return; 3972 } 3973 if (ty == Ity_I1) { 3974 AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data); 3975 HReg dst = lookupIRTemp(env, tmp); 3976 addInstr(env, AMD64Instr_Set64(cond, dst)); 3977 return; 3978 } 3979 if (ty == Ity_F64) { 3980 HReg dst = lookupIRTemp(env, tmp); 3981 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data); 3982 addInstr(env, mk_vMOVsd_RR(src, dst)); 3983 return; 3984 } 3985 if (ty == Ity_F32) { 3986 HReg dst = lookupIRTemp(env, tmp); 3987 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); 3988 addInstr(env, mk_vMOVsd_RR(src, dst)); 3989 return; 3990 } 3991 if (ty == Ity_V128) { 3992 HReg dst = lookupIRTemp(env, tmp); 3993 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data); 3994 addInstr(env, mk_vMOVsd_RR(src, dst)); 3995 return; 3996 } 3997 break; 3998 } 3999 4000 /* --------- Call to DIRTY helper --------- */ 4001 case Ist_Dirty: { 4002 IRType retty; 4003 IRDirty* d = stmt->Ist.Dirty.details; 4004 Bool passBBP = False; 4005 4006 if (d->nFxState == 0) 4007 vassert(!d->needsBBP); 4008 4009 passBBP = toBool(d->nFxState > 0 && d->needsBBP); 4010 4011 /* Marshal args, do the call, clear stack. */ 4012 doHelperCall( env, passBBP, d->guard, d->cee, d->args ); 4013 4014 /* Now figure out what to do with the returned value, if any. */ 4015 if (d->tmp == IRTemp_INVALID) 4016 /* No return value. Nothing to do. */ 4017 return; 4018 4019 retty = typeOfIRTemp(env->type_env, d->tmp); 4020 if (retty == Ity_I64 || retty == Ity_I32 4021 || retty == Ity_I16 || retty == Ity_I8) { 4022 /* The returned value is in %rax. Park it in the register 4023 associated with tmp. 
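            Note that I8/I16/I32 results need no special treatment: this
            isel keeps small integer values in the low bits of a 64-bit
            register, so the same 64-bit reg-reg move serves all four
            return types.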
*/ 4024 HReg dst = lookupIRTemp(env, d->tmp); 4025 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) ); 4026 return; 4027 } 4028 break; 4029 } 4030 4031 /* --------- MEM FENCE --------- */ 4032 case Ist_MBE: 4033 switch (stmt->Ist.MBE.event) { 4034 case Imbe_Fence: 4035 addInstr(env, AMD64Instr_MFence()); 4036 return; 4037 default: 4038 break; 4039 } 4040 break; 4041 4042 /* --------- ACAS --------- */ 4043 case Ist_CAS: 4044 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) { 4045 /* "normal" singleton CAS */ 4046 UChar sz; 4047 IRCAS* cas = stmt->Ist.CAS.details; 4048 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); 4049 /* get: cas->expd into %rax, and cas->data into %rbx */ 4050 AMD64AMode* am = iselIntExpr_AMode(env, cas->addr); 4051 HReg rData = iselIntExpr_R(env, cas->dataLo); 4052 HReg rExpd = iselIntExpr_R(env, cas->expdLo); 4053 HReg rOld = lookupIRTemp(env, cas->oldLo); 4054 vassert(cas->expdHi == NULL); 4055 vassert(cas->dataHi == NULL); 4056 addInstr(env, mk_iMOVsd_RR(rExpd, rOld)); 4057 addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX())); 4058 addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX())); 4059 switch (ty) { 4060 case Ity_I64: sz = 8; break; 4061 case Ity_I32: sz = 4; break; 4062 case Ity_I16: sz = 2; break; 4063 case Ity_I8: sz = 1; break; 4064 default: goto unhandled_cas; 4065 } 4066 addInstr(env, AMD64Instr_ACAS(am, sz)); 4067 addInstr(env, AMD64Instr_CMov64( 4068 Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOld)); 4069 return; 4070 } else { 4071 /* double CAS */ 4072 UChar sz; 4073 IRCAS* cas = stmt->Ist.CAS.details; 4074 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); 4075 /* only 32-bit and 64-bit allowed in this case */ 4076 /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */ 4077 /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */ 4078 AMD64AMode* am = iselIntExpr_AMode(env, cas->addr); 4079 HReg rDataHi = iselIntExpr_R(env, cas->dataHi); 4080 HReg rDataLo = iselIntExpr_R(env, cas->dataLo); 4081 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi); 4082 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo); 4083 HReg rOldHi = lookupIRTemp(env, cas->oldHi); 4084 HReg rOldLo = lookupIRTemp(env, cas->oldLo); 4085 switch (ty) { 4086 case Ity_I64: 4087 if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16)) 4088 goto unhandled_cas; /* we'd have to generate 4089 cmpxchg16b, but the host 4090 doesn't support that */ 4091 sz = 8; 4092 break; 4093 case Ity_I32: 4094 sz = 4; 4095 break; 4096 default: 4097 goto unhandled_cas; 4098 } 4099 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi)); 4100 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo)); 4101 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX())); 4102 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX())); 4103 addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX())); 4104 addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX())); 4105 addInstr(env, AMD64Instr_DACAS(am, sz)); 4106 addInstr(env, 4107 AMD64Instr_CMov64( 4108 Acc_NZ, AMD64RM_Reg(hregAMD64_RDX()), rOldHi)); 4109 addInstr(env, 4110 AMD64Instr_CMov64( 4111 Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOldLo)); 4112 return; 4113 } 4114 unhandled_cas: 4115 break; 4116 4117 /* --------- INSTR MARK --------- */ 4118 /* Doesn't generate any executable code ... */ 4119 case Ist_IMark: 4120 return; 4121 4122 /* --------- ABI HINT --------- */ 4123 /* These have no meaning (denotation in the IR) and so we ignore 4124 them ... if any actually made it this far. 
*/ 4125 case Ist_AbiHint: 4126 return; 4127 4128 /* --------- NO-OP --------- */ 4129 case Ist_NoOp: 4130 return; 4131 4132 /* --------- EXIT --------- */ 4133 case Ist_Exit: { 4134 AMD64RI* dst; 4135 AMD64CondCode cc; 4136 if (stmt->Ist.Exit.dst->tag != Ico_U64) 4137 vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value"); 4138 dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst)); 4139 cc = iselCondCode(env,stmt->Ist.Exit.guard); 4140 addInstr(env, AMD64Instr_Goto(stmt->Ist.Exit.jk, cc, dst)); 4141 return; 4142 } 4143 4144 default: break; 4145 } 4146 stmt_fail: 4147 ppIRStmt(stmt); 4148 vpanic("iselStmt(amd64)"); 4149} 4150 4151 4152/*---------------------------------------------------------*/ 4153/*--- ISEL: Basic block terminators (Nexts) ---*/ 4154/*---------------------------------------------------------*/ 4155 4156static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk ) 4157{ 4158 AMD64RI* ri; 4159 if (vex_traceflags & VEX_TRACE_VCODE) { 4160 vex_printf("\n-- goto {"); 4161 ppIRJumpKind(jk); 4162 vex_printf("} "); 4163 ppIRExpr(next); 4164 vex_printf("\n"); 4165 } 4166 ri = iselIntExpr_RI(env, next); 4167 addInstr(env, AMD64Instr_Goto(jk, Acc_ALWAYS,ri)); 4168} 4169 4170 4171/*---------------------------------------------------------*/ 4172/*--- Insn selector top-level ---*/ 4173/*---------------------------------------------------------*/ 4174 4175/* Translate an entire SB to amd64 code. */ 4176 4177HInstrArray* iselSB_AMD64 ( IRSB* bb, VexArch arch_host, 4178 VexArchInfo* archinfo_host, 4179 VexAbiInfo* vbi/*UNUSED*/ ) 4180{ 4181 Int i, j; 4182 HReg hreg, hregHI; 4183 ISelEnv* env; 4184 UInt hwcaps_host = archinfo_host->hwcaps; 4185 4186 /* sanity ... */ 4187 vassert(arch_host == VexArchAMD64); 4188 vassert(0 == (hwcaps_host 4189 & ~(VEX_HWCAPS_AMD64_SSE3 4190 | VEX_HWCAPS_AMD64_CX16 4191 | VEX_HWCAPS_AMD64_LZCNT))); 4192 4193 /* Make up an initial environment to use. */ 4194 env = LibVEX_Alloc(sizeof(ISelEnv)); 4195 env->vreg_ctr = 0; 4196 4197 /* Set up output code array. */ 4198 env->code = newHInstrArray(); 4199 4200 /* Copy BB's type env. */ 4201 env->type_env = bb->tyenv; 4202 4203 /* Make up an IRTemp -> virtual HReg mapping. This doesn't 4204 change as we go along. */ 4205 env->n_vregmap = bb->tyenv->types_used; 4206 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); 4207 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); 4208 4209 /* and finally ... */ 4210 env->hwcaps = hwcaps_host; 4211 4212 /* For each IR temporary, allocate a suitably-kinded virtual 4213 register. */ 4214 j = 0; 4215 for (i = 0; i < env->n_vregmap; i++) { 4216 hregHI = hreg = INVALID_HREG; 4217 switch (bb->tyenv->types[i]) { 4218 case Ity_I1: 4219 case Ity_I8: 4220 case Ity_I16: 4221 case Ity_I32: 4222 case Ity_I64: hreg = mkHReg(j++, HRcInt64, True); break; 4223 case Ity_I128: hreg = mkHReg(j++, HRcInt64, True); 4224 hregHI = mkHReg(j++, HRcInt64, True); break; 4225 case Ity_F32: 4226 case Ity_F64: 4227 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break; 4228 default: ppIRType(bb->tyenv->types[i]); 4229 vpanic("iselBB(amd64): IRTemp type"); 4230 } 4231 env->vregmap[i] = hreg; 4232 env->vregmapHI[i] = hregHI; 4233 } 4234 env->vreg_ctr = j; 4235 4236 /* Ok, finally we can iterate over the statements. */ 4237 for (i = 0; i < bb->stmts_used; i++) 4238 if (bb->stmts[i]) 4239 iselStmt(env,bb->stmts[i]); 4240 4241 iselNext(env,bb->next,bb->jumpkind); 4242 4243 /* record the number of vregs we used. 
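      This is the final value of vreg_ctr: the per-IRTemp registers
      assigned above (two for an I128 temp, one otherwise) plus any
      extra vregs created by newVRegI/newVRegV during selection.  The
      register allocator presumably uses it to size its per-vreg
      tables.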
*/ 4244 env->code->n_vregs = env->vreg_ctr; 4245 return env->code; 4246} 4247 4248 4249/*---------------------------------------------------------------*/ 4250/*--- end host_amd64_isel.c ---*/ 4251/*---------------------------------------------------------------*/ 4252