1 2/*---------------------------------------------------------------*/ 3/*--- begin host_amd64_isel.c ---*/ 4/*---------------------------------------------------------------*/ 5 6/* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2013 OpenWorks LLP 11 info@open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34*/ 35 36#include "libvex_basictypes.h" 37#include "libvex_ir.h" 38#include "libvex.h" 39 40#include "ir_match.h" 41#include "main_util.h" 42#include "main_globals.h" 43#include "host_generic_regs.h" 44#include "host_generic_simd64.h" 45#include "host_generic_simd128.h" 46#include "host_generic_simd256.h" 47#include "host_generic_maddf.h" 48#include "host_amd64_defs.h" 49 50 51/*---------------------------------------------------------*/ 52/*--- x87/SSE control word stuff ---*/ 53/*---------------------------------------------------------*/ 54 55/* Vex-generated code expects to run with the FPU set as follows: all 56 exceptions masked, round-to-nearest, precision = 53 bits. This 57 corresponds to a FPU control word value of 0x027F. 58 59 Similarly the SSE control word (%mxcsr) should be 0x1F80. 60 61 %fpucw and %mxcsr should have these values on entry to 62 Vex-generated code, and should those values should be 63 unchanged at exit. 64*/ 65 66#define DEFAULT_FPUCW 0x027F 67 68#define DEFAULT_MXCSR 0x1F80 69 70/* debugging only, do not use */ 71/* define DEFAULT_FPUCW 0x037F */ 72 73 74/*---------------------------------------------------------*/ 75/*--- misc helpers ---*/ 76/*---------------------------------------------------------*/ 77 78/* These are duplicated in guest-amd64/toIR.c */ 79static IRExpr* unop ( IROp op, IRExpr* a ) 80{ 81 return IRExpr_Unop(op, a); 82} 83 84static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 85{ 86 return IRExpr_Binop(op, a1, a2); 87} 88 89static IRExpr* bind ( Int binder ) 90{ 91 return IRExpr_Binder(binder); 92} 93 94static Bool isZeroU8 ( IRExpr* e ) 95{ 96 return e->tag == Iex_Const 97 && e->Iex.Const.con->tag == Ico_U8 98 && e->Iex.Const.con->Ico.U8 == 0; 99} 100 101 102/*---------------------------------------------------------*/ 103/*--- ISelEnv ---*/ 104/*---------------------------------------------------------*/ 105 106/* This carries around: 107 108 - A mapping from IRTemp to IRType, giving the type of any IRTemp we 109 might encounter. This is computed before insn selection starts, 110 and does not change. 111 112 - A mapping from IRTemp to HReg. This tells the insn selector 113 which virtual register is associated with each IRTemp 114 temporary. This is computed before insn selection starts, and 115 does not change. We expect this mapping to map precisely the 116 same set of IRTemps as the type mapping does. 117 118 - vregmap holds the primary register for the IRTemp. 119 - vregmapHI is only used for 128-bit integer-typed 120 IRTemps. It holds the identity of a second 121 64-bit virtual HReg, which holds the high half 122 of the value. 123 124 - The host subarchitecture we are selecting insns for. 125 This is set at the start and does not change. 126 127 - The code array, that is, the insns selected so far. 128 129 - A counter, for generating new virtual registers. 130 131 - A Bool for indicating whether we may generate chain-me 132 instructions for control flow transfers, or whether we must use 133 XAssisted. 134 135 - The maximum guest address of any guest insn in this block. 136 Actually, the address of the highest-addressed byte from any insn 137 in this block. Is set at the start and does not change. This is 138 used for detecting jumps which are definitely forward-edges from 139 this block, and therefore can be made (chained) to the fast entry 140 point of the destination, thereby avoiding the destination's 141 event check. 142 143 Note, this is all host-independent. (JRS 20050201: well, kinda 144 ... not completely. Compare with ISelEnv for X86.) 145*/ 146 147typedef 148 struct { 149 /* Constant -- are set at the start and do not change. */ 150 IRTypeEnv* type_env; 151 152 HReg* vregmap; 153 HReg* vregmapHI; 154 Int n_vregmap; 155 156 UInt hwcaps; 157 158 Bool chainingAllowed; 159 Addr64 max_ga; 160 161 /* These are modified as we go along. */ 162 HInstrArray* code; 163 Int vreg_ctr; 164 } 165 ISelEnv; 166 167 168static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp ) 169{ 170 vassert(tmp >= 0); 171 vassert(tmp < env->n_vregmap); 172 return env->vregmap[tmp]; 173} 174 175static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO, 176 ISelEnv* env, IRTemp tmp ) 177{ 178 vassert(tmp >= 0); 179 vassert(tmp < env->n_vregmap); 180 vassert(! hregIsInvalid(env->vregmapHI[tmp])); 181 *vrLO = env->vregmap[tmp]; 182 *vrHI = env->vregmapHI[tmp]; 183} 184 185static void addInstr ( ISelEnv* env, AMD64Instr* instr ) 186{ 187 addHInstr(env->code, instr); 188 if (vex_traceflags & VEX_TRACE_VCODE) { 189 ppAMD64Instr(instr, True); 190 vex_printf("\n"); 191 } 192} 193 194static HReg newVRegI ( ISelEnv* env ) 195{ 196 HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/); 197 env->vreg_ctr++; 198 return reg; 199} 200 201static HReg newVRegV ( ISelEnv* env ) 202{ 203 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/); 204 env->vreg_ctr++; 205 return reg; 206} 207 208 209/*---------------------------------------------------------*/ 210/*--- ISEL: Forward declarations ---*/ 211/*---------------------------------------------------------*/ 212 213/* These are organised as iselXXX and iselXXX_wrk pairs. The 214 iselXXX_wrk do the real work, but are not to be called directly. 215 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then 216 checks that all returned registers are virtual. You should not 217 call the _wrk version directly. 218*/ 219static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e ); 220static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e ); 221 222static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e ); 223static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e ); 224 225static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e ); 226static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e ); 227 228static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ); 229static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ); 230 231static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e ); 232static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e ); 233 234static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo, 235 ISelEnv* env, IRExpr* e ); 236static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo, 237 ISelEnv* env, IRExpr* e ); 238 239static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ); 240static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e ); 241 242static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ); 243static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ); 244 245static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ); 246static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ); 247 248static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ); 249static HReg iselVecExpr ( ISelEnv* env, IRExpr* e ); 250 251static void iselDVecExpr_wrk ( /*OUT*/HReg* rHi, HReg* rLo, 252 ISelEnv* env, IRExpr* e ); 253static void iselDVecExpr ( /*OUT*/HReg* rHi, HReg* rLo, 254 ISelEnv* env, IRExpr* e ); 255 256 257/*---------------------------------------------------------*/ 258/*--- ISEL: Misc helpers ---*/ 259/*---------------------------------------------------------*/ 260 261static Bool sane_AMode ( AMD64AMode* am ) 262{ 263 switch (am->tag) { 264 case Aam_IR: 265 return 266 toBool( hregClass(am->Aam.IR.reg) == HRcInt64 267 && (hregIsVirtual(am->Aam.IR.reg) 268 || sameHReg(am->Aam.IR.reg, hregAMD64_RBP())) ); 269 case Aam_IRRS: 270 return 271 toBool( hregClass(am->Aam.IRRS.base) == HRcInt64 272 && hregIsVirtual(am->Aam.IRRS.base) 273 && hregClass(am->Aam.IRRS.index) == HRcInt64 274 && hregIsVirtual(am->Aam.IRRS.index) ); 275 default: 276 vpanic("sane_AMode: unknown amd64 amode tag"); 277 } 278} 279 280 281/* Can the lower 32 bits be signedly widened to produce the whole 282 64-bit value? In other words, are the top 33 bits either all 0 or 283 all 1 ? */ 284static Bool fitsIn32Bits ( ULong x ) 285{ 286 Long y0 = (Long)x; 287 Long y1 = y0; 288 y1 <<= 32; 289 y1 >>=/*s*/ 32; 290 return toBool(x == y1); 291} 292 293/* Is this a 64-bit zero expression? */ 294 295static Bool isZeroU64 ( IRExpr* e ) 296{ 297 return e->tag == Iex_Const 298 && e->Iex.Const.con->tag == Ico_U64 299 && e->Iex.Const.con->Ico.U64 == 0ULL; 300} 301 302static Bool isZeroU32 ( IRExpr* e ) 303{ 304 return e->tag == Iex_Const 305 && e->Iex.Const.con->tag == Ico_U32 306 && e->Iex.Const.con->Ico.U32 == 0; 307} 308 309/* Make a int reg-reg move. */ 310 311static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst ) 312{ 313 vassert(hregClass(src) == HRcInt64); 314 vassert(hregClass(dst) == HRcInt64); 315 return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst); 316} 317 318/* Make a vector (128 bit) reg-reg move. */ 319 320static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst ) 321{ 322 vassert(hregClass(src) == HRcVec128); 323 vassert(hregClass(dst) == HRcVec128); 324 return AMD64Instr_SseReRg(Asse_MOV, src, dst); 325} 326 327/* Advance/retreat %rsp by n. */ 328 329static void add_to_rsp ( ISelEnv* env, Int n ) 330{ 331 vassert(n > 0 && n < 256 && (n%8) == 0); 332 addInstr(env, 333 AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n), 334 hregAMD64_RSP())); 335} 336 337static void sub_from_rsp ( ISelEnv* env, Int n ) 338{ 339 vassert(n > 0 && n < 256 && (n%8) == 0); 340 addInstr(env, 341 AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n), 342 hregAMD64_RSP())); 343} 344 345/* Push 64-bit constants on the stack. */ 346static void push_uimm64( ISelEnv* env, ULong uimm64 ) 347{ 348 /* If uimm64 can be expressed as the sign extension of its 349 lower 32 bits, we can do it the easy way. */ 350 Long simm64 = (Long)uimm64; 351 if ( simm64 == ((simm64 << 32) >> 32) ) { 352 addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) ); 353 } else { 354 HReg tmp = newVRegI(env); 355 addInstr( env, AMD64Instr_Imm64(uimm64, tmp) ); 356 addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) ); 357 } 358} 359 360 361/* Used only in doHelperCall. If possible, produce a single 362 instruction which computes 'e' into 'dst'. If not possible, return 363 NULL. */ 364 365static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env, 366 HReg dst, 367 IRExpr* e ) 368{ 369 /* Per comments in doHelperCall below, appearance of 370 Iex_VECRET implies ill-formed IR. */ 371 vassert(e->tag != Iex_VECRET); 372 373 /* In this case we give out a copy of the BaseBlock pointer. */ 374 if (UNLIKELY(e->tag == Iex_BBPTR)) { 375 return mk_iMOVsd_RR( hregAMD64_RBP(), dst ); 376 } 377 378 vassert(typeOfIRExpr(env->type_env, e) == Ity_I64); 379 380 if (e->tag == Iex_Const) { 381 vassert(e->Iex.Const.con->tag == Ico_U64); 382 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) { 383 return AMD64Instr_Alu64R( 384 Aalu_MOV, 385 AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)), 386 dst 387 ); 388 } else { 389 return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst); 390 } 391 } 392 393 if (e->tag == Iex_RdTmp) { 394 HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp); 395 return mk_iMOVsd_RR(src, dst); 396 } 397 398 if (e->tag == Iex_Get) { 399 vassert(e->Iex.Get.ty == Ity_I64); 400 return AMD64Instr_Alu64R( 401 Aalu_MOV, 402 AMD64RMI_Mem( 403 AMD64AMode_IR(e->Iex.Get.offset, 404 hregAMD64_RBP())), 405 dst); 406 } 407 408 if (e->tag == Iex_Unop 409 && e->Iex.Unop.op == Iop_32Uto64 410 && e->Iex.Unop.arg->tag == Iex_RdTmp) { 411 HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp); 412 return AMD64Instr_MovxLQ(False, src, dst); 413 } 414 415 if (0) { ppIRExpr(e); vex_printf("\n"); } 416 417 return NULL; 418} 419 420 421/* Do a complete function call. |guard| is a Ity_Bit expression 422 indicating whether or not the call happens. If guard==NULL, the 423 call is unconditional. |retloc| is set to indicate where the 424 return value is after the call. The caller (of this fn) must 425 generate code to add |stackAdjustAfterCall| to the stack pointer 426 after the call is done. */ 427 428static 429void doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall, 430 /*OUT*/RetLoc* retloc, 431 ISelEnv* env, 432 IRExpr* guard, 433 IRCallee* cee, IRType retTy, IRExpr** args ) 434{ 435 AMD64CondCode cc; 436 HReg argregs[6]; 437 HReg tmpregs[6]; 438 AMD64Instr* fastinstrs[6]; 439 UInt n_args, i; 440 441 /* Set default returns. We'll update them later if needed. */ 442 *stackAdjustAfterCall = 0; 443 *retloc = mk_RetLoc_INVALID(); 444 445 /* These are used for cross-checking that IR-level constraints on 446 the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */ 447 UInt nVECRETs = 0; 448 UInt nBBPTRs = 0; 449 450 /* Marshal args for a call and do the call. 451 452 This function only deals with a tiny set of possibilities, which 453 cover all helpers in practice. The restrictions are that only 454 arguments in registers are supported, hence only 6x64 integer 455 bits in total can be passed. In fact the only supported arg 456 type is I64. 457 458 The return type can be I{64,32,16,8} or V{128,256}. In the 459 latter two cases, it is expected that |args| will contain the 460 special node IRExpr_VECRET(), in which case this routine 461 generates code to allocate space on the stack for the vector 462 return value. Since we are not passing any scalars on the 463 stack, it is enough to preallocate the return space before 464 marshalling any arguments, in this case. 465 466 |args| may also contain IRExpr_BBPTR(), in which case the 467 value in %rbp is passed as the corresponding argument. 468 469 Generating code which is both efficient and correct when 470 parameters are to be passed in registers is difficult, for the 471 reasons elaborated in detail in comments attached to 472 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant 473 of the method described in those comments. 474 475 The problem is split into two cases: the fast scheme and the 476 slow scheme. In the fast scheme, arguments are computed 477 directly into the target (real) registers. This is only safe 478 when we can be sure that computation of each argument will not 479 trash any real registers set by computation of any other 480 argument. 481 482 In the slow scheme, all args are first computed into vregs, and 483 once they are all done, they are moved to the relevant real 484 regs. This always gives correct code, but it also gives a bunch 485 of vreg-to-rreg moves which are usually redundant but are hard 486 for the register allocator to get rid of. 487 488 To decide which scheme to use, all argument expressions are 489 first examined. If they are all so simple that it is clear they 490 will be evaluated without use of any fixed registers, use the 491 fast scheme, else use the slow scheme. Note also that only 492 unconditional calls may use the fast scheme, since having to 493 compute a condition expression could itself trash real 494 registers. Note that for simplicity, in the case where 495 IRExpr_VECRET() is present, we use the slow scheme. This is 496 motivated by the desire to avoid any possible complexity 497 w.r.t. nested calls. 498 499 Note this requires being able to examine an expression and 500 determine whether or not evaluation of it might use a fixed 501 register. That requires knowledge of how the rest of this insn 502 selector works. Currently just the following 3 are regarded as 503 safe -- hopefully they cover the majority of arguments in 504 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get. 505 */ 506 507 /* Note that the cee->regparms field is meaningless on AMD64 host 508 (since there is only one calling convention) and so we always 509 ignore it. */ 510 n_args = 0; 511 for (i = 0; args[i]; i++) 512 n_args++; 513 514 if (n_args > 6) 515 vpanic("doHelperCall(AMD64): cannot currently handle > 6 args"); 516 517 argregs[0] = hregAMD64_RDI(); 518 argregs[1] = hregAMD64_RSI(); 519 argregs[2] = hregAMD64_RDX(); 520 argregs[3] = hregAMD64_RCX(); 521 argregs[4] = hregAMD64_R8(); 522 argregs[5] = hregAMD64_R9(); 523 524 tmpregs[0] = tmpregs[1] = tmpregs[2] = 525 tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG; 526 527 fastinstrs[0] = fastinstrs[1] = fastinstrs[2] = 528 fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL; 529 530 /* First decide which scheme (slow or fast) is to be used. First 531 assume the fast scheme, and select slow if any contraindications 532 (wow) appear. */ 533 534 /* We'll need space on the stack for the return value. Avoid 535 possible complications with nested calls by using the slow 536 scheme. */ 537 if (retTy == Ity_V128 || retTy == Ity_V256) 538 goto slowscheme; 539 540 if (guard) { 541 if (guard->tag == Iex_Const 542 && guard->Iex.Const.con->tag == Ico_U1 543 && guard->Iex.Const.con->Ico.U1 == True) { 544 /* unconditional */ 545 } else { 546 /* Not manifestly unconditional -- be conservative. */ 547 goto slowscheme; 548 } 549 } 550 551 /* Ok, let's try for the fast scheme. If it doesn't pan out, we'll 552 use the slow scheme. Because this is tentative, we can't call 553 addInstr (that is, commit to) any instructions until we're 554 handled all the arguments. So park the resulting instructions 555 in a buffer and emit that if we're successful. */ 556 557 /* FAST SCHEME */ 558 /* In this loop, we process args that can be computed into the 559 destination (real) register with a single instruction, without 560 using any fixed regs. That also includes IRExpr_BBPTR(), but 561 not IRExpr_VECRET(). Indeed, if the IR is well-formed, we can 562 never see IRExpr_VECRET() at this point, since the return-type 563 check above should ensure all those cases use the slow scheme 564 instead. */ 565 vassert(n_args >= 0 && n_args <= 6); 566 for (i = 0; i < n_args; i++) { 567 IRExpr* arg = args[i]; 568 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) { 569 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64); 570 } 571 fastinstrs[i] 572 = iselIntExpr_single_instruction( env, argregs[i], args[i] ); 573 if (fastinstrs[i] == NULL) 574 goto slowscheme; 575 } 576 577 /* Looks like we're in luck. Emit the accumulated instructions and 578 move on to doing the call itself. */ 579 for (i = 0; i < n_args; i++) 580 addInstr(env, fastinstrs[i]); 581 582 /* Fast scheme only applies for unconditional calls. Hence: */ 583 cc = Acc_ALWAYS; 584 585 goto handle_call; 586 587 588 /* SLOW SCHEME; move via temporaries */ 589 slowscheme: 590 {} 591# if 0 /* debug only */ 592 if (n_args > 0) {for (i = 0; args[i]; i++) { 593 ppIRExpr(args[i]); vex_printf(" "); } 594 vex_printf("\n");} 595# endif 596 597 /* If we have a vector return type, allocate a place for it on the 598 stack and record its address. */ 599 HReg r_vecRetAddr = INVALID_HREG; 600 if (retTy == Ity_V128) { 601 r_vecRetAddr = newVRegI(env); 602 sub_from_rsp(env, 16); 603 addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr )); 604 } 605 else if (retTy == Ity_V256) { 606 r_vecRetAddr = newVRegI(env); 607 sub_from_rsp(env, 32); 608 addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr )); 609 } 610 611 vassert(n_args >= 0 && n_args <= 6); 612 for (i = 0; i < n_args; i++) { 613 IRExpr* arg = args[i]; 614 if (UNLIKELY(arg->tag == Iex_BBPTR)) { 615 tmpregs[i] = newVRegI(env); 616 addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[i])); 617 nBBPTRs++; 618 } 619 else if (UNLIKELY(arg->tag == Iex_VECRET)) { 620 /* We stashed the address of the return slot earlier, so just 621 retrieve it now. */ 622 vassert(!hregIsInvalid(r_vecRetAddr)); 623 tmpregs[i] = r_vecRetAddr; 624 nVECRETs++; 625 } 626 else { 627 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64); 628 tmpregs[i] = iselIntExpr_R(env, args[i]); 629 } 630 } 631 632 /* Now we can compute the condition. We can't do it earlier 633 because the argument computations could trash the condition 634 codes. Be a bit clever to handle the common case where the 635 guard is 1:Bit. */ 636 cc = Acc_ALWAYS; 637 if (guard) { 638 if (guard->tag == Iex_Const 639 && guard->Iex.Const.con->tag == Ico_U1 640 && guard->Iex.Const.con->Ico.U1 == True) { 641 /* unconditional -- do nothing */ 642 } else { 643 cc = iselCondCode( env, guard ); 644 } 645 } 646 647 /* Move the args to their final destinations. */ 648 for (i = 0; i < n_args; i++) { 649 /* None of these insns, including any spill code that might 650 be generated, may alter the condition codes. */ 651 addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) ); 652 } 653 654 655 /* Do final checks, set the return values, and generate the call 656 instruction proper. */ 657 handle_call: 658 659 if (retTy == Ity_V128 || retTy == Ity_V256) { 660 vassert(nVECRETs == 1); 661 } else { 662 vassert(nVECRETs == 0); 663 } 664 665 vassert(nBBPTRs == 0 || nBBPTRs == 1); 666 667 vassert(*stackAdjustAfterCall == 0); 668 vassert(is_RetLoc_INVALID(*retloc)); 669 switch (retTy) { 670 case Ity_INVALID: 671 /* Function doesn't return a value. */ 672 *retloc = mk_RetLoc_simple(RLPri_None); 673 break; 674 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: 675 *retloc = mk_RetLoc_simple(RLPri_Int); 676 break; 677 case Ity_V128: 678 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0); 679 *stackAdjustAfterCall = 16; 680 break; 681 case Ity_V256: 682 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0); 683 *stackAdjustAfterCall = 32; 684 break; 685 default: 686 /* IR can denote other possible return types, but we don't 687 handle those here. */ 688 vassert(0); 689 } 690 691 /* Finally, generate the call itself. This needs the *retloc value 692 set in the switch above, which is why it's at the end. */ 693 addInstr(env, 694 AMD64Instr_Call(cc, Ptr_to_ULong(cee->addr), n_args, *retloc)); 695} 696 697 698/* Given a guest-state array descriptor, an index expression and a 699 bias, generate an AMD64AMode holding the relevant guest state 700 offset. */ 701 702static 703AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr, 704 IRExpr* off, Int bias ) 705{ 706 HReg tmp, roff; 707 Int elemSz = sizeofIRType(descr->elemTy); 708 Int nElems = descr->nElems; 709 710 /* Throw out any cases not generated by an amd64 front end. In 711 theory there might be a day where we need to handle them -- if 712 we ever run non-amd64-guest on amd64 host. */ 713 714 if (nElems != 8 || (elemSz != 1 && elemSz != 8)) 715 vpanic("genGuestArrayOffset(amd64 host)"); 716 717 /* Compute off into a reg, %off. Then return: 718 719 movq %off, %tmp 720 addq $bias, %tmp (if bias != 0) 721 andq %tmp, 7 722 ... base(%rbp, %tmp, shift) ... 723 */ 724 tmp = newVRegI(env); 725 roff = iselIntExpr_R(env, off); 726 addInstr(env, mk_iMOVsd_RR(roff, tmp)); 727 if (bias != 0) { 728 /* Make sure the bias is sane, in the sense that there are 729 no significant bits above bit 30 in it. */ 730 vassert(-10000 < bias && bias < 10000); 731 addInstr(env, 732 AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp)); 733 } 734 addInstr(env, 735 AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp)); 736 vassert(elemSz == 1 || elemSz == 8); 737 return 738 AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp, 739 elemSz==8 ? 3 : 0); 740} 741 742 743/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */ 744static 745void set_SSE_rounding_default ( ISelEnv* env ) 746{ 747 /* pushq $DEFAULT_MXCSR 748 ldmxcsr 0(%rsp) 749 addq $8, %rsp 750 */ 751 AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP()); 752 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR))); 753 addInstr(env, AMD64Instr_LdMXCSR(zero_rsp)); 754 add_to_rsp(env, 8); 755} 756 757/* Mess with the FPU's rounding mode: set to the default rounding mode 758 (DEFAULT_FPUCW). */ 759static 760void set_FPU_rounding_default ( ISelEnv* env ) 761{ 762 /* movq $DEFAULT_FPUCW, -8(%rsp) 763 fldcw -8(%esp) 764 */ 765 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 766 addInstr(env, AMD64Instr_Alu64M( 767 Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp)); 768 addInstr(env, AMD64Instr_A87LdCW(m8_rsp)); 769} 770 771 772/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed 773 expression denoting a value in the range 0 .. 3, indicating a round 774 mode encoded as per type IRRoundingMode. Set the SSE machinery to 775 have the same rounding. 776*/ 777static 778void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode ) 779{ 780 /* Note: this sequence only makes sense because DEFAULT_MXCSR has 781 both rounding bits == 0. If that wasn't the case, we couldn't 782 create a new rounding field simply by ORing the new value into 783 place. */ 784 785 /* movq $3, %reg 786 andq [[mode]], %reg -- shouldn't be needed; paranoia 787 shlq $13, %reg 788 orq $DEFAULT_MXCSR, %reg 789 pushq %reg 790 ldmxcsr 0(%esp) 791 addq $8, %rsp 792 */ 793 HReg reg = newVRegI(env); 794 AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP()); 795 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg)); 796 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, 797 iselIntExpr_RMI(env, mode), reg)); 798 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg)); 799 addInstr(env, AMD64Instr_Alu64R( 800 Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg)); 801 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg))); 802 addInstr(env, AMD64Instr_LdMXCSR(zero_rsp)); 803 add_to_rsp(env, 8); 804} 805 806 807/* Mess with the FPU's rounding mode: 'mode' is an I32-typed 808 expression denoting a value in the range 0 .. 3, indicating a round 809 mode encoded as per type IRRoundingMode. Set the x87 FPU to have 810 the same rounding. 811*/ 812static 813void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode ) 814{ 815 HReg rrm = iselIntExpr_R(env, mode); 816 HReg rrm2 = newVRegI(env); 817 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 818 819 /* movq %rrm, %rrm2 820 andq $3, %rrm2 -- shouldn't be needed; paranoia 821 shlq $10, %rrm2 822 orq $DEFAULT_FPUCW, %rrm2 823 movq %rrm2, -8(%rsp) 824 fldcw -8(%esp) 825 */ 826 addInstr(env, mk_iMOVsd_RR(rrm, rrm2)); 827 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2)); 828 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2)); 829 addInstr(env, AMD64Instr_Alu64R(Aalu_OR, 830 AMD64RMI_Imm(DEFAULT_FPUCW), rrm2)); 831 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, 832 AMD64RI_Reg(rrm2), m8_rsp)); 833 addInstr(env, AMD64Instr_A87LdCW(m8_rsp)); 834} 835 836 837/* Generate all-zeroes into a new vector register. 838*/ 839static HReg generate_zeroes_V128 ( ISelEnv* env ) 840{ 841 HReg dst = newVRegV(env); 842 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst)); 843 return dst; 844} 845 846/* Generate all-ones into a new vector register. 847*/ 848static HReg generate_ones_V128 ( ISelEnv* env ) 849{ 850 HReg dst = newVRegV(env); 851 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst)); 852 return dst; 853} 854 855 856/* Generate !src into a new vector register. Amazing that there isn't 857 a less crappy way to do this. 858*/ 859static HReg do_sse_NotV128 ( ISelEnv* env, HReg src ) 860{ 861 HReg dst = generate_ones_V128(env); 862 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst)); 863 return dst; 864} 865 866 867/* Expand the given byte into a 64-bit word, by cloning each bit 868 8 times. */ 869static ULong bitmask8_to_bytemask64 ( UShort w8 ) 870{ 871 vassert(w8 == (w8 & 0xFF)); 872 ULong w64 = 0; 873 Int i; 874 for (i = 0; i < 8; i++) { 875 if (w8 & (1<<i)) 876 w64 |= (0xFFULL << (8 * i)); 877 } 878 return w64; 879} 880 881 882/*---------------------------------------------------------*/ 883/*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/ 884/*---------------------------------------------------------*/ 885 886/* Select insns for an integer-typed expression, and add them to the 887 code list. Return a reg holding the result. This reg will be a 888 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you 889 want to modify it, ask for a new vreg, copy it in there, and modify 890 the copy. The register allocator will do its best to map both 891 vregs to the same real register, so the copies will often disappear 892 later in the game. 893 894 This should handle expressions of 64, 32, 16 and 8-bit type. All 895 results are returned in a 64-bit register. For 32-, 16- and 8-bit 896 expressions, the upper 32/48/56 bits are arbitrary, so you should 897 mask or sign extend partial values if necessary. 898*/ 899 900static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ) 901{ 902 HReg r = iselIntExpr_R_wrk(env, e); 903 /* sanity checks ... */ 904# if 0 905 vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n"); 906# endif 907 vassert(hregClass(r) == HRcInt64); 908 vassert(hregIsVirtual(r)); 909 return r; 910} 911 912/* DO NOT CALL THIS DIRECTLY ! */ 913static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) 914{ 915 /* Used for unary/binary SIMD64 ops. */ 916 HWord fn = 0; 917 Bool second_is_UInt; 918 919 MatchInfo mi; 920 DECLARE_PATTERN(p_1Uto8_64to1); 921 DECLARE_PATTERN(p_LDle8_then_8Uto64); 922 DECLARE_PATTERN(p_LDle16_then_16Uto64); 923 924 IRType ty = typeOfIRExpr(env->type_env,e); 925 switch (ty) { 926 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: break; 927 default: vassert(0); 928 } 929 930 switch (e->tag) { 931 932 /* --------- TEMP --------- */ 933 case Iex_RdTmp: { 934 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 935 } 936 937 /* --------- LOAD --------- */ 938 case Iex_Load: { 939 HReg dst = newVRegI(env); 940 AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr ); 941 942 /* We can't handle big-endian loads, nor load-linked. */ 943 if (e->Iex.Load.end != Iend_LE) 944 goto irreducible; 945 946 if (ty == Ity_I64) { 947 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, 948 AMD64RMI_Mem(amode), dst) ); 949 return dst; 950 } 951 if (ty == Ity_I32) { 952 addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst)); 953 return dst; 954 } 955 if (ty == Ity_I16) { 956 addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst)); 957 return dst; 958 } 959 if (ty == Ity_I8) { 960 addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst)); 961 return dst; 962 } 963 break; 964 } 965 966 /* --------- BINARY OP --------- */ 967 case Iex_Binop: { 968 AMD64AluOp aluOp; 969 AMD64ShiftOp shOp; 970 971 /* Pattern: Sub64(0,x) */ 972 /* and: Sub32(0,x) */ 973 if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1)) 974 || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) { 975 HReg dst = newVRegI(env); 976 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2); 977 addInstr(env, mk_iMOVsd_RR(reg,dst)); 978 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst)); 979 return dst; 980 } 981 982 /* Is it an addition or logical style op? */ 983 switch (e->Iex.Binop.op) { 984 case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64: 985 aluOp = Aalu_ADD; break; 986 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64: 987 aluOp = Aalu_SUB; break; 988 case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64: 989 aluOp = Aalu_AND; break; 990 case Iop_Or8: case Iop_Or16: case Iop_Or32: case Iop_Or64: 991 aluOp = Aalu_OR; break; 992 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64: 993 aluOp = Aalu_XOR; break; 994 case Iop_Mul16: case Iop_Mul32: case Iop_Mul64: 995 aluOp = Aalu_MUL; break; 996 default: 997 aluOp = Aalu_INVALID; break; 998 } 999 /* For commutative ops we assume any literal 1000 values are on the second operand. */ 1001 if (aluOp != Aalu_INVALID) { 1002 HReg dst = newVRegI(env); 1003 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1); 1004 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 1005 addInstr(env, mk_iMOVsd_RR(reg,dst)); 1006 addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst)); 1007 return dst; 1008 } 1009 1010 /* Perhaps a shift op? */ 1011 switch (e->Iex.Binop.op) { 1012 case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8: 1013 shOp = Ash_SHL; break; 1014 case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8: 1015 shOp = Ash_SHR; break; 1016 case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8: 1017 shOp = Ash_SAR; break; 1018 default: 1019 shOp = Ash_INVALID; break; 1020 } 1021 if (shOp != Ash_INVALID) { 1022 HReg dst = newVRegI(env); 1023 1024 /* regL = the value to be shifted */ 1025 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1026 addInstr(env, mk_iMOVsd_RR(regL,dst)); 1027 1028 /* Do any necessary widening for 32/16/8 bit operands */ 1029 switch (e->Iex.Binop.op) { 1030 case Iop_Shr64: case Iop_Shl64: case Iop_Sar64: 1031 break; 1032 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8: 1033 break; 1034 case Iop_Shr8: 1035 addInstr(env, AMD64Instr_Alu64R( 1036 Aalu_AND, AMD64RMI_Imm(0xFF), dst)); 1037 break; 1038 case Iop_Shr16: 1039 addInstr(env, AMD64Instr_Alu64R( 1040 Aalu_AND, AMD64RMI_Imm(0xFFFF), dst)); 1041 break; 1042 case Iop_Shr32: 1043 addInstr(env, AMD64Instr_MovxLQ(False, dst, dst)); 1044 break; 1045 case Iop_Sar8: 1046 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst)); 1047 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst)); 1048 break; 1049 case Iop_Sar16: 1050 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst)); 1051 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst)); 1052 break; 1053 case Iop_Sar32: 1054 addInstr(env, AMD64Instr_MovxLQ(True, dst, dst)); 1055 break; 1056 default: 1057 ppIROp(e->Iex.Binop.op); 1058 vassert(0); 1059 } 1060 1061 /* Now consider the shift amount. If it's a literal, we 1062 can do a much better job than the general case. */ 1063 if (e->Iex.Binop.arg2->tag == Iex_Const) { 1064 /* assert that the IR is well-typed */ 1065 Int nshift; 1066 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1067 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1068 vassert(nshift >= 0); 1069 if (nshift > 0) 1070 /* Can't allow nshift==0 since that means %cl */ 1071 addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst)); 1072 } else { 1073 /* General case; we have to force the amount into %cl. */ 1074 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1075 addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX())); 1076 addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst)); 1077 } 1078 return dst; 1079 } 1080 1081 /* Deal with 64-bit SIMD binary ops */ 1082 second_is_UInt = False; 1083 switch (e->Iex.Binop.op) { 1084 case Iop_Add8x8: 1085 fn = (HWord)h_generic_calc_Add8x8; break; 1086 case Iop_Add16x4: 1087 fn = (HWord)h_generic_calc_Add16x4; break; 1088 case Iop_Add32x2: 1089 fn = (HWord)h_generic_calc_Add32x2; break; 1090 1091 case Iop_Avg8Ux8: 1092 fn = (HWord)h_generic_calc_Avg8Ux8; break; 1093 case Iop_Avg16Ux4: 1094 fn = (HWord)h_generic_calc_Avg16Ux4; break; 1095 1096 case Iop_CmpEQ8x8: 1097 fn = (HWord)h_generic_calc_CmpEQ8x8; break; 1098 case Iop_CmpEQ16x4: 1099 fn = (HWord)h_generic_calc_CmpEQ16x4; break; 1100 case Iop_CmpEQ32x2: 1101 fn = (HWord)h_generic_calc_CmpEQ32x2; break; 1102 1103 case Iop_CmpGT8Sx8: 1104 fn = (HWord)h_generic_calc_CmpGT8Sx8; break; 1105 case Iop_CmpGT16Sx4: 1106 fn = (HWord)h_generic_calc_CmpGT16Sx4; break; 1107 case Iop_CmpGT32Sx2: 1108 fn = (HWord)h_generic_calc_CmpGT32Sx2; break; 1109 1110 case Iop_InterleaveHI8x8: 1111 fn = (HWord)h_generic_calc_InterleaveHI8x8; break; 1112 case Iop_InterleaveLO8x8: 1113 fn = (HWord)h_generic_calc_InterleaveLO8x8; break; 1114 case Iop_InterleaveHI16x4: 1115 fn = (HWord)h_generic_calc_InterleaveHI16x4; break; 1116 case Iop_InterleaveLO16x4: 1117 fn = (HWord)h_generic_calc_InterleaveLO16x4; break; 1118 case Iop_InterleaveHI32x2: 1119 fn = (HWord)h_generic_calc_InterleaveHI32x2; break; 1120 case Iop_InterleaveLO32x2: 1121 fn = (HWord)h_generic_calc_InterleaveLO32x2; break; 1122 case Iop_CatOddLanes16x4: 1123 fn = (HWord)h_generic_calc_CatOddLanes16x4; break; 1124 case Iop_CatEvenLanes16x4: 1125 fn = (HWord)h_generic_calc_CatEvenLanes16x4; break; 1126 case Iop_Perm8x8: 1127 fn = (HWord)h_generic_calc_Perm8x8; break; 1128 1129 case Iop_Max8Ux8: 1130 fn = (HWord)h_generic_calc_Max8Ux8; break; 1131 case Iop_Max16Sx4: 1132 fn = (HWord)h_generic_calc_Max16Sx4; break; 1133 case Iop_Min8Ux8: 1134 fn = (HWord)h_generic_calc_Min8Ux8; break; 1135 case Iop_Min16Sx4: 1136 fn = (HWord)h_generic_calc_Min16Sx4; break; 1137 1138 case Iop_Mul16x4: 1139 fn = (HWord)h_generic_calc_Mul16x4; break; 1140 case Iop_Mul32x2: 1141 fn = (HWord)h_generic_calc_Mul32x2; break; 1142 case Iop_MulHi16Sx4: 1143 fn = (HWord)h_generic_calc_MulHi16Sx4; break; 1144 case Iop_MulHi16Ux4: 1145 fn = (HWord)h_generic_calc_MulHi16Ux4; break; 1146 1147 case Iop_QAdd8Sx8: 1148 fn = (HWord)h_generic_calc_QAdd8Sx8; break; 1149 case Iop_QAdd16Sx4: 1150 fn = (HWord)h_generic_calc_QAdd16Sx4; break; 1151 case Iop_QAdd8Ux8: 1152 fn = (HWord)h_generic_calc_QAdd8Ux8; break; 1153 case Iop_QAdd16Ux4: 1154 fn = (HWord)h_generic_calc_QAdd16Ux4; break; 1155 1156 case Iop_QNarrowBin32Sto16Sx4: 1157 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; break; 1158 case Iop_QNarrowBin16Sto8Sx8: 1159 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; break; 1160 case Iop_QNarrowBin16Sto8Ux8: 1161 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; break; 1162 case Iop_NarrowBin16to8x8: 1163 fn = (HWord)h_generic_calc_NarrowBin16to8x8; break; 1164 case Iop_NarrowBin32to16x4: 1165 fn = (HWord)h_generic_calc_NarrowBin32to16x4; break; 1166 1167 case Iop_QSub8Sx8: 1168 fn = (HWord)h_generic_calc_QSub8Sx8; break; 1169 case Iop_QSub16Sx4: 1170 fn = (HWord)h_generic_calc_QSub16Sx4; break; 1171 case Iop_QSub8Ux8: 1172 fn = (HWord)h_generic_calc_QSub8Ux8; break; 1173 case Iop_QSub16Ux4: 1174 fn = (HWord)h_generic_calc_QSub16Ux4; break; 1175 1176 case Iop_Sub8x8: 1177 fn = (HWord)h_generic_calc_Sub8x8; break; 1178 case Iop_Sub16x4: 1179 fn = (HWord)h_generic_calc_Sub16x4; break; 1180 case Iop_Sub32x2: 1181 fn = (HWord)h_generic_calc_Sub32x2; break; 1182 1183 case Iop_ShlN32x2: 1184 fn = (HWord)h_generic_calc_ShlN32x2; 1185 second_is_UInt = True; 1186 break; 1187 case Iop_ShlN16x4: 1188 fn = (HWord)h_generic_calc_ShlN16x4; 1189 second_is_UInt = True; 1190 break; 1191 case Iop_ShlN8x8: 1192 fn = (HWord)h_generic_calc_ShlN8x8; 1193 second_is_UInt = True; 1194 break; 1195 case Iop_ShrN32x2: 1196 fn = (HWord)h_generic_calc_ShrN32x2; 1197 second_is_UInt = True; 1198 break; 1199 case Iop_ShrN16x4: 1200 fn = (HWord)h_generic_calc_ShrN16x4; 1201 second_is_UInt = True; 1202 break; 1203 case Iop_SarN32x2: 1204 fn = (HWord)h_generic_calc_SarN32x2; 1205 second_is_UInt = True; 1206 break; 1207 case Iop_SarN16x4: 1208 fn = (HWord)h_generic_calc_SarN16x4; 1209 second_is_UInt = True; 1210 break; 1211 case Iop_SarN8x8: 1212 fn = (HWord)h_generic_calc_SarN8x8; 1213 second_is_UInt = True; 1214 break; 1215 1216 default: 1217 fn = (HWord)0; break; 1218 } 1219 if (fn != (HWord)0) { 1220 /* Note: the following assumes all helpers are of signature 1221 ULong fn ( ULong, ULong ), and they are 1222 not marked as regparm functions. 1223 */ 1224 HReg dst = newVRegI(env); 1225 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1226 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1227 if (second_is_UInt) 1228 addInstr(env, AMD64Instr_MovxLQ(False, argR, argR)); 1229 addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) ); 1230 addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) ); 1231 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2, 1232 mk_RetLoc_simple(RLPri_Int) )); 1233 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst)); 1234 return dst; 1235 } 1236 1237 /* Handle misc other ops. */ 1238 1239 if (e->Iex.Binop.op == Iop_Max32U) { 1240 HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1241 HReg dst = newVRegI(env); 1242 HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); 1243 addInstr(env, mk_iMOVsd_RR(src1, dst)); 1244 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP, AMD64RMI_Reg(src2), dst)); 1245 addInstr(env, AMD64Instr_CMov64(Acc_B, AMD64RM_Reg(src2), dst)); 1246 return dst; 1247 } 1248 1249 if (e->Iex.Binop.op == Iop_DivModS64to32 1250 || e->Iex.Binop.op == Iop_DivModU64to32) { 1251 /* 64 x 32 -> (32(rem),32(div)) division */ 1252 /* Get the 64-bit operand into edx:eax, and the other into 1253 any old R/M. */ 1254 HReg rax = hregAMD64_RAX(); 1255 HReg rdx = hregAMD64_RDX(); 1256 HReg dst = newVRegI(env); 1257 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32); 1258 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2); 1259 /* Compute the left operand into a reg, and then 1260 put the top half in edx and the bottom in eax. */ 1261 HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1262 addInstr(env, mk_iMOVsd_RR(left64, rdx)); 1263 addInstr(env, mk_iMOVsd_RR(left64, rax)); 1264 addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx)); 1265 addInstr(env, AMD64Instr_Div(syned, 4, rmRight)); 1266 addInstr(env, AMD64Instr_MovxLQ(False, rdx, rdx)); 1267 addInstr(env, AMD64Instr_MovxLQ(False, rax, rax)); 1268 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx)); 1269 addInstr(env, mk_iMOVsd_RR(rax, dst)); 1270 addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst)); 1271 return dst; 1272 } 1273 1274 if (e->Iex.Binop.op == Iop_32HLto64) { 1275 HReg hi32 = newVRegI(env); 1276 HReg lo32 = newVRegI(env); 1277 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1); 1278 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2); 1279 addInstr(env, mk_iMOVsd_RR(hi32s, hi32)); 1280 addInstr(env, mk_iMOVsd_RR(lo32s, lo32)); 1281 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32)); 1282 addInstr(env, AMD64Instr_MovxLQ(False, lo32, lo32)); 1283 addInstr(env, AMD64Instr_Alu64R( 1284 Aalu_OR, AMD64RMI_Reg(lo32), hi32)); 1285 return hi32; 1286 } 1287 1288 if (e->Iex.Binop.op == Iop_16HLto32) { 1289 HReg hi16 = newVRegI(env); 1290 HReg lo16 = newVRegI(env); 1291 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1); 1292 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2); 1293 addInstr(env, mk_iMOVsd_RR(hi16s, hi16)); 1294 addInstr(env, mk_iMOVsd_RR(lo16s, lo16)); 1295 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16)); 1296 addInstr(env, AMD64Instr_Alu64R( 1297 Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16)); 1298 addInstr(env, AMD64Instr_Alu64R( 1299 Aalu_OR, AMD64RMI_Reg(lo16), hi16)); 1300 return hi16; 1301 } 1302 1303 if (e->Iex.Binop.op == Iop_8HLto16) { 1304 HReg hi8 = newVRegI(env); 1305 HReg lo8 = newVRegI(env); 1306 HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1); 1307 HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2); 1308 addInstr(env, mk_iMOVsd_RR(hi8s, hi8)); 1309 addInstr(env, mk_iMOVsd_RR(lo8s, lo8)); 1310 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8)); 1311 addInstr(env, AMD64Instr_Alu64R( 1312 Aalu_AND, AMD64RMI_Imm(0xFF), lo8)); 1313 addInstr(env, AMD64Instr_Alu64R( 1314 Aalu_OR, AMD64RMI_Reg(lo8), hi8)); 1315 return hi8; 1316 } 1317 1318 if (e->Iex.Binop.op == Iop_MullS32 1319 || e->Iex.Binop.op == Iop_MullS16 1320 || e->Iex.Binop.op == Iop_MullS8 1321 || e->Iex.Binop.op == Iop_MullU32 1322 || e->Iex.Binop.op == Iop_MullU16 1323 || e->Iex.Binop.op == Iop_MullU8) { 1324 HReg a32 = newVRegI(env); 1325 HReg b32 = newVRegI(env); 1326 HReg a32s = iselIntExpr_R(env, e->Iex.Binop.arg1); 1327 HReg b32s = iselIntExpr_R(env, e->Iex.Binop.arg2); 1328 Int shift = 0; 1329 AMD64ShiftOp shr_op = Ash_SHR; 1330 switch (e->Iex.Binop.op) { 1331 case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break; 1332 case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break; 1333 case Iop_MullS8: shr_op = Ash_SAR; shift = 56; break; 1334 case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break; 1335 case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break; 1336 case Iop_MullU8: shr_op = Ash_SHR; shift = 56; break; 1337 default: vassert(0); 1338 } 1339 1340 addInstr(env, mk_iMOVsd_RR(a32s, a32)); 1341 addInstr(env, mk_iMOVsd_RR(b32s, b32)); 1342 addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32)); 1343 addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32)); 1344 addInstr(env, AMD64Instr_Sh64(shr_op, shift, a32)); 1345 addInstr(env, AMD64Instr_Sh64(shr_op, shift, b32)); 1346 addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32)); 1347 return b32; 1348 } 1349 1350 if (e->Iex.Binop.op == Iop_CmpF64) { 1351 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1); 1352 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2); 1353 HReg dst = newVRegI(env); 1354 addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst)); 1355 /* Mask out irrelevant parts of the result so as to conform 1356 to the CmpF64 definition. */ 1357 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst)); 1358 return dst; 1359 } 1360 1361 if (e->Iex.Binop.op == Iop_F64toI32S 1362 || e->Iex.Binop.op == Iop_F64toI64S) { 1363 Int szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8; 1364 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2); 1365 HReg dst = newVRegI(env); 1366 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 ); 1367 addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst )); 1368 set_SSE_rounding_default(env); 1369 return dst; 1370 } 1371 1372 break; 1373 } 1374 1375 /* --------- UNARY OP --------- */ 1376 case Iex_Unop: { 1377 1378 /* 1Uto8(64to1(expr64)) */ 1379 { 1380 DEFINE_PATTERN( p_1Uto8_64to1, 1381 unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) ); 1382 if (matchIRExpr(&mi,p_1Uto8_64to1,e)) { 1383 IRExpr* expr64 = mi.bindee[0]; 1384 HReg dst = newVRegI(env); 1385 HReg src = iselIntExpr_R(env, expr64); 1386 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1387 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, 1388 AMD64RMI_Imm(1), dst)); 1389 return dst; 1390 } 1391 } 1392 1393 /* 8Uto64(LDle(expr64)) */ 1394 { 1395 DEFINE_PATTERN(p_LDle8_then_8Uto64, 1396 unop(Iop_8Uto64, 1397 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) ); 1398 if (matchIRExpr(&mi,p_LDle8_then_8Uto64,e)) { 1399 HReg dst = newVRegI(env); 1400 AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1401 addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst)); 1402 return dst; 1403 } 1404 } 1405 1406 /* 16Uto64(LDle(expr64)) */ 1407 { 1408 DEFINE_PATTERN(p_LDle16_then_16Uto64, 1409 unop(Iop_16Uto64, 1410 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) ); 1411 if (matchIRExpr(&mi,p_LDle16_then_16Uto64,e)) { 1412 HReg dst = newVRegI(env); 1413 AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1414 addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst)); 1415 return dst; 1416 } 1417 } 1418 1419 /* 32Uto64( Add32/Sub32/And32/Or32/Xor32(expr32, expr32) ) 1420 Use 32 bit arithmetic and let the default zero-extend rule 1421 do the 32Uto64 for free. */ 1422 if (e->Iex.Unop.op == Iop_32Uto64 && e->Iex.Unop.arg->tag == Iex_Binop) { 1423 IROp opi = e->Iex.Unop.arg->Iex.Binop.op; /* inner op */ 1424 IRExpr* argL = e->Iex.Unop.arg->Iex.Binop.arg1; 1425 IRExpr* argR = e->Iex.Unop.arg->Iex.Binop.arg2; 1426 AMD64AluOp aluOp = Aalu_INVALID; 1427 switch (opi) { 1428 case Iop_Add32: aluOp = Aalu_ADD; break; 1429 case Iop_Sub32: aluOp = Aalu_SUB; break; 1430 case Iop_And32: aluOp = Aalu_AND; break; 1431 case Iop_Or32: aluOp = Aalu_OR; break; 1432 case Iop_Xor32: aluOp = Aalu_XOR; break; 1433 default: break; 1434 } 1435 if (aluOp != Aalu_INVALID) { 1436 /* For commutative ops we assume any literal values are on 1437 the second operand. */ 1438 HReg dst = newVRegI(env); 1439 HReg reg = iselIntExpr_R(env, argL); 1440 AMD64RMI* rmi = iselIntExpr_RMI(env, argR); 1441 addInstr(env, mk_iMOVsd_RR(reg,dst)); 1442 addInstr(env, AMD64Instr_Alu32R(aluOp, rmi, dst)); 1443 return dst; 1444 } 1445 /* just fall through to normal handling for Iop_32Uto64 */ 1446 } 1447 1448 /* Fallback cases */ 1449 switch (e->Iex.Unop.op) { 1450 case Iop_32Uto64: 1451 case Iop_32Sto64: { 1452 HReg dst = newVRegI(env); 1453 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1454 addInstr(env, AMD64Instr_MovxLQ(e->Iex.Unop.op == Iop_32Sto64, 1455 src, dst) ); 1456 return dst; 1457 } 1458 case Iop_128HIto64: { 1459 HReg rHi, rLo; 1460 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1461 return rHi; /* and abandon rLo */ 1462 } 1463 case Iop_128to64: { 1464 HReg rHi, rLo; 1465 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1466 return rLo; /* and abandon rHi */ 1467 } 1468 case Iop_8Uto16: 1469 case Iop_8Uto32: 1470 case Iop_8Uto64: 1471 case Iop_16Uto64: 1472 case Iop_16Uto32: { 1473 HReg dst = newVRegI(env); 1474 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1475 Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32 1476 || e->Iex.Unop.op==Iop_16Uto64 ); 1477 UInt mask = srcIs16 ? 0xFFFF : 0xFF; 1478 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1479 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, 1480 AMD64RMI_Imm(mask), dst)); 1481 return dst; 1482 } 1483 case Iop_8Sto16: 1484 case Iop_8Sto64: 1485 case Iop_8Sto32: 1486 case Iop_16Sto32: 1487 case Iop_16Sto64: { 1488 HReg dst = newVRegI(env); 1489 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1490 Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32 1491 || e->Iex.Unop.op==Iop_16Sto64 ); 1492 UInt amt = srcIs16 ? 48 : 56; 1493 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1494 addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst)); 1495 addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst)); 1496 return dst; 1497 } 1498 case Iop_Not8: 1499 case Iop_Not16: 1500 case Iop_Not32: 1501 case Iop_Not64: { 1502 HReg dst = newVRegI(env); 1503 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1504 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1505 addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst)); 1506 return dst; 1507 } 1508 case Iop_16HIto8: 1509 case Iop_32HIto16: 1510 case Iop_64HIto32: { 1511 HReg dst = newVRegI(env); 1512 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1513 Int shift = 0; 1514 switch (e->Iex.Unop.op) { 1515 case Iop_16HIto8: shift = 8; break; 1516 case Iop_32HIto16: shift = 16; break; 1517 case Iop_64HIto32: shift = 32; break; 1518 default: vassert(0); 1519 } 1520 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1521 addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst)); 1522 return dst; 1523 } 1524 case Iop_1Uto64: 1525 case Iop_1Uto32: 1526 case Iop_1Uto8: { 1527 HReg dst = newVRegI(env); 1528 AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 1529 addInstr(env, AMD64Instr_Set64(cond,dst)); 1530 return dst; 1531 } 1532 case Iop_1Sto8: 1533 case Iop_1Sto16: 1534 case Iop_1Sto32: 1535 case Iop_1Sto64: { 1536 /* could do better than this, but for now ... */ 1537 HReg dst = newVRegI(env); 1538 AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 1539 addInstr(env, AMD64Instr_Set64(cond,dst)); 1540 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst)); 1541 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst)); 1542 return dst; 1543 } 1544 case Iop_Ctz64: { 1545 /* Count trailing zeroes, implemented by amd64 'bsfq' */ 1546 HReg dst = newVRegI(env); 1547 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1548 addInstr(env, AMD64Instr_Bsfr64(True,src,dst)); 1549 return dst; 1550 } 1551 case Iop_Clz64: { 1552 /* Count leading zeroes. Do 'bsrq' to establish the index 1553 of the highest set bit, and subtract that value from 1554 63. */ 1555 HReg tmp = newVRegI(env); 1556 HReg dst = newVRegI(env); 1557 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1558 addInstr(env, AMD64Instr_Bsfr64(False,src,tmp)); 1559 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, 1560 AMD64RMI_Imm(63), dst)); 1561 addInstr(env, AMD64Instr_Alu64R(Aalu_SUB, 1562 AMD64RMI_Reg(tmp), dst)); 1563 return dst; 1564 } 1565 1566 case Iop_CmpwNEZ64: { 1567 HReg dst = newVRegI(env); 1568 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1569 addInstr(env, mk_iMOVsd_RR(src,dst)); 1570 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst)); 1571 addInstr(env, AMD64Instr_Alu64R(Aalu_OR, 1572 AMD64RMI_Reg(src), dst)); 1573 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst)); 1574 return dst; 1575 } 1576 1577 case Iop_CmpwNEZ32: { 1578 HReg src = newVRegI(env); 1579 HReg dst = newVRegI(env); 1580 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg); 1581 addInstr(env, mk_iMOVsd_RR(pre,src)); 1582 addInstr(env, AMD64Instr_MovxLQ(False, src, src)); 1583 addInstr(env, mk_iMOVsd_RR(src,dst)); 1584 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst)); 1585 addInstr(env, AMD64Instr_Alu64R(Aalu_OR, 1586 AMD64RMI_Reg(src), dst)); 1587 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst)); 1588 return dst; 1589 } 1590 1591 case Iop_Left8: 1592 case Iop_Left16: 1593 case Iop_Left32: 1594 case Iop_Left64: { 1595 HReg dst = newVRegI(env); 1596 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1597 addInstr(env, mk_iMOVsd_RR(src, dst)); 1598 addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst)); 1599 addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst)); 1600 return dst; 1601 } 1602 1603 case Iop_V128to32: { 1604 HReg dst = newVRegI(env); 1605 HReg vec = iselVecExpr(env, e->Iex.Unop.arg); 1606 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP()); 1607 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16)); 1608 addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst)); 1609 return dst; 1610 } 1611 1612 /* V128{HI}to64 */ 1613 case Iop_V128HIto64: 1614 case Iop_V128to64: { 1615 HReg dst = newVRegI(env); 1616 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? -8 : -16; 1617 HReg rsp = hregAMD64_RSP(); 1618 HReg vec = iselVecExpr(env, e->Iex.Unop.arg); 1619 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp); 1620 AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp); 1621 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 1622 16, vec, m16_rsp)); 1623 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, 1624 AMD64RMI_Mem(off_rsp), dst )); 1625 return dst; 1626 } 1627 1628 case Iop_V256to64_0: case Iop_V256to64_1: 1629 case Iop_V256to64_2: case Iop_V256to64_3: { 1630 HReg vHi, vLo, vec; 1631 iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg); 1632 /* Do the first part of the selection by deciding which of 1633 the 128 bit registers do look at, and second part using 1634 the same scheme as for V128{HI}to64 above. */ 1635 Int off = 0; 1636 switch (e->Iex.Unop.op) { 1637 case Iop_V256to64_0: vec = vLo; off = -16; break; 1638 case Iop_V256to64_1: vec = vLo; off = -8; break; 1639 case Iop_V256to64_2: vec = vHi; off = -16; break; 1640 case Iop_V256to64_3: vec = vHi; off = -8; break; 1641 default: vassert(0); 1642 } 1643 HReg dst = newVRegI(env); 1644 HReg rsp = hregAMD64_RSP(); 1645 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp); 1646 AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp); 1647 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 1648 16, vec, m16_rsp)); 1649 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, 1650 AMD64RMI_Mem(off_rsp), dst )); 1651 return dst; 1652 } 1653 1654 /* ReinterpF64asI64(e) */ 1655 /* Given an IEEE754 double, produce an I64 with the same bit 1656 pattern. */ 1657 case Iop_ReinterpF64asI64: { 1658 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 1659 HReg dst = newVRegI(env); 1660 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 1661 /* paranoia */ 1662 set_SSE_rounding_default(env); 1663 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp)); 1664 addInstr(env, AMD64Instr_Alu64R( 1665 Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst)); 1666 return dst; 1667 } 1668 1669 /* ReinterpF32asI32(e) */ 1670 /* Given an IEEE754 single, produce an I64 with the same bit 1671 pattern in the lower half. */ 1672 case Iop_ReinterpF32asI32: { 1673 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 1674 HReg dst = newVRegI(env); 1675 HReg src = iselFltExpr(env, e->Iex.Unop.arg); 1676 /* paranoia */ 1677 set_SSE_rounding_default(env); 1678 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp)); 1679 addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst )); 1680 return dst; 1681 } 1682 1683 case Iop_16to8: 1684 case Iop_32to8: 1685 case Iop_64to8: 1686 case Iop_32to16: 1687 case Iop_64to16: 1688 case Iop_64to32: 1689 /* These are no-ops. */ 1690 return iselIntExpr_R(env, e->Iex.Unop.arg); 1691 1692 case Iop_GetMSBs8x8: { 1693 /* Note: the following assumes the helper is of 1694 signature 1695 UInt fn ( ULong ), and is not a regparm fn. 1696 */ 1697 HReg dst = newVRegI(env); 1698 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); 1699 fn = (HWord)h_generic_calc_GetMSBs8x8; 1700 addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) ); 1701 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1702 1, mk_RetLoc_simple(RLPri_Int) )); 1703 /* MovxLQ is not exactly the right thing here. We just 1704 need to get the bottom 8 bits of RAX into dst, and zero 1705 out everything else. Assuming that the helper returns 1706 a UInt with the top 24 bits zeroed out, it'll do, 1707 though. */ 1708 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst)); 1709 return dst; 1710 } 1711 1712 case Iop_GetMSBs8x16: { 1713 /* Note: the following assumes the helper is of signature 1714 UInt fn ( ULong w64hi, ULong w64Lo ), 1715 and is not a regparm fn. */ 1716 HReg dst = newVRegI(env); 1717 HReg vec = iselVecExpr(env, e->Iex.Unop.arg); 1718 HReg rsp = hregAMD64_RSP(); 1719 fn = (HWord)h_generic_calc_GetMSBs8x16; 1720 AMD64AMode* m8_rsp = AMD64AMode_IR( -8, rsp); 1721 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp); 1722 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 1723 16, vec, m16_rsp)); 1724 /* hi 64 bits into RDI -- the first arg */ 1725 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, 1726 AMD64RMI_Mem(m8_rsp), 1727 hregAMD64_RDI() )); /* 1st arg */ 1728 /* lo 64 bits into RSI -- the 2nd arg */ 1729 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, 1730 AMD64RMI_Mem(m16_rsp), 1731 hregAMD64_RSI() )); /* 2nd arg */ 1732 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1733 2, mk_RetLoc_simple(RLPri_Int) )); 1734 /* MovxLQ is not exactly the right thing here. We just 1735 need to get the bottom 16 bits of RAX into dst, and zero 1736 out everything else. Assuming that the helper returns 1737 a UInt with the top 16 bits zeroed out, it'll do, 1738 though. */ 1739 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst)); 1740 return dst; 1741 } 1742 1743 default: 1744 break; 1745 } 1746 1747 /* Deal with unary 64-bit SIMD ops. */ 1748 switch (e->Iex.Unop.op) { 1749 case Iop_CmpNEZ32x2: 1750 fn = (HWord)h_generic_calc_CmpNEZ32x2; break; 1751 case Iop_CmpNEZ16x4: 1752 fn = (HWord)h_generic_calc_CmpNEZ16x4; break; 1753 case Iop_CmpNEZ8x8: 1754 fn = (HWord)h_generic_calc_CmpNEZ8x8; break; 1755 default: 1756 fn = (HWord)0; break; 1757 } 1758 if (fn != (HWord)0) { 1759 /* Note: the following assumes all helpers are of 1760 signature 1761 ULong fn ( ULong ), and they are 1762 not marked as regparm functions. 1763 */ 1764 HReg dst = newVRegI(env); 1765 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); 1766 addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) ); 1767 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1, 1768 mk_RetLoc_simple(RLPri_Int) )); 1769 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst)); 1770 return dst; 1771 } 1772 1773 break; 1774 } 1775 1776 /* --------- GET --------- */ 1777 case Iex_Get: { 1778 if (ty == Ity_I64) { 1779 HReg dst = newVRegI(env); 1780 addInstr(env, AMD64Instr_Alu64R( 1781 Aalu_MOV, 1782 AMD64RMI_Mem( 1783 AMD64AMode_IR(e->Iex.Get.offset, 1784 hregAMD64_RBP())), 1785 dst)); 1786 return dst; 1787 } 1788 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) { 1789 HReg dst = newVRegI(env); 1790 addInstr(env, AMD64Instr_LoadEX( 1791 toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)), 1792 False, 1793 AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()), 1794 dst)); 1795 return dst; 1796 } 1797 break; 1798 } 1799 1800 case Iex_GetI: { 1801 AMD64AMode* am 1802 = genGuestArrayOffset( 1803 env, e->Iex.GetI.descr, 1804 e->Iex.GetI.ix, e->Iex.GetI.bias ); 1805 HReg dst = newVRegI(env); 1806 if (ty == Ity_I8) { 1807 addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst )); 1808 return dst; 1809 } 1810 if (ty == Ity_I64) { 1811 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst )); 1812 return dst; 1813 } 1814 break; 1815 } 1816 1817 /* --------- CCALL --------- */ 1818 case Iex_CCall: { 1819 HReg dst = newVRegI(env); 1820 vassert(ty == e->Iex.CCall.retty); 1821 1822 /* be very restrictive for now. Only 64-bit ints allowed for 1823 args, and 64 or 32 bits for return type. */ 1824 if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32) 1825 goto irreducible; 1826 1827 /* Marshal args, do the call. */ 1828 UInt addToSp = 0; 1829 RetLoc rloc = mk_RetLoc_INVALID(); 1830 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, 1831 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args ); 1832 vassert(is_sane_RetLoc(rloc)); 1833 vassert(rloc.pri == RLPri_Int); 1834 vassert(addToSp == 0); 1835 1836 /* Move to dst, and zero out the top 32 bits if the result type is 1837 Ity_I32. Probably overkill, but still .. */ 1838 if (e->Iex.CCall.retty == Ity_I64) 1839 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst)); 1840 else 1841 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst)); 1842 1843 return dst; 1844 } 1845 1846 /* --------- LITERAL --------- */ 1847 /* 64/32/16/8-bit literals */ 1848 case Iex_Const: 1849 if (ty == Ity_I64) { 1850 HReg r = newVRegI(env); 1851 addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r)); 1852 return r; 1853 } else { 1854 AMD64RMI* rmi = iselIntExpr_RMI ( env, e ); 1855 HReg r = newVRegI(env); 1856 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r)); 1857 return r; 1858 } 1859 1860 /* --------- MULTIPLEX --------- */ 1861 case Iex_ITE: { // VFD 1862 if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) 1863 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) { 1864 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue); 1865 AMD64RM* r0 = iselIntExpr_RM(env, e->Iex.ITE.iffalse); 1866 HReg dst = newVRegI(env); 1867 addInstr(env, mk_iMOVsd_RR(r1,dst)); 1868 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond); 1869 addInstr(env, AMD64Instr_CMov64(cc ^ 1, r0, dst)); 1870 return dst; 1871 } 1872 break; 1873 } 1874 1875 /* --------- TERNARY OP --------- */ 1876 case Iex_Triop: { 1877 IRTriop *triop = e->Iex.Triop.details; 1878 /* C3210 flags following FPU partial remainder (fprem), both 1879 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */ 1880 if (triop->op == Iop_PRemC3210F64 1881 || triop->op == Iop_PRem1C3210F64) { 1882 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 1883 HReg arg1 = iselDblExpr(env, triop->arg2); 1884 HReg arg2 = iselDblExpr(env, triop->arg3); 1885 HReg dst = newVRegI(env); 1886 addInstr(env, AMD64Instr_A87Free(2)); 1887 1888 /* one arg -> top of x87 stack */ 1889 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp)); 1890 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 1891 1892 /* other arg -> top of x87 stack */ 1893 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp)); 1894 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 1895 1896 switch (triop->op) { 1897 case Iop_PRemC3210F64: 1898 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM)); 1899 break; 1900 case Iop_PRem1C3210F64: 1901 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1)); 1902 break; 1903 default: 1904 vassert(0); 1905 } 1906 /* Ignore the result, and instead make off with the FPU's 1907 C3210 flags (in the status word). */ 1908 addInstr(env, AMD64Instr_A87StSW(m8_rsp)); 1909 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst)); 1910 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst)); 1911 return dst; 1912 } 1913 break; 1914 } 1915 1916 default: 1917 break; 1918 } /* switch (e->tag) */ 1919 1920 /* We get here if no pattern matched. */ 1921 irreducible: 1922 ppIRExpr(e); 1923 vpanic("iselIntExpr_R(amd64): cannot reduce tree"); 1924} 1925 1926 1927/*---------------------------------------------------------*/ 1928/*--- ISEL: Integer expression auxiliaries ---*/ 1929/*---------------------------------------------------------*/ 1930 1931/* --------------------- AMODEs --------------------- */ 1932 1933/* Return an AMode which computes the value of the specified 1934 expression, possibly also adding insns to the code list as a 1935 result. The expression may only be a 32-bit one. 1936*/ 1937 1938static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e ) 1939{ 1940 AMD64AMode* am = iselIntExpr_AMode_wrk(env, e); 1941 vassert(sane_AMode(am)); 1942 return am; 1943} 1944 1945/* DO NOT CALL THIS DIRECTLY ! */ 1946static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e ) 1947{ 1948 MatchInfo mi; 1949 DECLARE_PATTERN(p_complex); 1950 IRType ty = typeOfIRExpr(env->type_env,e); 1951 vassert(ty == Ity_I64); 1952 1953 /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */ 1954 /* bind0 bind1 bind2 bind3 */ 1955 DEFINE_PATTERN(p_complex, 1956 binop( Iop_Add64, 1957 binop( Iop_Add64, 1958 bind(0), 1959 binop(Iop_Shl64, bind(1), bind(2)) 1960 ), 1961 bind(3) 1962 ) 1963 ); 1964 if (matchIRExpr(&mi, p_complex, e)) { 1965 IRExpr* expr1 = mi.bindee[0]; 1966 IRExpr* expr2 = mi.bindee[1]; 1967 IRExpr* imm8 = mi.bindee[2]; 1968 IRExpr* simm32 = mi.bindee[3]; 1969 if (imm8->tag == Iex_Const 1970 && imm8->Iex.Const.con->tag == Ico_U8 1971 && imm8->Iex.Const.con->Ico.U8 < 4 1972 /* imm8 is OK, now check simm32 */ 1973 && simm32->tag == Iex_Const 1974 && simm32->Iex.Const.con->tag == Ico_U64 1975 && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) { 1976 UInt shift = imm8->Iex.Const.con->Ico.U8; 1977 UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64); 1978 HReg r1 = iselIntExpr_R(env, expr1); 1979 HReg r2 = iselIntExpr_R(env, expr2); 1980 vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3); 1981 return AMD64AMode_IRRS(offset, r1, r2, shift); 1982 } 1983 } 1984 1985 /* Add64(expr1, Shl64(expr2, imm)) */ 1986 if (e->tag == Iex_Binop 1987 && e->Iex.Binop.op == Iop_Add64 1988 && e->Iex.Binop.arg2->tag == Iex_Binop 1989 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64 1990 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const 1991 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) { 1992 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1993 if (shift == 1 || shift == 2 || shift == 3) { 1994 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1995 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 ); 1996 return AMD64AMode_IRRS(0, r1, r2, shift); 1997 } 1998 } 1999 2000 /* Add64(expr,i) */ 2001 if (e->tag == Iex_Binop 2002 && e->Iex.Binop.op == Iop_Add64 2003 && e->Iex.Binop.arg2->tag == Iex_Const 2004 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64 2005 && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) { 2006 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2007 return AMD64AMode_IR( 2008 toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64), 2009 r1 2010 ); 2011 } 2012 2013 /* Doesn't match anything in particular. Generate it into 2014 a register and use that. */ 2015 { 2016 HReg r1 = iselIntExpr_R(env, e); 2017 return AMD64AMode_IR(0, r1); 2018 } 2019} 2020 2021 2022/* --------------------- RMIs --------------------- */ 2023 2024/* Similarly, calculate an expression into an X86RMI operand. As with 2025 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */ 2026 2027static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e ) 2028{ 2029 AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e); 2030 /* sanity checks ... */ 2031 switch (rmi->tag) { 2032 case Armi_Imm: 2033 return rmi; 2034 case Armi_Reg: 2035 vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64); 2036 vassert(hregIsVirtual(rmi->Armi.Reg.reg)); 2037 return rmi; 2038 case Armi_Mem: 2039 vassert(sane_AMode(rmi->Armi.Mem.am)); 2040 return rmi; 2041 default: 2042 vpanic("iselIntExpr_RMI: unknown amd64 RMI tag"); 2043 } 2044} 2045 2046/* DO NOT CALL THIS DIRECTLY ! */ 2047static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e ) 2048{ 2049 IRType ty = typeOfIRExpr(env->type_env,e); 2050 vassert(ty == Ity_I64 || ty == Ity_I32 2051 || ty == Ity_I16 || ty == Ity_I8); 2052 2053 /* special case: immediate 64/32/16/8 */ 2054 if (e->tag == Iex_Const) { 2055 switch (e->Iex.Const.con->tag) { 2056 case Ico_U64: 2057 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) { 2058 return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)); 2059 } 2060 break; 2061 case Ico_U32: 2062 return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32); break; 2063 case Ico_U16: 2064 return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16); break; 2065 case Ico_U8: 2066 return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8); break; 2067 default: 2068 vpanic("iselIntExpr_RMI.Iex_Const(amd64)"); 2069 } 2070 } 2071 2072 /* special case: 64-bit GET */ 2073 if (e->tag == Iex_Get && ty == Ity_I64) { 2074 return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset, 2075 hregAMD64_RBP())); 2076 } 2077 2078 /* special case: 64-bit load from memory */ 2079 if (e->tag == Iex_Load && ty == Ity_I64 2080 && e->Iex.Load.end == Iend_LE) { 2081 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2082 return AMD64RMI_Mem(am); 2083 } 2084 2085 /* default case: calculate into a register and return that */ 2086 { 2087 HReg r = iselIntExpr_R ( env, e ); 2088 return AMD64RMI_Reg(r); 2089 } 2090} 2091 2092 2093/* --------------------- RIs --------------------- */ 2094 2095/* Calculate an expression into an AMD64RI operand. As with 2096 iselIntExpr_R, the expression can have type 64, 32, 16 or 8 2097 bits. */ 2098 2099static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e ) 2100{ 2101 AMD64RI* ri = iselIntExpr_RI_wrk(env, e); 2102 /* sanity checks ... */ 2103 switch (ri->tag) { 2104 case Ari_Imm: 2105 return ri; 2106 case Ari_Reg: 2107 vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64); 2108 vassert(hregIsVirtual(ri->Ari.Reg.reg)); 2109 return ri; 2110 default: 2111 vpanic("iselIntExpr_RI: unknown amd64 RI tag"); 2112 } 2113} 2114 2115/* DO NOT CALL THIS DIRECTLY ! */ 2116static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e ) 2117{ 2118 IRType ty = typeOfIRExpr(env->type_env,e); 2119 vassert(ty == Ity_I64 || ty == Ity_I32 2120 || ty == Ity_I16 || ty == Ity_I8); 2121 2122 /* special case: immediate */ 2123 if (e->tag == Iex_Const) { 2124 switch (e->Iex.Const.con->tag) { 2125 case Ico_U64: 2126 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) { 2127 return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64)); 2128 } 2129 break; 2130 case Ico_U32: 2131 return AMD64RI_Imm(e->Iex.Const.con->Ico.U32); 2132 case Ico_U16: 2133 return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16); 2134 case Ico_U8: 2135 return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8); 2136 default: 2137 vpanic("iselIntExpr_RMI.Iex_Const(amd64)"); 2138 } 2139 } 2140 2141 /* default case: calculate into a register and return that */ 2142 { 2143 HReg r = iselIntExpr_R ( env, e ); 2144 return AMD64RI_Reg(r); 2145 } 2146} 2147 2148 2149/* --------------------- RMs --------------------- */ 2150 2151/* Similarly, calculate an expression into an AMD64RM operand. As 2152 with iselIntExpr_R, the expression can have type 64, 32, 16 or 8 2153 bits. */ 2154 2155static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e ) 2156{ 2157 AMD64RM* rm = iselIntExpr_RM_wrk(env, e); 2158 /* sanity checks ... */ 2159 switch (rm->tag) { 2160 case Arm_Reg: 2161 vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64); 2162 vassert(hregIsVirtual(rm->Arm.Reg.reg)); 2163 return rm; 2164 case Arm_Mem: 2165 vassert(sane_AMode(rm->Arm.Mem.am)); 2166 return rm; 2167 default: 2168 vpanic("iselIntExpr_RM: unknown amd64 RM tag"); 2169 } 2170} 2171 2172/* DO NOT CALL THIS DIRECTLY ! */ 2173static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e ) 2174{ 2175 IRType ty = typeOfIRExpr(env->type_env,e); 2176 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 2177 2178 /* special case: 64-bit GET */ 2179 if (e->tag == Iex_Get && ty == Ity_I64) { 2180 return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset, 2181 hregAMD64_RBP())); 2182 } 2183 2184 /* special case: load from memory */ 2185 2186 /* default case: calculate into a register and return that */ 2187 { 2188 HReg r = iselIntExpr_R ( env, e ); 2189 return AMD64RM_Reg(r); 2190 } 2191} 2192 2193 2194/* --------------------- CONDCODE --------------------- */ 2195 2196/* Generate code to evaluated a bit-typed expression, returning the 2197 condition code which would correspond when the expression would 2198 notionally have returned 1. */ 2199 2200static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e ) 2201{ 2202 /* Uh, there's nothing we can sanity check here, unfortunately. */ 2203 return iselCondCode_wrk(env,e); 2204} 2205 2206/* DO NOT CALL THIS DIRECTLY ! */ 2207static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) 2208{ 2209 MatchInfo mi; 2210 2211 vassert(e); 2212 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1); 2213 2214 /* var */ 2215 if (e->tag == Iex_RdTmp) { 2216 HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp); 2217 HReg dst = newVRegI(env); 2218 addInstr(env, mk_iMOVsd_RR(r64,dst)); 2219 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst)); 2220 return Acc_NZ; 2221 } 2222 2223 /* Constant 1:Bit */ 2224 if (e->tag == Iex_Const) { 2225 HReg r; 2226 vassert(e->Iex.Const.con->tag == Ico_U1); 2227 vassert(e->Iex.Const.con->Ico.U1 == True 2228 || e->Iex.Const.con->Ico.U1 == False); 2229 r = newVRegI(env); 2230 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r)); 2231 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r)); 2232 return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ; 2233 } 2234 2235 /* Not1(...) */ 2236 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) { 2237 /* Generate code for the arg, and negate the test condition */ 2238 return 1 ^ iselCondCode(env, e->Iex.Unop.arg); 2239 } 2240 2241 /* --- patterns rooted at: 64to1 --- */ 2242 2243 /* 64to1 */ 2244 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) { 2245 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg); 2246 addInstr(env, AMD64Instr_Test64(1,reg)); 2247 return Acc_NZ; 2248 } 2249 2250 /* --- patterns rooted at: 32to1 --- */ 2251 2252 /* 32to1 */ 2253 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_32to1) { 2254 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg); 2255 addInstr(env, AMD64Instr_Test64(1,reg)); 2256 return Acc_NZ; 2257 } 2258 2259 /* --- patterns rooted at: CmpNEZ8 --- */ 2260 2261 /* CmpNEZ8(x) */ 2262 if (e->tag == Iex_Unop 2263 && e->Iex.Unop.op == Iop_CmpNEZ8) { 2264 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg); 2265 addInstr(env, AMD64Instr_Test64(0xFF,r)); 2266 return Acc_NZ; 2267 } 2268 2269 /* --- patterns rooted at: CmpNEZ16 --- */ 2270 2271 /* CmpNEZ16(x) */ 2272 if (e->tag == Iex_Unop 2273 && e->Iex.Unop.op == Iop_CmpNEZ16) { 2274 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg); 2275 addInstr(env, AMD64Instr_Test64(0xFFFF,r)); 2276 return Acc_NZ; 2277 } 2278 2279 /* --- patterns rooted at: CmpNEZ32 --- */ 2280 2281 /* CmpNEZ32(x) */ 2282 if (e->tag == Iex_Unop 2283 && e->Iex.Unop.op == Iop_CmpNEZ32) { 2284 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 2285 AMD64RMI* rmi2 = AMD64RMI_Imm(0); 2286 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1)); 2287 return Acc_NZ; 2288 } 2289 2290 /* --- patterns rooted at: CmpNEZ64 --- */ 2291 2292 /* CmpNEZ64(Or64(x,y)) */ 2293 { 2294 DECLARE_PATTERN(p_CmpNEZ64_Or64); 2295 DEFINE_PATTERN(p_CmpNEZ64_Or64, 2296 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1)))); 2297 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) { 2298 HReg r0 = iselIntExpr_R(env, mi.bindee[0]); 2299 AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]); 2300 HReg tmp = newVRegI(env); 2301 addInstr(env, mk_iMOVsd_RR(r0, tmp)); 2302 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp)); 2303 return Acc_NZ; 2304 } 2305 } 2306 2307 /* CmpNEZ64(x) */ 2308 if (e->tag == Iex_Unop 2309 && e->Iex.Unop.op == Iop_CmpNEZ64) { 2310 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 2311 AMD64RMI* rmi2 = AMD64RMI_Imm(0); 2312 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1)); 2313 return Acc_NZ; 2314 } 2315 2316 /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */ 2317 2318 /* CmpEQ8 / CmpNE8 */ 2319 if (e->tag == Iex_Binop 2320 && (e->Iex.Binop.op == Iop_CmpEQ8 2321 || e->Iex.Binop.op == Iop_CmpNE8 2322 || e->Iex.Binop.op == Iop_CasCmpEQ8 2323 || e->Iex.Binop.op == Iop_CasCmpNE8)) { 2324 if (isZeroU8(e->Iex.Binop.arg2)) { 2325 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2326 addInstr(env, AMD64Instr_Test64(0xFF,r1)); 2327 switch (e->Iex.Binop.op) { 2328 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z; 2329 case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ; 2330 default: vpanic("iselCondCode(amd64): CmpXX8(expr,0:I8)"); 2331 } 2332 } else { 2333 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2334 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2335 HReg r = newVRegI(env); 2336 addInstr(env, mk_iMOVsd_RR(r1,r)); 2337 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r)); 2338 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r)); 2339 switch (e->Iex.Binop.op) { 2340 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z; 2341 case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ; 2342 default: vpanic("iselCondCode(amd64): CmpXX8(expr,expr)"); 2343 } 2344 } 2345 } 2346 2347 /* CmpEQ16 / CmpNE16 */ 2348 if (e->tag == Iex_Binop 2349 && (e->Iex.Binop.op == Iop_CmpEQ16 2350 || e->Iex.Binop.op == Iop_CmpNE16 2351 || e->Iex.Binop.op == Iop_CasCmpEQ16 2352 || e->Iex.Binop.op == Iop_CasCmpNE16)) { 2353 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2354 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2355 HReg r = newVRegI(env); 2356 addInstr(env, mk_iMOVsd_RR(r1,r)); 2357 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r)); 2358 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r)); 2359 switch (e->Iex.Binop.op) { 2360 case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z; 2361 case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ; 2362 default: vpanic("iselCondCode(amd64): CmpXX16"); 2363 } 2364 } 2365 2366 /* CmpNE64(ccall, 64-bit constant) (--smc-check=all optimisation). 2367 Saves a "movq %rax, %tmp" compared to the default route. */ 2368 if (e->tag == Iex_Binop 2369 && e->Iex.Binop.op == Iop_CmpNE64 2370 && e->Iex.Binop.arg1->tag == Iex_CCall 2371 && e->Iex.Binop.arg2->tag == Iex_Const) { 2372 IRExpr* cal = e->Iex.Binop.arg1; 2373 IRExpr* con = e->Iex.Binop.arg2; 2374 HReg tmp = newVRegI(env); 2375 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */ 2376 vassert(cal->Iex.CCall.retty == Ity_I64); /* else ill-typed IR */ 2377 vassert(con->Iex.Const.con->tag == Ico_U64); 2378 /* Marshal args, do the call. */ 2379 UInt addToSp = 0; 2380 RetLoc rloc = mk_RetLoc_INVALID(); 2381 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, 2382 cal->Iex.CCall.cee, 2383 cal->Iex.CCall.retty, cal->Iex.CCall.args ); 2384 vassert(is_sane_RetLoc(rloc)); 2385 vassert(rloc.pri == RLPri_Int); 2386 vassert(addToSp == 0); 2387 /* */ 2388 addInstr(env, AMD64Instr_Imm64(con->Iex.Const.con->Ico.U64, tmp)); 2389 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP, 2390 AMD64RMI_Reg(hregAMD64_RAX()), tmp)); 2391 return Acc_NZ; 2392 } 2393 2394 /* Cmp*64*(x,y) */ 2395 if (e->tag == Iex_Binop 2396 && (e->Iex.Binop.op == Iop_CmpEQ64 2397 || e->Iex.Binop.op == Iop_CmpNE64 2398 || e->Iex.Binop.op == Iop_CmpLT64S 2399 || e->Iex.Binop.op == Iop_CmpLT64U 2400 || e->Iex.Binop.op == Iop_CmpLE64S 2401 || e->Iex.Binop.op == Iop_CmpLE64U 2402 || e->Iex.Binop.op == Iop_CasCmpEQ64 2403 || e->Iex.Binop.op == Iop_CasCmpNE64 2404 || e->Iex.Binop.op == Iop_ExpCmpNE64)) { 2405 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2406 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2407 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1)); 2408 switch (e->Iex.Binop.op) { 2409 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z; 2410 case Iop_CmpNE64: 2411 case Iop_CasCmpNE64: case Iop_ExpCmpNE64: return Acc_NZ; 2412 case Iop_CmpLT64S: return Acc_L; 2413 case Iop_CmpLT64U: return Acc_B; 2414 case Iop_CmpLE64S: return Acc_LE; 2415 case Iop_CmpLE64U: return Acc_BE; 2416 default: vpanic("iselCondCode(amd64): CmpXX64"); 2417 } 2418 } 2419 2420 /* Cmp*32*(x,y) */ 2421 if (e->tag == Iex_Binop 2422 && (e->Iex.Binop.op == Iop_CmpEQ32 2423 || e->Iex.Binop.op == Iop_CmpNE32 2424 || e->Iex.Binop.op == Iop_CmpLT32S 2425 || e->Iex.Binop.op == Iop_CmpLT32U 2426 || e->Iex.Binop.op == Iop_CmpLE32S 2427 || e->Iex.Binop.op == Iop_CmpLE32U 2428 || e->Iex.Binop.op == Iop_CasCmpEQ32 2429 || e->Iex.Binop.op == Iop_CasCmpNE32 2430 || e->Iex.Binop.op == Iop_ExpCmpNE32)) { 2431 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2432 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2433 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1)); 2434 switch (e->Iex.Binop.op) { 2435 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z; 2436 case Iop_CmpNE32: 2437 case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Acc_NZ; 2438 case Iop_CmpLT32S: return Acc_L; 2439 case Iop_CmpLT32U: return Acc_B; 2440 case Iop_CmpLE32S: return Acc_LE; 2441 case Iop_CmpLE32U: return Acc_BE; 2442 default: vpanic("iselCondCode(amd64): CmpXX32"); 2443 } 2444 } 2445 2446 ppIRExpr(e); 2447 vpanic("iselCondCode(amd64)"); 2448} 2449 2450 2451/*---------------------------------------------------------*/ 2452/*--- ISEL: Integer expressions (128 bit) ---*/ 2453/*---------------------------------------------------------*/ 2454 2455/* Compute a 128-bit value into a register pair, which is returned as 2456 the first two parameters. As with iselIntExpr_R, these may be 2457 either real or virtual regs; in any case they must not be changed 2458 by subsequent code emitted by the caller. */ 2459 2460static void iselInt128Expr ( HReg* rHi, HReg* rLo, 2461 ISelEnv* env, IRExpr* e ) 2462{ 2463 iselInt128Expr_wrk(rHi, rLo, env, e); 2464# if 0 2465 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2466# endif 2467 vassert(hregClass(*rHi) == HRcInt64); 2468 vassert(hregIsVirtual(*rHi)); 2469 vassert(hregClass(*rLo) == HRcInt64); 2470 vassert(hregIsVirtual(*rLo)); 2471} 2472 2473/* DO NOT CALL THIS DIRECTLY ! */ 2474static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, 2475 ISelEnv* env, IRExpr* e ) 2476{ 2477 vassert(e); 2478 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128); 2479 2480 /* read 128-bit IRTemp */ 2481 if (e->tag == Iex_RdTmp) { 2482 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp); 2483 return; 2484 } 2485 2486 /* --------- BINARY ops --------- */ 2487 if (e->tag == Iex_Binop) { 2488 switch (e->Iex.Binop.op) { 2489 /* 64 x 64 -> 128 multiply */ 2490 case Iop_MullU64: 2491 case Iop_MullS64: { 2492 /* get one operand into %rax, and the other into a R/M. 2493 Need to make an educated guess about which is better in 2494 which. */ 2495 HReg tLo = newVRegI(env); 2496 HReg tHi = newVRegI(env); 2497 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64); 2498 AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1); 2499 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2); 2500 addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX())); 2501 addInstr(env, AMD64Instr_MulL(syned, rmLeft)); 2502 /* Result is now in RDX:RAX. Tell the caller. */ 2503 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi)); 2504 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo)); 2505 *rHi = tHi; 2506 *rLo = tLo; 2507 return; 2508 } 2509 2510 /* 128 x 64 -> (64(rem),64(div)) division */ 2511 case Iop_DivModU128to64: 2512 case Iop_DivModS128to64: { 2513 /* Get the 128-bit operand into rdx:rax, and the other into 2514 any old R/M. */ 2515 HReg sHi, sLo; 2516 HReg tLo = newVRegI(env); 2517 HReg tHi = newVRegI(env); 2518 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS128to64); 2519 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2); 2520 iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2521 addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX())); 2522 addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX())); 2523 addInstr(env, AMD64Instr_Div(syned, 8, rmRight)); 2524 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi)); 2525 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo)); 2526 *rHi = tHi; 2527 *rLo = tLo; 2528 return; 2529 } 2530 2531 /* 64HLto128(e1,e2) */ 2532 case Iop_64HLto128: 2533 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2534 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2535 return; 2536 2537 default: 2538 break; 2539 } 2540 } /* if (e->tag == Iex_Binop) */ 2541 2542 ppIRExpr(e); 2543 vpanic("iselInt128Expr"); 2544} 2545 2546 2547/*---------------------------------------------------------*/ 2548/*--- ISEL: Floating point expressions (32 bit) ---*/ 2549/*---------------------------------------------------------*/ 2550 2551/* Nothing interesting here; really just wrappers for 2552 64-bit stuff. */ 2553 2554static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ) 2555{ 2556 HReg r = iselFltExpr_wrk( env, e ); 2557# if 0 2558 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2559# endif 2560 vassert(hregClass(r) == HRcVec128); 2561 vassert(hregIsVirtual(r)); 2562 return r; 2563} 2564 2565/* DO NOT CALL THIS DIRECTLY */ 2566static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ) 2567{ 2568 IRType ty = typeOfIRExpr(env->type_env,e); 2569 vassert(ty == Ity_F32); 2570 2571 if (e->tag == Iex_RdTmp) { 2572 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2573 } 2574 2575 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2576 AMD64AMode* am; 2577 HReg res = newVRegV(env); 2578 vassert(e->Iex.Load.ty == Ity_F32); 2579 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2580 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am)); 2581 return res; 2582 } 2583 2584 if (e->tag == Iex_Binop 2585 && e->Iex.Binop.op == Iop_F64toF32) { 2586 /* Although the result is still held in a standard SSE register, 2587 we need to round it to reflect the loss of accuracy/range 2588 entailed in casting it to a 32-bit float. */ 2589 HReg dst = newVRegV(env); 2590 HReg src = iselDblExpr(env, e->Iex.Binop.arg2); 2591 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 ); 2592 addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst)); 2593 set_SSE_rounding_default( env ); 2594 return dst; 2595 } 2596 2597 if (e->tag == Iex_Get) { 2598 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset, 2599 hregAMD64_RBP() ); 2600 HReg res = newVRegV(env); 2601 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am )); 2602 return res; 2603 } 2604 2605 if (e->tag == Iex_Unop 2606 && e->Iex.Unop.op == Iop_ReinterpI32asF32) { 2607 /* Given an I32, produce an IEEE754 float with the same bit 2608 pattern. */ 2609 HReg dst = newVRegV(env); 2610 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2611 AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP()); 2612 addInstr(env, AMD64Instr_Store(4, src, m4_rsp)); 2613 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp )); 2614 return dst; 2615 } 2616 2617 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) { 2618 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 2619 HReg arg = iselFltExpr(env, e->Iex.Binop.arg2); 2620 HReg dst = newVRegV(env); 2621 2622 /* rf now holds the value to be rounded. The first thing to do 2623 is set the FPU's rounding mode accordingly. */ 2624 2625 /* Set host x87 rounding mode */ 2626 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2627 2628 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp)); 2629 addInstr(env, AMD64Instr_A87Free(1)); 2630 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4)); 2631 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND)); 2632 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4)); 2633 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp)); 2634 2635 /* Restore default x87 rounding. */ 2636 set_FPU_rounding_default( env ); 2637 2638 return dst; 2639 } 2640 2641 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_NegF32) { 2642 /* Sigh ... very rough code. Could do much better. */ 2643 /* Get the 128-bit literal 00---0 10---0 into a register 2644 and xor it with the value to be negated. */ 2645 HReg r1 = newVRegI(env); 2646 HReg dst = newVRegV(env); 2647 HReg tmp = newVRegV(env); 2648 HReg src = iselFltExpr(env, e->Iex.Unop.arg); 2649 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 2650 addInstr(env, mk_vMOVsd_RR(src,tmp)); 2651 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0))); 2652 addInstr(env, AMD64Instr_Imm64( 1ULL<<31, r1 )); 2653 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1))); 2654 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0)); 2655 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst)); 2656 add_to_rsp(env, 16); 2657 return dst; 2658 } 2659 2660 if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF32) { 2661 IRQop *qop = e->Iex.Qop.details; 2662 HReg dst = newVRegV(env); 2663 HReg argX = iselFltExpr(env, qop->arg2); 2664 HReg argY = iselFltExpr(env, qop->arg3); 2665 HReg argZ = iselFltExpr(env, qop->arg4); 2666 /* XXXROUNDINGFIXME */ 2667 /* set roundingmode here */ 2668 /* subq $16, %rsp -- make a space*/ 2669 sub_from_rsp(env, 16); 2670 /* Prepare 4 arg regs: 2671 leaq 0(%rsp), %rdi 2672 leaq 4(%rsp), %rsi 2673 leaq 8(%rsp), %rdx 2674 leaq 12(%rsp), %rcx 2675 */ 2676 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()), 2677 hregAMD64_RDI())); 2678 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(4, hregAMD64_RSP()), 2679 hregAMD64_RSI())); 2680 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()), 2681 hregAMD64_RDX())); 2682 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(12, hregAMD64_RSP()), 2683 hregAMD64_RCX())); 2684 /* Store the three args, at (%rsi), (%rdx) and (%rcx): 2685 movss %argX, 0(%rsi) 2686 movss %argY, 0(%rdx) 2687 movss %argZ, 0(%rcx) 2688 */ 2689 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argX, 2690 AMD64AMode_IR(0, hregAMD64_RSI()))); 2691 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argY, 2692 AMD64AMode_IR(0, hregAMD64_RDX()))); 2693 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argZ, 2694 AMD64AMode_IR(0, hregAMD64_RCX()))); 2695 /* call the helper */ 2696 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, 2697 (ULong)(HWord)h_generic_calc_MAddF32, 2698 4, mk_RetLoc_simple(RLPri_None) )); 2699 /* fetch the result from memory, using %r_argp, which the 2700 register allocator will keep alive across the call. */ 2701 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 4, dst, 2702 AMD64AMode_IR(0, hregAMD64_RSP()))); 2703 /* and finally, clear the space */ 2704 add_to_rsp(env, 16); 2705 return dst; 2706 } 2707 2708 ppIRExpr(e); 2709 vpanic("iselFltExpr_wrk"); 2710} 2711 2712 2713/*---------------------------------------------------------*/ 2714/*--- ISEL: Floating point expressions (64 bit) ---*/ 2715/*---------------------------------------------------------*/ 2716 2717/* Compute a 64-bit floating point value into the lower half of an xmm 2718 register, the identity of which is returned. As with 2719 iselIntExpr_R, the returned reg will be virtual, and it must not be 2720 changed by subsequent code emitted by the caller. 2721*/ 2722 2723/* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm: 2724 2725 Type S (1 bit) E (11 bits) F (52 bits) 2726 ---- --------- ----------- ----------- 2727 signalling NaN u 2047 (max) .0uuuuu---u 2728 (with at least 2729 one 1 bit) 2730 quiet NaN u 2047 (max) .1uuuuu---u 2731 2732 negative infinity 1 2047 (max) .000000---0 2733 2734 positive infinity 0 2047 (max) .000000---0 2735 2736 negative zero 1 0 .000000---0 2737 2738 positive zero 0 0 .000000---0 2739*/ 2740 2741static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ) 2742{ 2743 HReg r = iselDblExpr_wrk( env, e ); 2744# if 0 2745 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2746# endif 2747 vassert(hregClass(r) == HRcVec128); 2748 vassert(hregIsVirtual(r)); 2749 return r; 2750} 2751 2752/* DO NOT CALL THIS DIRECTLY */ 2753static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) 2754{ 2755 IRType ty = typeOfIRExpr(env->type_env,e); 2756 vassert(e); 2757 vassert(ty == Ity_F64); 2758 2759 if (e->tag == Iex_RdTmp) { 2760 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2761 } 2762 2763 if (e->tag == Iex_Const) { 2764 union { ULong u64; Double f64; } u; 2765 HReg res = newVRegV(env); 2766 HReg tmp = newVRegI(env); 2767 vassert(sizeof(u) == 8); 2768 vassert(sizeof(u.u64) == 8); 2769 vassert(sizeof(u.f64) == 8); 2770 2771 if (e->Iex.Const.con->tag == Ico_F64) { 2772 u.f64 = e->Iex.Const.con->Ico.F64; 2773 } 2774 else if (e->Iex.Const.con->tag == Ico_F64i) { 2775 u.u64 = e->Iex.Const.con->Ico.F64i; 2776 } 2777 else 2778 vpanic("iselDblExpr(amd64): const"); 2779 2780 addInstr(env, AMD64Instr_Imm64(u.u64, tmp)); 2781 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp))); 2782 addInstr(env, AMD64Instr_SseLdSt( 2783 True/*load*/, 8, res, 2784 AMD64AMode_IR(0, hregAMD64_RSP()) 2785 )); 2786 add_to_rsp(env, 8); 2787 return res; 2788 } 2789 2790 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2791 AMD64AMode* am; 2792 HReg res = newVRegV(env); 2793 vassert(e->Iex.Load.ty == Ity_F64); 2794 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2795 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am )); 2796 return res; 2797 } 2798 2799 if (e->tag == Iex_Get) { 2800 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset, 2801 hregAMD64_RBP() ); 2802 HReg res = newVRegV(env); 2803 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am )); 2804 return res; 2805 } 2806 2807 if (e->tag == Iex_GetI) { 2808 AMD64AMode* am 2809 = genGuestArrayOffset( 2810 env, e->Iex.GetI.descr, 2811 e->Iex.GetI.ix, e->Iex.GetI.bias ); 2812 HReg res = newVRegV(env); 2813 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am )); 2814 return res; 2815 } 2816 2817 if (e->tag == Iex_Triop) { 2818 IRTriop *triop = e->Iex.Triop.details; 2819 AMD64SseOp op = Asse_INVALID; 2820 switch (triop->op) { 2821 case Iop_AddF64: op = Asse_ADDF; break; 2822 case Iop_SubF64: op = Asse_SUBF; break; 2823 case Iop_MulF64: op = Asse_MULF; break; 2824 case Iop_DivF64: op = Asse_DIVF; break; 2825 default: break; 2826 } 2827 if (op != Asse_INVALID) { 2828 HReg dst = newVRegV(env); 2829 HReg argL = iselDblExpr(env, triop->arg2); 2830 HReg argR = iselDblExpr(env, triop->arg3); 2831 addInstr(env, mk_vMOVsd_RR(argL, dst)); 2832 /* XXXROUNDINGFIXME */ 2833 /* set roundingmode here */ 2834 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst)); 2835 return dst; 2836 } 2837 } 2838 2839 if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF64) { 2840 IRQop *qop = e->Iex.Qop.details; 2841 HReg dst = newVRegV(env); 2842 HReg argX = iselDblExpr(env, qop->arg2); 2843 HReg argY = iselDblExpr(env, qop->arg3); 2844 HReg argZ = iselDblExpr(env, qop->arg4); 2845 /* XXXROUNDINGFIXME */ 2846 /* set roundingmode here */ 2847 /* subq $32, %rsp -- make a space*/ 2848 sub_from_rsp(env, 32); 2849 /* Prepare 4 arg regs: 2850 leaq 0(%rsp), %rdi 2851 leaq 8(%rsp), %rsi 2852 leaq 16(%rsp), %rdx 2853 leaq 24(%rsp), %rcx 2854 */ 2855 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()), 2856 hregAMD64_RDI())); 2857 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()), 2858 hregAMD64_RSI())); 2859 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, hregAMD64_RSP()), 2860 hregAMD64_RDX())); 2861 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(24, hregAMD64_RSP()), 2862 hregAMD64_RCX())); 2863 /* Store the three args, at (%rsi), (%rdx) and (%rcx): 2864 movsd %argX, 0(%rsi) 2865 movsd %argY, 0(%rdx) 2866 movsd %argZ, 0(%rcx) 2867 */ 2868 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argX, 2869 AMD64AMode_IR(0, hregAMD64_RSI()))); 2870 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argY, 2871 AMD64AMode_IR(0, hregAMD64_RDX()))); 2872 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argZ, 2873 AMD64AMode_IR(0, hregAMD64_RCX()))); 2874 /* call the helper */ 2875 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, 2876 (ULong)(HWord)h_generic_calc_MAddF64, 2877 4, mk_RetLoc_simple(RLPri_None) )); 2878 /* fetch the result from memory, using %r_argp, which the 2879 register allocator will keep alive across the call. */ 2880 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 8, dst, 2881 AMD64AMode_IR(0, hregAMD64_RSP()))); 2882 /* and finally, clear the space */ 2883 add_to_rsp(env, 32); 2884 return dst; 2885 } 2886 2887 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) { 2888 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 2889 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2); 2890 HReg dst = newVRegV(env); 2891 2892 /* rf now holds the value to be rounded. The first thing to do 2893 is set the FPU's rounding mode accordingly. */ 2894 2895 /* Set host x87 rounding mode */ 2896 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2897 2898 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp)); 2899 addInstr(env, AMD64Instr_A87Free(1)); 2900 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 2901 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND)); 2902 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); 2903 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 2904 2905 /* Restore default x87 rounding. */ 2906 set_FPU_rounding_default( env ); 2907 2908 return dst; 2909 } 2910 2911 IRTriop *triop = e->Iex.Triop.details; 2912 if (e->tag == Iex_Triop 2913 && (triop->op == Iop_ScaleF64 2914 || triop->op == Iop_AtanF64 2915 || triop->op == Iop_Yl2xF64 2916 || triop->op == Iop_Yl2xp1F64 2917 || triop->op == Iop_PRemF64 2918 || triop->op == Iop_PRem1F64) 2919 ) { 2920 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 2921 HReg arg1 = iselDblExpr(env, triop->arg2); 2922 HReg arg2 = iselDblExpr(env, triop->arg3); 2923 HReg dst = newVRegV(env); 2924 Bool arg2first = toBool(triop->op == Iop_ScaleF64 2925 || triop->op == Iop_PRemF64 2926 || triop->op == Iop_PRem1F64); 2927 addInstr(env, AMD64Instr_A87Free(2)); 2928 2929 /* one arg -> top of x87 stack */ 2930 addInstr(env, AMD64Instr_SseLdSt( 2931 False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp)); 2932 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 2933 2934 /* other arg -> top of x87 stack */ 2935 addInstr(env, AMD64Instr_SseLdSt( 2936 False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp)); 2937 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 2938 2939 /* do it */ 2940 /* XXXROUNDINGFIXME */ 2941 /* set roundingmode here */ 2942 switch (triop->op) { 2943 case Iop_ScaleF64: 2944 addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE)); 2945 break; 2946 case Iop_AtanF64: 2947 addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN)); 2948 break; 2949 case Iop_Yl2xF64: 2950 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X)); 2951 break; 2952 case Iop_Yl2xp1F64: 2953 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1)); 2954 break; 2955 case Iop_PRemF64: 2956 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM)); 2957 break; 2958 case Iop_PRem1F64: 2959 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1)); 2960 break; 2961 default: 2962 vassert(0); 2963 } 2964 2965 /* save result */ 2966 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); 2967 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 2968 return dst; 2969 } 2970 2971 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) { 2972 HReg dst = newVRegV(env); 2973 HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2); 2974 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 ); 2975 addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst )); 2976 set_SSE_rounding_default( env ); 2977 return dst; 2978 } 2979 2980 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32StoF64) { 2981 HReg dst = newVRegV(env); 2982 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2983 set_SSE_rounding_default( env ); 2984 addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst )); 2985 return dst; 2986 } 2987 2988 if (e->tag == Iex_Unop 2989 && (e->Iex.Unop.op == Iop_NegF64 2990 || e->Iex.Unop.op == Iop_AbsF64)) { 2991 /* Sigh ... very rough code. Could do much better. */ 2992 /* Get the 128-bit literal 00---0 10---0 into a register 2993 and xor/nand it with the value to be negated. */ 2994 HReg r1 = newVRegI(env); 2995 HReg dst = newVRegV(env); 2996 HReg tmp = newVRegV(env); 2997 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 2998 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 2999 addInstr(env, mk_vMOVsd_RR(src,tmp)); 3000 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0))); 3001 addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 )); 3002 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1))); 3003 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0)); 3004 3005 if (e->Iex.Unop.op == Iop_NegF64) 3006 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst)); 3007 else 3008 addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst)); 3009 3010 add_to_rsp(env, 16); 3011 return dst; 3012 } 3013 3014 if (e->tag == Iex_Binop) { 3015 A87FpOp fpop = Afp_INVALID; 3016 switch (e->Iex.Binop.op) { 3017 case Iop_SqrtF64: fpop = Afp_SQRT; break; 3018 case Iop_SinF64: fpop = Afp_SIN; break; 3019 case Iop_CosF64: fpop = Afp_COS; break; 3020 case Iop_TanF64: fpop = Afp_TAN; break; 3021 case Iop_2xm1F64: fpop = Afp_2XM1; break; 3022 default: break; 3023 } 3024 if (fpop != Afp_INVALID) { 3025 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 3026 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2); 3027 HReg dst = newVRegV(env); 3028 Int nNeeded = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1; 3029 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp)); 3030 addInstr(env, AMD64Instr_A87Free(nNeeded)); 3031 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 3032 /* XXXROUNDINGFIXME */ 3033 /* set roundingmode here */ 3034 /* Note that AMD64Instr_A87FpOp(Afp_TAN) sets the condition 3035 codes. I don't think that matters, since this insn 3036 selector never generates such an instruction intervening 3037 between an flag-setting instruction and a flag-using 3038 instruction. */ 3039 addInstr(env, AMD64Instr_A87FpOp(fpop)); 3040 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); 3041 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 3042 return dst; 3043 } 3044 } 3045 3046 if (e->tag == Iex_Unop) { 3047 switch (e->Iex.Unop.op) { 3048//.. case Iop_I32toF64: { 3049//.. HReg dst = newVRegF(env); 3050//.. HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg); 3051//.. addInstr(env, X86Instr_Push(X86RMI_Reg(ri))); 3052//.. set_FPU_rounding_default(env); 3053//.. addInstr(env, X86Instr_FpLdStI( 3054//.. True/*load*/, 4, dst, 3055//.. X86AMode_IR(0, hregX86_ESP()))); 3056//.. add_to_esp(env, 4); 3057//.. return dst; 3058//.. } 3059 case Iop_ReinterpI64asF64: { 3060 /* Given an I64, produce an IEEE754 double with the same 3061 bit pattern. */ 3062 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 3063 HReg dst = newVRegV(env); 3064 AMD64RI* src = iselIntExpr_RI(env, e->Iex.Unop.arg); 3065 /* paranoia */ 3066 set_SSE_rounding_default(env); 3067 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp)); 3068 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 3069 return dst; 3070 } 3071 case Iop_F32toF64: { 3072 HReg f32; 3073 HReg f64 = newVRegV(env); 3074 /* this shouldn't be necessary, but be paranoid ... */ 3075 set_SSE_rounding_default(env); 3076 f32 = iselFltExpr(env, e->Iex.Unop.arg); 3077 addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64)); 3078 return f64; 3079 } 3080 default: 3081 break; 3082 } 3083 } 3084 3085 /* --------- MULTIPLEX --------- */ 3086 if (e->tag == Iex_ITE) { // VFD 3087 HReg r1, r0, dst; 3088 vassert(ty == Ity_F64); 3089 vassert(typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1); 3090 r1 = iselDblExpr(env, e->Iex.ITE.iftrue); 3091 r0 = iselDblExpr(env, e->Iex.ITE.iffalse); 3092 dst = newVRegV(env); 3093 addInstr(env, mk_vMOVsd_RR(r1,dst)); 3094 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond); 3095 addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst)); 3096 return dst; 3097 } 3098 3099 ppIRExpr(e); 3100 vpanic("iselDblExpr_wrk"); 3101} 3102 3103 3104/*---------------------------------------------------------*/ 3105/*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/ 3106/*---------------------------------------------------------*/ 3107 3108static HReg iselVecExpr ( ISelEnv* env, IRExpr* e ) 3109{ 3110 HReg r = iselVecExpr_wrk( env, e ); 3111# if 0 3112 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3113# endif 3114 vassert(hregClass(r) == HRcVec128); 3115 vassert(hregIsVirtual(r)); 3116 return r; 3117} 3118 3119 3120/* DO NOT CALL THIS DIRECTLY */ 3121static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) 3122{ 3123 HWord fn = 0; /* address of helper fn, if required */ 3124 Bool arg1isEReg = False; 3125 AMD64SseOp op = Asse_INVALID; 3126 IRType ty = typeOfIRExpr(env->type_env,e); 3127 vassert(e); 3128 vassert(ty == Ity_V128); 3129 3130 if (e->tag == Iex_RdTmp) { 3131 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 3132 } 3133 3134 if (e->tag == Iex_Get) { 3135 HReg dst = newVRegV(env); 3136 addInstr(env, AMD64Instr_SseLdSt( 3137 True/*load*/, 3138 16, 3139 dst, 3140 AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP()) 3141 ) 3142 ); 3143 return dst; 3144 } 3145 3146 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 3147 HReg dst = newVRegV(env); 3148 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 3149 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am )); 3150 return dst; 3151 } 3152 3153 if (e->tag == Iex_Const) { 3154 HReg dst = newVRegV(env); 3155 vassert(e->Iex.Const.con->tag == Ico_V128); 3156 switch (e->Iex.Const.con->Ico.V128) { 3157 case 0x0000: 3158 dst = generate_zeroes_V128(env); 3159 break; 3160 case 0xFFFF: 3161 dst = generate_ones_V128(env); 3162 break; 3163 default: { 3164 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 3165 /* do push_uimm64 twice, first time for the high-order half. */ 3166 push_uimm64(env, bitmask8_to_bytemask64( 3167 (e->Iex.Const.con->Ico.V128 >> 8) & 0xFF 3168 )); 3169 push_uimm64(env, bitmask8_to_bytemask64( 3170 (e->Iex.Const.con->Ico.V128 >> 0) & 0xFF 3171 )); 3172 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 )); 3173 add_to_rsp(env, 16); 3174 break; 3175 } 3176 } 3177 return dst; 3178 } 3179 3180 if (e->tag == Iex_Unop) { 3181 switch (e->Iex.Unop.op) { 3182 3183 case Iop_NotV128: { 3184 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3185 return do_sse_NotV128(env, arg); 3186 } 3187 3188 case Iop_CmpNEZ64x2: { 3189 /* We can use SSE2 instructions for this. */ 3190 /* Ideally, we want to do a 64Ix2 comparison against zero of 3191 the operand. Problem is no such insn exists. Solution 3192 therefore is to do a 32Ix4 comparison instead, and bitwise- 3193 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and 3194 let the not'd result of this initial comparison be a:b:c:d. 3195 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use 3196 pshufd to create a value b:a:d:c, and OR that with a:b:c:d, 3197 giving the required result. 3198 3199 The required selection sequence is 2,3,0,1, which 3200 according to Intel's documentation means the pshufd 3201 literal value is 0xB1, that is, 3202 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0) 3203 */ 3204 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3205 HReg tmp = generate_zeroes_V128(env); 3206 HReg dst = newVRegV(env); 3207 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp)); 3208 tmp = do_sse_NotV128(env, tmp); 3209 addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst)); 3210 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst)); 3211 return dst; 3212 } 3213 3214 case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector; 3215 case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector; 3216 case Iop_CmpNEZ8x16: op = Asse_CMPEQ8; goto do_CmpNEZ_vector; 3217 do_CmpNEZ_vector: 3218 { 3219 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3220 HReg tmp = newVRegV(env); 3221 HReg zero = generate_zeroes_V128(env); 3222 HReg dst; 3223 addInstr(env, mk_vMOVsd_RR(arg, tmp)); 3224 addInstr(env, AMD64Instr_SseReRg(op, zero, tmp)); 3225 dst = do_sse_NotV128(env, tmp); 3226 return dst; 3227 } 3228 3229 case Iop_Recip32Fx4: op = Asse_RCPF; goto do_32Fx4_unary; 3230 case Iop_RSqrt32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary; 3231 case Iop_Sqrt32Fx4: op = Asse_SQRTF; goto do_32Fx4_unary; 3232 do_32Fx4_unary: 3233 { 3234 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3235 HReg dst = newVRegV(env); 3236 addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst)); 3237 return dst; 3238 } 3239 3240 case Iop_Sqrt64Fx2: op = Asse_SQRTF; goto do_64Fx2_unary; 3241 do_64Fx2_unary: 3242 { 3243 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3244 HReg dst = newVRegV(env); 3245 addInstr(env, AMD64Instr_Sse64Fx2(op, arg, dst)); 3246 return dst; 3247 } 3248 3249 case Iop_Recip32F0x4: op = Asse_RCPF; goto do_32F0x4_unary; 3250 case Iop_RSqrt32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary; 3251 case Iop_Sqrt32F0x4: op = Asse_SQRTF; goto do_32F0x4_unary; 3252 do_32F0x4_unary: 3253 { 3254 /* A bit subtle. We have to copy the arg to the result 3255 register first, because actually doing the SSE scalar insn 3256 leaves the upper 3/4 of the destination register 3257 unchanged. Whereas the required semantics of these 3258 primops is that the upper 3/4 is simply copied in from the 3259 argument. */ 3260 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3261 HReg dst = newVRegV(env); 3262 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3263 addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst)); 3264 return dst; 3265 } 3266 3267 case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary; 3268 do_64F0x2_unary: 3269 { 3270 /* A bit subtle. We have to copy the arg to the result 3271 register first, because actually doing the SSE scalar insn 3272 leaves the upper half of the destination register 3273 unchanged. Whereas the required semantics of these 3274 primops is that the upper half is simply copied in from the 3275 argument. */ 3276 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3277 HReg dst = newVRegV(env); 3278 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3279 addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst)); 3280 return dst; 3281 } 3282 3283 case Iop_32UtoV128: { 3284 HReg dst = newVRegV(env); 3285 AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP()); 3286 AMD64RI* ri = iselIntExpr_RI(env, e->Iex.Unop.arg); 3287 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32)); 3288 addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32)); 3289 return dst; 3290 } 3291 3292 case Iop_64UtoV128: { 3293 HReg dst = newVRegV(env); 3294 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 3295 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg); 3296 addInstr(env, AMD64Instr_Push(rmi)); 3297 addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0)); 3298 add_to_rsp(env, 8); 3299 return dst; 3300 } 3301 3302 case Iop_V256toV128_0: 3303 case Iop_V256toV128_1: { 3304 HReg vHi, vLo; 3305 iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg); 3306 return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo; 3307 } 3308 3309 default: 3310 break; 3311 } /* switch (e->Iex.Unop.op) */ 3312 } /* if (e->tag == Iex_Unop) */ 3313 3314 if (e->tag == Iex_Binop) { 3315 switch (e->Iex.Binop.op) { 3316 3317 /* FIXME: could we generate MOVQ here? */ 3318 case Iop_SetV128lo64: { 3319 HReg dst = newVRegV(env); 3320 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3321 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); 3322 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP()); 3323 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16)); 3324 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16)); 3325 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16)); 3326 return dst; 3327 } 3328 3329 /* FIXME: could we generate MOVD here? */ 3330 case Iop_SetV128lo32: { 3331 HReg dst = newVRegV(env); 3332 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3333 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); 3334 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP()); 3335 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16)); 3336 addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16)); 3337 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16)); 3338 return dst; 3339 } 3340 3341 case Iop_64HLtoV128: { 3342 HReg rsp = hregAMD64_RSP(); 3343 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, rsp); 3344 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp); 3345 AMD64RI* qHi = iselIntExpr_RI(env, e->Iex.Binop.arg1); 3346 AMD64RI* qLo = iselIntExpr_RI(env, e->Iex.Binop.arg2); 3347 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qHi, m8_rsp)); 3348 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qLo, m16_rsp)); 3349 HReg dst = newVRegV(env); 3350 /* One store-forwarding stall coming up, oh well :-( */ 3351 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, m16_rsp)); 3352 return dst; 3353 } 3354 3355 case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4; 3356 case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4; 3357 case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4; 3358 case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4; 3359 case Iop_Max32Fx4: op = Asse_MAXF; goto do_32Fx4; 3360 case Iop_Min32Fx4: op = Asse_MINF; goto do_32Fx4; 3361 do_32Fx4: 3362 { 3363 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3364 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3365 HReg dst = newVRegV(env); 3366 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3367 addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst)); 3368 return dst; 3369 } 3370 3371 case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2; 3372 case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2; 3373 case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2; 3374 case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2; 3375 case Iop_Max64Fx2: op = Asse_MAXF; goto do_64Fx2; 3376 case Iop_Min64Fx2: op = Asse_MINF; goto do_64Fx2; 3377 do_64Fx2: 3378 { 3379 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3380 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3381 HReg dst = newVRegV(env); 3382 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3383 addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst)); 3384 return dst; 3385 } 3386 3387 case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4; 3388 case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4; 3389 case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4; 3390 case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4; 3391 case Iop_Add32F0x4: op = Asse_ADDF; goto do_32F0x4; 3392 case Iop_Div32F0x4: op = Asse_DIVF; goto do_32F0x4; 3393 case Iop_Max32F0x4: op = Asse_MAXF; goto do_32F0x4; 3394 case Iop_Min32F0x4: op = Asse_MINF; goto do_32F0x4; 3395 case Iop_Mul32F0x4: op = Asse_MULF; goto do_32F0x4; 3396 case Iop_Sub32F0x4: op = Asse_SUBF; goto do_32F0x4; 3397 do_32F0x4: { 3398 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3399 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3400 HReg dst = newVRegV(env); 3401 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3402 addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst)); 3403 return dst; 3404 } 3405 3406 case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2; 3407 case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2; 3408 case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2; 3409 case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2; 3410 case Iop_Add64F0x2: op = Asse_ADDF; goto do_64F0x2; 3411 case Iop_Div64F0x2: op = Asse_DIVF; goto do_64F0x2; 3412 case Iop_Max64F0x2: op = Asse_MAXF; goto do_64F0x2; 3413 case Iop_Min64F0x2: op = Asse_MINF; goto do_64F0x2; 3414 case Iop_Mul64F0x2: op = Asse_MULF; goto do_64F0x2; 3415 case Iop_Sub64F0x2: op = Asse_SUBF; goto do_64F0x2; 3416 do_64F0x2: { 3417 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3418 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3419 HReg dst = newVRegV(env); 3420 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3421 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst)); 3422 return dst; 3423 } 3424 3425 case Iop_QNarrowBin32Sto16Sx8: 3426 op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg; 3427 case Iop_QNarrowBin16Sto8Sx16: 3428 op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg; 3429 case Iop_QNarrowBin16Sto8Ux16: 3430 op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg; 3431 3432 case Iop_InterleaveHI8x16: 3433 op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg; 3434 case Iop_InterleaveHI16x8: 3435 op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg; 3436 case Iop_InterleaveHI32x4: 3437 op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg; 3438 case Iop_InterleaveHI64x2: 3439 op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg; 3440 3441 case Iop_InterleaveLO8x16: 3442 op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg; 3443 case Iop_InterleaveLO16x8: 3444 op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg; 3445 case Iop_InterleaveLO32x4: 3446 op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg; 3447 case Iop_InterleaveLO64x2: 3448 op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg; 3449 3450 case Iop_AndV128: op = Asse_AND; goto do_SseReRg; 3451 case Iop_OrV128: op = Asse_OR; goto do_SseReRg; 3452 case Iop_XorV128: op = Asse_XOR; goto do_SseReRg; 3453 case Iop_Add8x16: op = Asse_ADD8; goto do_SseReRg; 3454 case Iop_Add16x8: op = Asse_ADD16; goto do_SseReRg; 3455 case Iop_Add32x4: op = Asse_ADD32; goto do_SseReRg; 3456 case Iop_Add64x2: op = Asse_ADD64; goto do_SseReRg; 3457 case Iop_QAdd8Sx16: op = Asse_QADD8S; goto do_SseReRg; 3458 case Iop_QAdd16Sx8: op = Asse_QADD16S; goto do_SseReRg; 3459 case Iop_QAdd8Ux16: op = Asse_QADD8U; goto do_SseReRg; 3460 case Iop_QAdd16Ux8: op = Asse_QADD16U; goto do_SseReRg; 3461 case Iop_Avg8Ux16: op = Asse_AVG8U; goto do_SseReRg; 3462 case Iop_Avg16Ux8: op = Asse_AVG16U; goto do_SseReRg; 3463 case Iop_CmpEQ8x16: op = Asse_CMPEQ8; goto do_SseReRg; 3464 case Iop_CmpEQ16x8: op = Asse_CMPEQ16; goto do_SseReRg; 3465 case Iop_CmpEQ32x4: op = Asse_CMPEQ32; goto do_SseReRg; 3466 case Iop_CmpGT8Sx16: op = Asse_CMPGT8S; goto do_SseReRg; 3467 case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg; 3468 case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg; 3469 case Iop_Max16Sx8: op = Asse_MAX16S; goto do_SseReRg; 3470 case Iop_Max8Ux16: op = Asse_MAX8U; goto do_SseReRg; 3471 case Iop_Min16Sx8: op = Asse_MIN16S; goto do_SseReRg; 3472 case Iop_Min8Ux16: op = Asse_MIN8U; goto do_SseReRg; 3473 case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg; 3474 case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg; 3475 case Iop_Mul16x8: op = Asse_MUL16; goto do_SseReRg; 3476 case Iop_Sub8x16: op = Asse_SUB8; goto do_SseReRg; 3477 case Iop_Sub16x8: op = Asse_SUB16; goto do_SseReRg; 3478 case Iop_Sub32x4: op = Asse_SUB32; goto do_SseReRg; 3479 case Iop_Sub64x2: op = Asse_SUB64; goto do_SseReRg; 3480 case Iop_QSub8Sx16: op = Asse_QSUB8S; goto do_SseReRg; 3481 case Iop_QSub16Sx8: op = Asse_QSUB16S; goto do_SseReRg; 3482 case Iop_QSub8Ux16: op = Asse_QSUB8U; goto do_SseReRg; 3483 case Iop_QSub16Ux8: op = Asse_QSUB16U; goto do_SseReRg; 3484 do_SseReRg: { 3485 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1); 3486 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2); 3487 HReg dst = newVRegV(env); 3488 if (arg1isEReg) { 3489 addInstr(env, mk_vMOVsd_RR(arg2, dst)); 3490 addInstr(env, AMD64Instr_SseReRg(op, arg1, dst)); 3491 } else { 3492 addInstr(env, mk_vMOVsd_RR(arg1, dst)); 3493 addInstr(env, AMD64Instr_SseReRg(op, arg2, dst)); 3494 } 3495 return dst; 3496 } 3497 3498 case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift; 3499 case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift; 3500 case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift; 3501 case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift; 3502 case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift; 3503 case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift; 3504 case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift; 3505 case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift; 3506 do_SseShift: { 3507 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1); 3508 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 3509 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 3510 HReg ereg = newVRegV(env); 3511 HReg dst = newVRegV(env); 3512 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0))); 3513 addInstr(env, AMD64Instr_Push(rmi)); 3514 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0)); 3515 addInstr(env, mk_vMOVsd_RR(greg, dst)); 3516 addInstr(env, AMD64Instr_SseReRg(op, ereg, dst)); 3517 add_to_rsp(env, 16); 3518 return dst; 3519 } 3520 3521 case Iop_Mul32x4: fn = (HWord)h_generic_calc_Mul32x4; 3522 goto do_SseAssistedBinary; 3523 case Iop_Max32Sx4: fn = (HWord)h_generic_calc_Max32Sx4; 3524 goto do_SseAssistedBinary; 3525 case Iop_Min32Sx4: fn = (HWord)h_generic_calc_Min32Sx4; 3526 goto do_SseAssistedBinary; 3527 case Iop_Max32Ux4: fn = (HWord)h_generic_calc_Max32Ux4; 3528 goto do_SseAssistedBinary; 3529 case Iop_Min32Ux4: fn = (HWord)h_generic_calc_Min32Ux4; 3530 goto do_SseAssistedBinary; 3531 case Iop_Max16Ux8: fn = (HWord)h_generic_calc_Max16Ux8; 3532 goto do_SseAssistedBinary; 3533 case Iop_Min16Ux8: fn = (HWord)h_generic_calc_Min16Ux8; 3534 goto do_SseAssistedBinary; 3535 case Iop_Max8Sx16: fn = (HWord)h_generic_calc_Max8Sx16; 3536 goto do_SseAssistedBinary; 3537 case Iop_Min8Sx16: fn = (HWord)h_generic_calc_Min8Sx16; 3538 goto do_SseAssistedBinary; 3539 case Iop_CmpEQ64x2: fn = (HWord)h_generic_calc_CmpEQ64x2; 3540 goto do_SseAssistedBinary; 3541 case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2; 3542 goto do_SseAssistedBinary; 3543 case Iop_Perm32x4: fn = (HWord)h_generic_calc_Perm32x4; 3544 goto do_SseAssistedBinary; 3545 case Iop_QNarrowBin32Sto16Ux8: 3546 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Ux8; 3547 goto do_SseAssistedBinary; 3548 case Iop_NarrowBin16to8x16: 3549 fn = (HWord)h_generic_calc_NarrowBin16to8x16; 3550 goto do_SseAssistedBinary; 3551 case Iop_NarrowBin32to16x8: 3552 fn = (HWord)h_generic_calc_NarrowBin32to16x8; 3553 goto do_SseAssistedBinary; 3554 do_SseAssistedBinary: { 3555 /* RRRufff! RRRufff code is what we're generating here. Oh 3556 well. */ 3557 vassert(fn != 0); 3558 HReg dst = newVRegV(env); 3559 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3560 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3561 HReg argp = newVRegI(env); 3562 /* subq $112, %rsp -- make a space*/ 3563 sub_from_rsp(env, 112); 3564 /* leaq 48(%rsp), %r_argp -- point into it */ 3565 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()), 3566 argp)); 3567 /* andq $-16, %r_argp -- 16-align the pointer */ 3568 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, 3569 AMD64RMI_Imm( ~(UInt)15 ), 3570 argp)); 3571 /* Prepare 3 arg regs: 3572 leaq 0(%r_argp), %rdi 3573 leaq 16(%r_argp), %rsi 3574 leaq 32(%r_argp), %rdx 3575 */ 3576 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp), 3577 hregAMD64_RDI())); 3578 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp), 3579 hregAMD64_RSI())); 3580 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp), 3581 hregAMD64_RDX())); 3582 /* Store the two args, at (%rsi) and (%rdx): 3583 movupd %argL, 0(%rsi) 3584 movupd %argR, 0(%rdx) 3585 */ 3586 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL, 3587 AMD64AMode_IR(0, hregAMD64_RSI()))); 3588 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argR, 3589 AMD64AMode_IR(0, hregAMD64_RDX()))); 3590 /* call the helper */ 3591 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3592 3, mk_RetLoc_simple(RLPri_None) )); 3593 /* fetch the result from memory, using %r_argp, which the 3594 register allocator will keep alive across the call. */ 3595 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst, 3596 AMD64AMode_IR(0, argp))); 3597 /* and finally, clear the space */ 3598 add_to_rsp(env, 112); 3599 return dst; 3600 } 3601 3602 case Iop_SarN64x2: fn = (HWord)h_generic_calc_SarN64x2; 3603 goto do_SseAssistedVectorAndScalar; 3604 case Iop_SarN8x16: fn = (HWord)h_generic_calc_SarN8x16; 3605 goto do_SseAssistedVectorAndScalar; 3606 do_SseAssistedVectorAndScalar: { 3607 /* RRRufff! RRRufff code is what we're generating here. Oh 3608 well. */ 3609 vassert(fn != 0); 3610 HReg dst = newVRegV(env); 3611 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3612 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 3613 HReg argp = newVRegI(env); 3614 /* subq $112, %rsp -- make a space*/ 3615 sub_from_rsp(env, 112); 3616 /* leaq 48(%rsp), %r_argp -- point into it */ 3617 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()), 3618 argp)); 3619 /* andq $-16, %r_argp -- 16-align the pointer */ 3620 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, 3621 AMD64RMI_Imm( ~(UInt)15 ), 3622 argp)); 3623 /* Prepare 2 vector arg regs: 3624 leaq 0(%r_argp), %rdi 3625 leaq 16(%r_argp), %rsi 3626 */ 3627 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp), 3628 hregAMD64_RDI())); 3629 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp), 3630 hregAMD64_RSI())); 3631 /* Store the vector arg, at (%rsi): 3632 movupd %argL, 0(%rsi) 3633 */ 3634 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL, 3635 AMD64AMode_IR(0, hregAMD64_RSI()))); 3636 /* And get the scalar value into rdx */ 3637 addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RDX())); 3638 3639 /* call the helper */ 3640 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3641 3, mk_RetLoc_simple(RLPri_None) )); 3642 /* fetch the result from memory, using %r_argp, which the 3643 register allocator will keep alive across the call. */ 3644 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst, 3645 AMD64AMode_IR(0, argp))); 3646 /* and finally, clear the space */ 3647 add_to_rsp(env, 112); 3648 return dst; 3649 } 3650 3651 default: 3652 break; 3653 } /* switch (e->Iex.Binop.op) */ 3654 } /* if (e->tag == Iex_Binop) */ 3655 3656 if (e->tag == Iex_Triop) { 3657 IRTriop *triop = e->Iex.Triop.details; 3658 switch (triop->op) { 3659 3660 case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2_w_rm; 3661 case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2_w_rm; 3662 case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2_w_rm; 3663 case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2_w_rm; 3664 do_64Fx2_w_rm: 3665 { 3666 HReg argL = iselVecExpr(env, triop->arg2); 3667 HReg argR = iselVecExpr(env, triop->arg3); 3668 HReg dst = newVRegV(env); 3669 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3670 /* XXXROUNDINGFIXME */ 3671 /* set roundingmode here */ 3672 addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst)); 3673 return dst; 3674 } 3675 3676 case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4_w_rm; 3677 case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4_w_rm; 3678 case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4_w_rm; 3679 case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4_w_rm; 3680 do_32Fx4_w_rm: 3681 { 3682 HReg argL = iselVecExpr(env, triop->arg2); 3683 HReg argR = iselVecExpr(env, triop->arg3); 3684 HReg dst = newVRegV(env); 3685 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3686 /* XXXROUNDINGFIXME */ 3687 /* set roundingmode here */ 3688 addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst)); 3689 return dst; 3690 } 3691 3692 default: 3693 break; 3694 } /* switch (triop->op) */ 3695 } /* if (e->tag == Iex_Triop) */ 3696 3697 if (e->tag == Iex_ITE) { // VFD 3698 HReg r1 = iselVecExpr(env, e->Iex.ITE.iftrue); 3699 HReg r0 = iselVecExpr(env, e->Iex.ITE.iffalse); 3700 HReg dst = newVRegV(env); 3701 addInstr(env, mk_vMOVsd_RR(r1,dst)); 3702 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond); 3703 addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst)); 3704 return dst; 3705 } 3706 3707 //vec_fail: 3708 vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n", 3709 LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps)); 3710 ppIRExpr(e); 3711 vpanic("iselVecExpr_wrk"); 3712} 3713 3714 3715/*---------------------------------------------------------*/ 3716/*--- ISEL: SIMD (V256) expressions, into 2 XMM regs. --*/ 3717/*---------------------------------------------------------*/ 3718 3719static void iselDVecExpr ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo, 3720 ISelEnv* env, IRExpr* e ) 3721{ 3722 iselDVecExpr_wrk( rHi, rLo, env, e ); 3723# if 0 3724 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3725# endif 3726 vassert(hregClass(*rHi) == HRcVec128); 3727 vassert(hregClass(*rLo) == HRcVec128); 3728 vassert(hregIsVirtual(*rHi)); 3729 vassert(hregIsVirtual(*rLo)); 3730} 3731 3732 3733/* DO NOT CALL THIS DIRECTLY */ 3734static void iselDVecExpr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo, 3735 ISelEnv* env, IRExpr* e ) 3736{ 3737 HWord fn = 0; /* address of helper fn, if required */ 3738 vassert(e); 3739 IRType ty = typeOfIRExpr(env->type_env,e); 3740 vassert(ty == Ity_V256); 3741 3742 AMD64SseOp op = Asse_INVALID; 3743 3744 /* read 256-bit IRTemp */ 3745 if (e->tag == Iex_RdTmp) { 3746 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp); 3747 return; 3748 } 3749 3750 if (e->tag == Iex_Get) { 3751 HReg vHi = newVRegV(env); 3752 HReg vLo = newVRegV(env); 3753 HReg rbp = hregAMD64_RBP(); 3754 AMD64AMode* am0 = AMD64AMode_IR(e->Iex.Get.offset + 0, rbp); 3755 AMD64AMode* am16 = AMD64AMode_IR(e->Iex.Get.offset + 16, rbp); 3756 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0)); 3757 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16)); 3758 *rHi = vHi; 3759 *rLo = vLo; 3760 return; 3761 } 3762 3763 if (e->tag == Iex_Load) { 3764 HReg vHi = newVRegV(env); 3765 HReg vLo = newVRegV(env); 3766 HReg rA = iselIntExpr_R(env, e->Iex.Load.addr); 3767 AMD64AMode* am0 = AMD64AMode_IR(0, rA); 3768 AMD64AMode* am16 = AMD64AMode_IR(16, rA); 3769 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0)); 3770 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16)); 3771 *rHi = vHi; 3772 *rLo = vLo; 3773 return; 3774 } 3775 3776 if (e->tag == Iex_Const) { 3777 vassert(e->Iex.Const.con->tag == Ico_V256); 3778 switch (e->Iex.Const.con->Ico.V256) { 3779 case 0x00000000: { 3780 HReg vHi = generate_zeroes_V128(env); 3781 HReg vLo = newVRegV(env); 3782 addInstr(env, mk_vMOVsd_RR(vHi, vLo)); 3783 *rHi = vHi; 3784 *rLo = vLo; 3785 return; 3786 } 3787 default: 3788 break; /* give up. Until such time as is necessary. */ 3789 } 3790 } 3791 3792 if (e->tag == Iex_Unop) { 3793 switch (e->Iex.Unop.op) { 3794 3795 case Iop_NotV256: { 3796 HReg argHi, argLo; 3797 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg); 3798 *rHi = do_sse_NotV128(env, argHi); 3799 *rLo = do_sse_NotV128(env, argLo); 3800 return; 3801 } 3802 3803 case Iop_Recip32Fx8: op = Asse_RCPF; goto do_32Fx8_unary; 3804 case Iop_Sqrt32Fx8: op = Asse_SQRTF; goto do_32Fx8_unary; 3805 case Iop_RSqrt32Fx8: op = Asse_RSQRTF; goto do_32Fx8_unary; 3806 do_32Fx8_unary: 3807 { 3808 HReg argHi, argLo; 3809 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg); 3810 HReg dstHi = newVRegV(env); 3811 HReg dstLo = newVRegV(env); 3812 addInstr(env, AMD64Instr_Sse32Fx4(op, argHi, dstHi)); 3813 addInstr(env, AMD64Instr_Sse32Fx4(op, argLo, dstLo)); 3814 *rHi = dstHi; 3815 *rLo = dstLo; 3816 return; 3817 } 3818 3819 case Iop_Sqrt64Fx4: op = Asse_SQRTF; goto do_64Fx4_unary; 3820 do_64Fx4_unary: 3821 { 3822 HReg argHi, argLo; 3823 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg); 3824 HReg dstHi = newVRegV(env); 3825 HReg dstLo = newVRegV(env); 3826 addInstr(env, AMD64Instr_Sse64Fx2(op, argHi, dstHi)); 3827 addInstr(env, AMD64Instr_Sse64Fx2(op, argLo, dstLo)); 3828 *rHi = dstHi; 3829 *rLo = dstLo; 3830 return; 3831 } 3832 3833 case Iop_CmpNEZ64x4: { 3834 /* We can use SSE2 instructions for this. */ 3835 /* Same scheme as Iop_CmpNEZ64x2, except twice as wide 3836 (obviously). See comment on Iop_CmpNEZ64x2 for 3837 explanation of what's going on here. */ 3838 HReg argHi, argLo; 3839 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg); 3840 HReg tmpHi = generate_zeroes_V128(env); 3841 HReg tmpLo = newVRegV(env); 3842 addInstr(env, mk_vMOVsd_RR(tmpHi, tmpLo)); 3843 HReg dstHi = newVRegV(env); 3844 HReg dstLo = newVRegV(env); 3845 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argHi, tmpHi)); 3846 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argLo, tmpLo)); 3847 tmpHi = do_sse_NotV128(env, tmpHi); 3848 tmpLo = do_sse_NotV128(env, tmpLo); 3849 addInstr(env, AMD64Instr_SseShuf(0xB1, tmpHi, dstHi)); 3850 addInstr(env, AMD64Instr_SseShuf(0xB1, tmpLo, dstLo)); 3851 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpHi, dstHi)); 3852 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpLo, dstLo)); 3853 *rHi = dstHi; 3854 *rLo = dstLo; 3855 return; 3856 } 3857 3858 case Iop_CmpNEZ32x8: op = Asse_CMPEQ32; goto do_CmpNEZ_vector; 3859 case Iop_CmpNEZ16x16: op = Asse_CMPEQ16; goto do_CmpNEZ_vector; 3860 case Iop_CmpNEZ8x32: op = Asse_CMPEQ8; goto do_CmpNEZ_vector; 3861 do_CmpNEZ_vector: 3862 { 3863 HReg argHi, argLo; 3864 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg); 3865 HReg tmpHi = newVRegV(env); 3866 HReg tmpLo = newVRegV(env); 3867 HReg zero = generate_zeroes_V128(env); 3868 HReg dstHi, dstLo; 3869 addInstr(env, mk_vMOVsd_RR(argHi, tmpHi)); 3870 addInstr(env, mk_vMOVsd_RR(argLo, tmpLo)); 3871 addInstr(env, AMD64Instr_SseReRg(op, zero, tmpHi)); 3872 addInstr(env, AMD64Instr_SseReRg(op, zero, tmpLo)); 3873 dstHi = do_sse_NotV128(env, tmpHi); 3874 dstLo = do_sse_NotV128(env, tmpLo); 3875 *rHi = dstHi; 3876 *rLo = dstLo; 3877 return; 3878 } 3879 3880 default: 3881 break; 3882 } /* switch (e->Iex.Unop.op) */ 3883 } /* if (e->tag == Iex_Unop) */ 3884 3885 if (e->tag == Iex_Binop) { 3886 switch (e->Iex.Binop.op) { 3887 3888 case Iop_Max64Fx4: op = Asse_MAXF; goto do_64Fx4; 3889 case Iop_Min64Fx4: op = Asse_MINF; goto do_64Fx4; 3890 do_64Fx4: 3891 { 3892 HReg argLhi, argLlo, argRhi, argRlo; 3893 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1); 3894 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2); 3895 HReg dstHi = newVRegV(env); 3896 HReg dstLo = newVRegV(env); 3897 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi)); 3898 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo)); 3899 addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi)); 3900 addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo)); 3901 *rHi = dstHi; 3902 *rLo = dstLo; 3903 return; 3904 } 3905 3906 case Iop_Max32Fx8: op = Asse_MAXF; goto do_32Fx8; 3907 case Iop_Min32Fx8: op = Asse_MINF; goto do_32Fx8; 3908 do_32Fx8: 3909 { 3910 HReg argLhi, argLlo, argRhi, argRlo; 3911 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1); 3912 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2); 3913 HReg dstHi = newVRegV(env); 3914 HReg dstLo = newVRegV(env); 3915 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi)); 3916 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo)); 3917 addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi)); 3918 addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo)); 3919 *rHi = dstHi; 3920 *rLo = dstLo; 3921 return; 3922 } 3923 3924 case Iop_AndV256: op = Asse_AND; goto do_SseReRg; 3925 case Iop_OrV256: op = Asse_OR; goto do_SseReRg; 3926 case Iop_XorV256: op = Asse_XOR; goto do_SseReRg; 3927 case Iop_Add8x32: op = Asse_ADD8; goto do_SseReRg; 3928 case Iop_Add16x16: op = Asse_ADD16; goto do_SseReRg; 3929 case Iop_Add32x8: op = Asse_ADD32; goto do_SseReRg; 3930 case Iop_Add64x4: op = Asse_ADD64; goto do_SseReRg; 3931 case Iop_QAdd8Sx32: op = Asse_QADD8S; goto do_SseReRg; 3932 case Iop_QAdd16Sx16: op = Asse_QADD16S; goto do_SseReRg; 3933 case Iop_QAdd8Ux32: op = Asse_QADD8U; goto do_SseReRg; 3934 case Iop_QAdd16Ux16: op = Asse_QADD16U; goto do_SseReRg; 3935 case Iop_Avg8Ux32: op = Asse_AVG8U; goto do_SseReRg; 3936 case Iop_Avg16Ux16: op = Asse_AVG16U; goto do_SseReRg; 3937 case Iop_CmpEQ8x32: op = Asse_CMPEQ8; goto do_SseReRg; 3938 case Iop_CmpEQ16x16: op = Asse_CMPEQ16; goto do_SseReRg; 3939 case Iop_CmpEQ32x8: op = Asse_CMPEQ32; goto do_SseReRg; 3940 case Iop_CmpGT8Sx32: op = Asse_CMPGT8S; goto do_SseReRg; 3941 case Iop_CmpGT16Sx16: op = Asse_CMPGT16S; goto do_SseReRg; 3942 case Iop_CmpGT32Sx8: op = Asse_CMPGT32S; goto do_SseReRg; 3943 case Iop_Max16Sx16: op = Asse_MAX16S; goto do_SseReRg; 3944 case Iop_Max8Ux32: op = Asse_MAX8U; goto do_SseReRg; 3945 case Iop_Min16Sx16: op = Asse_MIN16S; goto do_SseReRg; 3946 case Iop_Min8Ux32: op = Asse_MIN8U; goto do_SseReRg; 3947 case Iop_MulHi16Ux16: op = Asse_MULHI16U; goto do_SseReRg; 3948 case Iop_MulHi16Sx16: op = Asse_MULHI16S; goto do_SseReRg; 3949 case Iop_Mul16x16: op = Asse_MUL16; goto do_SseReRg; 3950 case Iop_Sub8x32: op = Asse_SUB8; goto do_SseReRg; 3951 case Iop_Sub16x16: op = Asse_SUB16; goto do_SseReRg; 3952 case Iop_Sub32x8: op = Asse_SUB32; goto do_SseReRg; 3953 case Iop_Sub64x4: op = Asse_SUB64; goto do_SseReRg; 3954 case Iop_QSub8Sx32: op = Asse_QSUB8S; goto do_SseReRg; 3955 case Iop_QSub16Sx16: op = Asse_QSUB16S; goto do_SseReRg; 3956 case Iop_QSub8Ux32: op = Asse_QSUB8U; goto do_SseReRg; 3957 case Iop_QSub16Ux16: op = Asse_QSUB16U; goto do_SseReRg; 3958 do_SseReRg: 3959 { 3960 HReg argLhi, argLlo, argRhi, argRlo; 3961 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1); 3962 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2); 3963 HReg dstHi = newVRegV(env); 3964 HReg dstLo = newVRegV(env); 3965 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi)); 3966 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo)); 3967 addInstr(env, AMD64Instr_SseReRg(op, argRhi, dstHi)); 3968 addInstr(env, AMD64Instr_SseReRg(op, argRlo, dstLo)); 3969 *rHi = dstHi; 3970 *rLo = dstLo; 3971 return; 3972 } 3973 3974 case Iop_ShlN16x16: op = Asse_SHL16; goto do_SseShift; 3975 case Iop_ShlN32x8: op = Asse_SHL32; goto do_SseShift; 3976 case Iop_ShlN64x4: op = Asse_SHL64; goto do_SseShift; 3977 case Iop_SarN16x16: op = Asse_SAR16; goto do_SseShift; 3978 case Iop_SarN32x8: op = Asse_SAR32; goto do_SseShift; 3979 case Iop_ShrN16x16: op = Asse_SHR16; goto do_SseShift; 3980 case Iop_ShrN32x8: op = Asse_SHR32; goto do_SseShift; 3981 case Iop_ShrN64x4: op = Asse_SHR64; goto do_SseShift; 3982 do_SseShift: { 3983 HReg gregHi, gregLo; 3984 iselDVecExpr(&gregHi, &gregLo, env, e->Iex.Binop.arg1); 3985 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 3986 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 3987 HReg ereg = newVRegV(env); 3988 HReg dstHi = newVRegV(env); 3989 HReg dstLo = newVRegV(env); 3990 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0))); 3991 addInstr(env, AMD64Instr_Push(rmi)); 3992 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0)); 3993 addInstr(env, mk_vMOVsd_RR(gregHi, dstHi)); 3994 addInstr(env, AMD64Instr_SseReRg(op, ereg, dstHi)); 3995 addInstr(env, mk_vMOVsd_RR(gregLo, dstLo)); 3996 addInstr(env, AMD64Instr_SseReRg(op, ereg, dstLo)); 3997 add_to_rsp(env, 16); 3998 *rHi = dstHi; 3999 *rLo = dstLo; 4000 return; 4001 } 4002 4003 case Iop_V128HLtoV256: { 4004 *rHi = iselVecExpr(env, e->Iex.Binop.arg1); 4005 *rLo = iselVecExpr(env, e->Iex.Binop.arg2); 4006 return; 4007 } 4008 4009 case Iop_Mul32x8: fn = (HWord)h_generic_calc_Mul32x4; 4010 goto do_SseAssistedBinary; 4011 case Iop_Max32Sx8: fn = (HWord)h_generic_calc_Max32Sx4; 4012 goto do_SseAssistedBinary; 4013 case Iop_Min32Sx8: fn = (HWord)h_generic_calc_Min32Sx4; 4014 goto do_SseAssistedBinary; 4015 case Iop_Max32Ux8: fn = (HWord)h_generic_calc_Max32Ux4; 4016 goto do_SseAssistedBinary; 4017 case Iop_Min32Ux8: fn = (HWord)h_generic_calc_Min32Ux4; 4018 goto do_SseAssistedBinary; 4019 case Iop_Max16Ux16: fn = (HWord)h_generic_calc_Max16Ux8; 4020 goto do_SseAssistedBinary; 4021 case Iop_Min16Ux16: fn = (HWord)h_generic_calc_Min16Ux8; 4022 goto do_SseAssistedBinary; 4023 case Iop_Max8Sx32: fn = (HWord)h_generic_calc_Max8Sx16; 4024 goto do_SseAssistedBinary; 4025 case Iop_Min8Sx32: fn = (HWord)h_generic_calc_Min8Sx16; 4026 goto do_SseAssistedBinary; 4027 case Iop_CmpEQ64x4: fn = (HWord)h_generic_calc_CmpEQ64x2; 4028 goto do_SseAssistedBinary; 4029 case Iop_CmpGT64Sx4: fn = (HWord)h_generic_calc_CmpGT64Sx2; 4030 goto do_SseAssistedBinary; 4031 do_SseAssistedBinary: { 4032 /* RRRufff! RRRufff code is what we're generating here. Oh 4033 well. */ 4034 vassert(fn != 0); 4035 HReg dstHi = newVRegV(env); 4036 HReg dstLo = newVRegV(env); 4037 HReg argLhi, argLlo, argRhi, argRlo; 4038 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1); 4039 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2); 4040 HReg argp = newVRegI(env); 4041 /* subq $160, %rsp -- make a space*/ 4042 sub_from_rsp(env, 160); 4043 /* leaq 48(%rsp), %r_argp -- point into it */ 4044 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()), 4045 argp)); 4046 /* andq $-16, %r_argp -- 16-align the pointer */ 4047 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, 4048 AMD64RMI_Imm( ~(UInt)15 ), 4049 argp)); 4050 /* Prepare 3 arg regs: 4051 leaq 0(%r_argp), %rdi 4052 leaq 16(%r_argp), %rsi 4053 leaq 32(%r_argp), %rdx 4054 */ 4055 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp), 4056 hregAMD64_RDI())); 4057 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp), 4058 hregAMD64_RSI())); 4059 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp), 4060 hregAMD64_RDX())); 4061 /* Store the two high args, at (%rsi) and (%rdx): 4062 movupd %argLhi, 0(%rsi) 4063 movupd %argRhi, 0(%rdx) 4064 */ 4065 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi, 4066 AMD64AMode_IR(0, hregAMD64_RSI()))); 4067 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi, 4068 AMD64AMode_IR(0, hregAMD64_RDX()))); 4069 /* Store the two low args, at 48(%rsi) and 48(%rdx): 4070 movupd %argLlo, 48(%rsi) 4071 movupd %argRlo, 48(%rdx) 4072 */ 4073 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo, 4074 AMD64AMode_IR(48, hregAMD64_RSI()))); 4075 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo, 4076 AMD64AMode_IR(48, hregAMD64_RDX()))); 4077 /* call the helper */ 4078 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3, 4079 mk_RetLoc_simple(RLPri_None) )); 4080 /* Prepare 3 arg regs: 4081 leaq 48(%r_argp), %rdi 4082 leaq 64(%r_argp), %rsi 4083 leaq 80(%r_argp), %rdx 4084 */ 4085 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, argp), 4086 hregAMD64_RDI())); 4087 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp), 4088 hregAMD64_RSI())); 4089 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(80, argp), 4090 hregAMD64_RDX())); 4091 /* call the helper */ 4092 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3, 4093 mk_RetLoc_simple(RLPri_None) )); 4094 /* fetch the result from memory, using %r_argp, which the 4095 register allocator will keep alive across the call. */ 4096 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi, 4097 AMD64AMode_IR(0, argp))); 4098 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo, 4099 AMD64AMode_IR(48, argp))); 4100 /* and finally, clear the space */ 4101 add_to_rsp(env, 160); 4102 *rHi = dstHi; 4103 *rLo = dstLo; 4104 return; 4105 } 4106 4107 case Iop_Perm32x8: fn = (HWord)h_generic_calc_Perm32x8; 4108 goto do_SseAssistedBinary256; 4109 do_SseAssistedBinary256: { 4110 /* RRRufff! RRRufff code is what we're generating here. Oh 4111 well. */ 4112 vassert(fn != 0); 4113 HReg dstHi = newVRegV(env); 4114 HReg dstLo = newVRegV(env); 4115 HReg argLhi, argLlo, argRhi, argRlo; 4116 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1); 4117 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2); 4118 HReg argp = newVRegI(env); 4119 /* subq $160, %rsp -- make a space*/ 4120 sub_from_rsp(env, 160); 4121 /* leaq 48(%rsp), %r_argp -- point into it */ 4122 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()), 4123 argp)); 4124 /* andq $-16, %r_argp -- 16-align the pointer */ 4125 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, 4126 AMD64RMI_Imm( ~(UInt)15 ), 4127 argp)); 4128 /* Prepare 3 arg regs: 4129 leaq 0(%r_argp), %rdi 4130 leaq 32(%r_argp), %rsi 4131 leaq 64(%r_argp), %rdx 4132 */ 4133 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp), 4134 hregAMD64_RDI())); 4135 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp), 4136 hregAMD64_RSI())); 4137 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp), 4138 hregAMD64_RDX())); 4139 /* Store the two args, at (%rsi) and (%rdx): 4140 movupd %argLlo, 0(%rsi) 4141 movupd %argLhi, 16(%rsi) 4142 movupd %argRlo, 0(%rdx) 4143 movupd %argRhi, 16(%rdx) 4144 */ 4145 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo, 4146 AMD64AMode_IR(0, hregAMD64_RSI()))); 4147 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi, 4148 AMD64AMode_IR(16, hregAMD64_RSI()))); 4149 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo, 4150 AMD64AMode_IR(0, hregAMD64_RDX()))); 4151 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi, 4152 AMD64AMode_IR(16, hregAMD64_RDX()))); 4153 /* call the helper */ 4154 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3, 4155 mk_RetLoc_simple(RLPri_None) )); 4156 /* fetch the result from memory, using %r_argp, which the 4157 register allocator will keep alive across the call. */ 4158 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo, 4159 AMD64AMode_IR(0, argp))); 4160 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi, 4161 AMD64AMode_IR(16, argp))); 4162 /* and finally, clear the space */ 4163 add_to_rsp(env, 160); 4164 *rHi = dstHi; 4165 *rLo = dstLo; 4166 return; 4167 } 4168 4169 default: 4170 break; 4171 } /* switch (e->Iex.Binop.op) */ 4172 } /* if (e->tag == Iex_Binop) */ 4173 4174 if (e->tag == Iex_Triop) { 4175 IRTriop *triop = e->Iex.Triop.details; 4176 switch (triop->op) { 4177 4178 case Iop_Add64Fx4: op = Asse_ADDF; goto do_64Fx4_w_rm; 4179 case Iop_Sub64Fx4: op = Asse_SUBF; goto do_64Fx4_w_rm; 4180 case Iop_Mul64Fx4: op = Asse_MULF; goto do_64Fx4_w_rm; 4181 case Iop_Div64Fx4: op = Asse_DIVF; goto do_64Fx4_w_rm; 4182 do_64Fx4_w_rm: 4183 { 4184 HReg argLhi, argLlo, argRhi, argRlo; 4185 iselDVecExpr(&argLhi, &argLlo, env, triop->arg2); 4186 iselDVecExpr(&argRhi, &argRlo, env, triop->arg3); 4187 HReg dstHi = newVRegV(env); 4188 HReg dstLo = newVRegV(env); 4189 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi)); 4190 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo)); 4191 /* XXXROUNDINGFIXME */ 4192 /* set roundingmode here */ 4193 addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi)); 4194 addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo)); 4195 *rHi = dstHi; 4196 *rLo = dstLo; 4197 return; 4198 } 4199 4200 case Iop_Add32Fx8: op = Asse_ADDF; goto do_32Fx8_w_rm; 4201 case Iop_Sub32Fx8: op = Asse_SUBF; goto do_32Fx8_w_rm; 4202 case Iop_Mul32Fx8: op = Asse_MULF; goto do_32Fx8_w_rm; 4203 case Iop_Div32Fx8: op = Asse_DIVF; goto do_32Fx8_w_rm; 4204 do_32Fx8_w_rm: 4205 { 4206 HReg argLhi, argLlo, argRhi, argRlo; 4207 iselDVecExpr(&argLhi, &argLlo, env, triop->arg2); 4208 iselDVecExpr(&argRhi, &argRlo, env, triop->arg3); 4209 HReg dstHi = newVRegV(env); 4210 HReg dstLo = newVRegV(env); 4211 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi)); 4212 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo)); 4213 /* XXXROUNDINGFIXME */ 4214 /* set roundingmode here */ 4215 addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi)); 4216 addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo)); 4217 *rHi = dstHi; 4218 *rLo = dstLo; 4219 return; 4220 } 4221 4222 default: 4223 break; 4224 } /* switch (triop->op) */ 4225 } /* if (e->tag == Iex_Triop) */ 4226 4227 4228 if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_64x4toV256) { 4229 HReg rsp = hregAMD64_RSP(); 4230 HReg vHi = newVRegV(env); 4231 HReg vLo = newVRegV(env); 4232 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, rsp); 4233 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp); 4234 /* arg1 is the most significant (Q3), arg4 the least (Q0) */ 4235 /* Get all the args into regs, before messing with the stack. */ 4236 AMD64RI* q3 = iselIntExpr_RI(env, e->Iex.Qop.details->arg1); 4237 AMD64RI* q2 = iselIntExpr_RI(env, e->Iex.Qop.details->arg2); 4238 AMD64RI* q1 = iselIntExpr_RI(env, e->Iex.Qop.details->arg3); 4239 AMD64RI* q0 = iselIntExpr_RI(env, e->Iex.Qop.details->arg4); 4240 /* less significant lane (Q2) at the lower address (-16(rsp)) */ 4241 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q3, m8_rsp)); 4242 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q2, m16_rsp)); 4243 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, m16_rsp)); 4244 /* and then the lower half .. */ 4245 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q1, m8_rsp)); 4246 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q0, m16_rsp)); 4247 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, m16_rsp)); 4248 *rHi = vHi; 4249 *rLo = vLo; 4250 return; 4251 } 4252 4253 if (e->tag == Iex_ITE) { 4254 HReg r1Hi, r1Lo, r0Hi, r0Lo; 4255 iselDVecExpr(&r1Hi, &r1Lo, env, e->Iex.ITE.iftrue); 4256 iselDVecExpr(&r0Hi, &r0Lo, env, e->Iex.ITE.iffalse); 4257 HReg dstHi = newVRegV(env); 4258 HReg dstLo = newVRegV(env); 4259 addInstr(env, mk_vMOVsd_RR(r1Hi,dstHi)); 4260 addInstr(env, mk_vMOVsd_RR(r1Lo,dstLo)); 4261 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond); 4262 addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Hi, dstHi)); 4263 addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Lo, dstLo)); 4264 *rHi = dstHi; 4265 *rLo = dstLo; 4266 return; 4267 } 4268 4269 //avx_fail: 4270 vex_printf("iselDVecExpr (amd64, subarch = %s): can't reduce\n", 4271 LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps)); 4272 ppIRExpr(e); 4273 vpanic("iselDVecExpr_wrk"); 4274} 4275 4276 4277/*---------------------------------------------------------*/ 4278/*--- ISEL: Statements ---*/ 4279/*---------------------------------------------------------*/ 4280 4281static void iselStmt ( ISelEnv* env, IRStmt* stmt ) 4282{ 4283 if (vex_traceflags & VEX_TRACE_VCODE) { 4284 vex_printf("\n-- "); 4285 ppIRStmt(stmt); 4286 vex_printf("\n"); 4287 } 4288 4289 switch (stmt->tag) { 4290 4291 /* --------- STORE --------- */ 4292 case Ist_Store: { 4293 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr); 4294 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); 4295 IREndness end = stmt->Ist.Store.end; 4296 4297 if (tya != Ity_I64 || end != Iend_LE) 4298 goto stmt_fail; 4299 4300 if (tyd == Ity_I64) { 4301 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 4302 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data); 4303 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am)); 4304 return; 4305 } 4306 if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) { 4307 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 4308 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data); 4309 addInstr(env, AMD64Instr_Store( 4310 toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)), 4311 r,am)); 4312 return; 4313 } 4314 if (tyd == Ity_F64) { 4315 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 4316 HReg r = iselDblExpr(env, stmt->Ist.Store.data); 4317 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am)); 4318 return; 4319 } 4320 if (tyd == Ity_F32) { 4321 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 4322 HReg r = iselFltExpr(env, stmt->Ist.Store.data); 4323 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am)); 4324 return; 4325 } 4326 if (tyd == Ity_V128) { 4327 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 4328 HReg r = iselVecExpr(env, stmt->Ist.Store.data); 4329 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am)); 4330 return; 4331 } 4332 if (tyd == Ity_V256) { 4333 HReg rA = iselIntExpr_R(env, stmt->Ist.Store.addr); 4334 AMD64AMode* am0 = AMD64AMode_IR(0, rA); 4335 AMD64AMode* am16 = AMD64AMode_IR(16, rA); 4336 HReg vHi, vLo; 4337 iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Store.data); 4338 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0)); 4339 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16)); 4340 return; 4341 } 4342 break; 4343 } 4344 4345 /* --------- PUT --------- */ 4346 case Ist_Put: { 4347 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); 4348 if (ty == Ity_I64) { 4349 /* We're going to write to memory, so compute the RHS into an 4350 AMD64RI. */ 4351 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data); 4352 addInstr(env, 4353 AMD64Instr_Alu64M( 4354 Aalu_MOV, 4355 ri, 4356 AMD64AMode_IR(stmt->Ist.Put.offset, 4357 hregAMD64_RBP()) 4358 )); 4359 return; 4360 } 4361 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) { 4362 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data); 4363 addInstr(env, AMD64Instr_Store( 4364 toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)), 4365 r, 4366 AMD64AMode_IR(stmt->Ist.Put.offset, 4367 hregAMD64_RBP()))); 4368 return; 4369 } 4370 if (ty == Ity_F32) { 4371 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data); 4372 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP()); 4373 set_SSE_rounding_default(env); /* paranoia */ 4374 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am )); 4375 return; 4376 } 4377 if (ty == Ity_F64) { 4378 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data); 4379 AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset, 4380 hregAMD64_RBP() ); 4381 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am )); 4382 return; 4383 } 4384 if (ty == Ity_V128) { 4385 HReg vec = iselVecExpr(env, stmt->Ist.Put.data); 4386 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, 4387 hregAMD64_RBP()); 4388 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am)); 4389 return; 4390 } 4391 if (ty == Ity_V256) { 4392 HReg vHi, vLo; 4393 iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Put.data); 4394 HReg rbp = hregAMD64_RBP(); 4395 AMD64AMode* am0 = AMD64AMode_IR(stmt->Ist.Put.offset + 0, rbp); 4396 AMD64AMode* am16 = AMD64AMode_IR(stmt->Ist.Put.offset + 16, rbp); 4397 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0)); 4398 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16)); 4399 return; 4400 } 4401 break; 4402 } 4403 4404 /* --------- Indexed PUT --------- */ 4405 case Ist_PutI: { 4406 IRPutI *puti = stmt->Ist.PutI.details; 4407 4408 AMD64AMode* am 4409 = genGuestArrayOffset( 4410 env, puti->descr, 4411 puti->ix, puti->bias ); 4412 4413 IRType ty = typeOfIRExpr(env->type_env, puti->data); 4414 if (ty == Ity_F64) { 4415 HReg val = iselDblExpr(env, puti->data); 4416 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am )); 4417 return; 4418 } 4419 if (ty == Ity_I8) { 4420 HReg r = iselIntExpr_R(env, puti->data); 4421 addInstr(env, AMD64Instr_Store( 1, r, am )); 4422 return; 4423 } 4424 if (ty == Ity_I64) { 4425 AMD64RI* ri = iselIntExpr_RI(env, puti->data); 4426 addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am )); 4427 return; 4428 } 4429 break; 4430 } 4431 4432 /* --------- TMP --------- */ 4433 case Ist_WrTmp: { 4434 IRTemp tmp = stmt->Ist.WrTmp.tmp; 4435 IRType ty = typeOfIRTemp(env->type_env, tmp); 4436 4437 /* optimisation: if stmt->Ist.WrTmp.data is Add64(..,..), 4438 compute it into an AMode and then use LEA. This usually 4439 produces fewer instructions, often because (for memcheck 4440 created IR) we get t = address-expression, (t is later used 4441 twice) and so doing this naturally turns address-expression 4442 back into an AMD64 amode. */ 4443 if (ty == Ity_I64 4444 && stmt->Ist.WrTmp.data->tag == Iex_Binop 4445 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add64) { 4446 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data); 4447 HReg dst = lookupIRTemp(env, tmp); 4448 if (am->tag == Aam_IR && am->Aam.IR.imm == 0) { 4449 /* Hmm, iselIntExpr_AMode wimped out and just computed the 4450 value into a register. Just emit a normal reg-reg move 4451 so reg-alloc can coalesce it away in the usual way. */ 4452 HReg src = am->Aam.IR.reg; 4453 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst)); 4454 } else { 4455 addInstr(env, AMD64Instr_Lea64(am,dst)); 4456 } 4457 return; 4458 } 4459 4460 if (ty == Ity_I64 || ty == Ity_I32 4461 || ty == Ity_I16 || ty == Ity_I8) { 4462 AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data); 4463 HReg dst = lookupIRTemp(env, tmp); 4464 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst)); 4465 return; 4466 } 4467 if (ty == Ity_I128) { 4468 HReg rHi, rLo, dstHi, dstLo; 4469 iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data); 4470 lookupIRTempPair( &dstHi, &dstLo, env, tmp); 4471 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) ); 4472 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) ); 4473 return; 4474 } 4475 if (ty == Ity_I1) { 4476 AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data); 4477 HReg dst = lookupIRTemp(env, tmp); 4478 addInstr(env, AMD64Instr_Set64(cond, dst)); 4479 return; 4480 } 4481 if (ty == Ity_F64) { 4482 HReg dst = lookupIRTemp(env, tmp); 4483 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data); 4484 addInstr(env, mk_vMOVsd_RR(src, dst)); 4485 return; 4486 } 4487 if (ty == Ity_F32) { 4488 HReg dst = lookupIRTemp(env, tmp); 4489 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); 4490 addInstr(env, mk_vMOVsd_RR(src, dst)); 4491 return; 4492 } 4493 if (ty == Ity_V128) { 4494 HReg dst = lookupIRTemp(env, tmp); 4495 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data); 4496 addInstr(env, mk_vMOVsd_RR(src, dst)); 4497 return; 4498 } 4499 if (ty == Ity_V256) { 4500 HReg rHi, rLo, dstHi, dstLo; 4501 iselDVecExpr(&rHi,&rLo, env, stmt->Ist.WrTmp.data); 4502 lookupIRTempPair( &dstHi, &dstLo, env, tmp); 4503 addInstr(env, mk_vMOVsd_RR(rHi,dstHi) ); 4504 addInstr(env, mk_vMOVsd_RR(rLo,dstLo) ); 4505 return; 4506 } 4507 break; 4508 } 4509 4510 /* --------- Call to DIRTY helper --------- */ 4511 case Ist_Dirty: { 4512 IRDirty* d = stmt->Ist.Dirty.details; 4513 4514 /* Figure out the return type, if any. */ 4515 IRType retty = Ity_INVALID; 4516 if (d->tmp != IRTemp_INVALID) 4517 retty = typeOfIRTemp(env->type_env, d->tmp); 4518 4519 /* Throw out any return types we don't know about. */ 4520 Bool retty_ok = False; 4521 switch (retty) { 4522 case Ity_INVALID: /* function doesn't return anything */ 4523 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: 4524 case Ity_V128: case Ity_V256: 4525 retty_ok = True; break; 4526 default: 4527 break; 4528 } 4529 if (!retty_ok) 4530 break; /* will go to stmt_fail: */ 4531 4532 /* Marshal args, do the call, and set the return value to 4533 0x555..555 if this is a conditional call that returns a value 4534 and the call is skipped. */ 4535 UInt addToSp = 0; 4536 RetLoc rloc = mk_RetLoc_INVALID(); 4537 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args ); 4538 vassert(is_sane_RetLoc(rloc)); 4539 4540 /* Now figure out what to do with the returned value, if any. */ 4541 switch (retty) { 4542 case Ity_INVALID: { 4543 /* No return value. Nothing to do. */ 4544 vassert(d->tmp == IRTemp_INVALID); 4545 vassert(rloc.pri == RLPri_None); 4546 vassert(addToSp == 0); 4547 return; 4548 } 4549 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: { 4550 /* The returned value is in %rax. Park it in the register 4551 associated with tmp. */ 4552 vassert(rloc.pri == RLPri_Int); 4553 vassert(addToSp == 0); 4554 HReg dst = lookupIRTemp(env, d->tmp); 4555 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) ); 4556 return; 4557 } 4558 case Ity_V128: { 4559 /* The returned value is on the stack, and rloc.spOff 4560 tells us where. Fish it off the stack and then move 4561 the stack pointer upwards to clear it, as directed by 4562 doHelperCall. */ 4563 vassert(rloc.pri == RLPri_V128SpRel); 4564 vassert(addToSp >= 16); 4565 HReg dst = lookupIRTemp(env, d->tmp); 4566 AMD64AMode* am = AMD64AMode_IR(rloc.spOff, hregAMD64_RSP()); 4567 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am )); 4568 add_to_rsp(env, addToSp); 4569 return; 4570 } 4571 case Ity_V256: { 4572 /* See comments for Ity_V128. */ 4573 vassert(rloc.pri == RLPri_V256SpRel); 4574 vassert(addToSp >= 32); 4575 HReg dstLo, dstHi; 4576 lookupIRTempPair(&dstHi, &dstLo, env, d->tmp); 4577 AMD64AMode* amLo = AMD64AMode_IR(rloc.spOff, hregAMD64_RSP()); 4578 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dstLo, amLo )); 4579 AMD64AMode* amHi = AMD64AMode_IR(rloc.spOff+16, hregAMD64_RSP()); 4580 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dstHi, amHi )); 4581 add_to_rsp(env, addToSp); 4582 return; 4583 } 4584 default: 4585 /*NOTREACHED*/ 4586 vassert(0); 4587 } 4588 break; 4589 } 4590 4591 /* --------- MEM FENCE --------- */ 4592 case Ist_MBE: 4593 switch (stmt->Ist.MBE.event) { 4594 case Imbe_Fence: 4595 addInstr(env, AMD64Instr_MFence()); 4596 return; 4597 default: 4598 break; 4599 } 4600 break; 4601 4602 /* --------- ACAS --------- */ 4603 case Ist_CAS: 4604 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) { 4605 /* "normal" singleton CAS */ 4606 UChar sz; 4607 IRCAS* cas = stmt->Ist.CAS.details; 4608 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); 4609 /* get: cas->expd into %rax, and cas->data into %rbx */ 4610 AMD64AMode* am = iselIntExpr_AMode(env, cas->addr); 4611 HReg rData = iselIntExpr_R(env, cas->dataLo); 4612 HReg rExpd = iselIntExpr_R(env, cas->expdLo); 4613 HReg rOld = lookupIRTemp(env, cas->oldLo); 4614 vassert(cas->expdHi == NULL); 4615 vassert(cas->dataHi == NULL); 4616 addInstr(env, mk_iMOVsd_RR(rExpd, rOld)); 4617 addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX())); 4618 addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX())); 4619 switch (ty) { 4620 case Ity_I64: sz = 8; break; 4621 case Ity_I32: sz = 4; break; 4622 case Ity_I16: sz = 2; break; 4623 case Ity_I8: sz = 1; break; 4624 default: goto unhandled_cas; 4625 } 4626 addInstr(env, AMD64Instr_ACAS(am, sz)); 4627 addInstr(env, AMD64Instr_CMov64( 4628 Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOld)); 4629 return; 4630 } else { 4631 /* double CAS */ 4632 UChar sz; 4633 IRCAS* cas = stmt->Ist.CAS.details; 4634 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); 4635 /* only 32-bit and 64-bit allowed in this case */ 4636 /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */ 4637 /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */ 4638 AMD64AMode* am = iselIntExpr_AMode(env, cas->addr); 4639 HReg rDataHi = iselIntExpr_R(env, cas->dataHi); 4640 HReg rDataLo = iselIntExpr_R(env, cas->dataLo); 4641 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi); 4642 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo); 4643 HReg rOldHi = lookupIRTemp(env, cas->oldHi); 4644 HReg rOldLo = lookupIRTemp(env, cas->oldLo); 4645 switch (ty) { 4646 case Ity_I64: 4647 if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16)) 4648 goto unhandled_cas; /* we'd have to generate 4649 cmpxchg16b, but the host 4650 doesn't support that */ 4651 sz = 8; 4652 break; 4653 case Ity_I32: 4654 sz = 4; 4655 break; 4656 default: 4657 goto unhandled_cas; 4658 } 4659 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi)); 4660 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo)); 4661 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX())); 4662 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX())); 4663 addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX())); 4664 addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX())); 4665 addInstr(env, AMD64Instr_DACAS(am, sz)); 4666 addInstr(env, 4667 AMD64Instr_CMov64( 4668 Acc_NZ, AMD64RM_Reg(hregAMD64_RDX()), rOldHi)); 4669 addInstr(env, 4670 AMD64Instr_CMov64( 4671 Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOldLo)); 4672 return; 4673 } 4674 unhandled_cas: 4675 break; 4676 4677 /* --------- INSTR MARK --------- */ 4678 /* Doesn't generate any executable code ... */ 4679 case Ist_IMark: 4680 return; 4681 4682 /* --------- ABI HINT --------- */ 4683 /* These have no meaning (denotation in the IR) and so we ignore 4684 them ... if any actually made it this far. */ 4685 case Ist_AbiHint: 4686 return; 4687 4688 /* --------- NO-OP --------- */ 4689 case Ist_NoOp: 4690 return; 4691 4692 /* --------- EXIT --------- */ 4693 case Ist_Exit: { 4694 if (stmt->Ist.Exit.dst->tag != Ico_U64) 4695 vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value"); 4696 4697 AMD64CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard); 4698 AMD64AMode* amRIP = AMD64AMode_IR(stmt->Ist.Exit.offsIP, 4699 hregAMD64_RBP()); 4700 4701 /* Case: boring transfer to known address */ 4702 if (stmt->Ist.Exit.jk == Ijk_Boring) { 4703 if (env->chainingAllowed) { 4704 /* .. almost always true .. */ 4705 /* Skip the event check at the dst if this is a forwards 4706 edge. */ 4707 Bool toFastEP 4708 = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga; 4709 if (0) vex_printf("%s", toFastEP ? "Y" : ","); 4710 addInstr(env, AMD64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64, 4711 amRIP, cc, toFastEP)); 4712 } else { 4713 /* .. very occasionally .. */ 4714 /* We can't use chaining, so ask for an assisted transfer, 4715 as that's the only alternative that is allowable. */ 4716 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 4717 addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, Ijk_Boring)); 4718 } 4719 return; 4720 } 4721 4722 /* Case: assisted transfer to arbitrary address */ 4723 switch (stmt->Ist.Exit.jk) { 4724 /* Keep this list in sync with that in iselNext below */ 4725 case Ijk_ClientReq: 4726 case Ijk_EmWarn: 4727 case Ijk_NoDecode: 4728 case Ijk_NoRedir: 4729 case Ijk_SigSEGV: 4730 case Ijk_SigTRAP: 4731 case Ijk_Sys_syscall: 4732 case Ijk_InvalICache: 4733 case Ijk_Yield: 4734 { 4735 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 4736 addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, stmt->Ist.Exit.jk)); 4737 return; 4738 } 4739 default: 4740 break; 4741 } 4742 4743 /* Do we ever expect to see any other kind? */ 4744 goto stmt_fail; 4745 } 4746 4747 default: break; 4748 } 4749 stmt_fail: 4750 ppIRStmt(stmt); 4751 vpanic("iselStmt(amd64)"); 4752} 4753 4754 4755/*---------------------------------------------------------*/ 4756/*--- ISEL: Basic block terminators (Nexts) ---*/ 4757/*---------------------------------------------------------*/ 4758 4759static void iselNext ( ISelEnv* env, 4760 IRExpr* next, IRJumpKind jk, Int offsIP ) 4761{ 4762 if (vex_traceflags & VEX_TRACE_VCODE) { 4763 vex_printf( "\n-- PUT(%d) = ", offsIP); 4764 ppIRExpr( next ); 4765 vex_printf( "; exit-"); 4766 ppIRJumpKind(jk); 4767 vex_printf( "\n"); 4768 } 4769 4770 /* Case: boring transfer to known address */ 4771 if (next->tag == Iex_Const) { 4772 IRConst* cdst = next->Iex.Const.con; 4773 vassert(cdst->tag == Ico_U64); 4774 if (jk == Ijk_Boring || jk == Ijk_Call) { 4775 /* Boring transfer to known address */ 4776 AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP()); 4777 if (env->chainingAllowed) { 4778 /* .. almost always true .. */ 4779 /* Skip the event check at the dst if this is a forwards 4780 edge. */ 4781 Bool toFastEP 4782 = ((Addr64)cdst->Ico.U64) > env->max_ga; 4783 if (0) vex_printf("%s", toFastEP ? "X" : "."); 4784 addInstr(env, AMD64Instr_XDirect(cdst->Ico.U64, 4785 amRIP, Acc_ALWAYS, 4786 toFastEP)); 4787 } else { 4788 /* .. very occasionally .. */ 4789 /* We can't use chaining, so ask for an indirect transfer, 4790 as that's the cheapest alternative that is 4791 allowable. */ 4792 HReg r = iselIntExpr_R(env, next); 4793 addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, 4794 Ijk_Boring)); 4795 } 4796 return; 4797 } 4798 } 4799 4800 /* Case: call/return (==boring) transfer to any address */ 4801 switch (jk) { 4802 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: { 4803 HReg r = iselIntExpr_R(env, next); 4804 AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP()); 4805 if (env->chainingAllowed) { 4806 addInstr(env, AMD64Instr_XIndir(r, amRIP, Acc_ALWAYS)); 4807 } else { 4808 addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, 4809 Ijk_Boring)); 4810 } 4811 return; 4812 } 4813 default: 4814 break; 4815 } 4816 4817 /* Case: assisted transfer to arbitrary address */ 4818 switch (jk) { 4819 /* Keep this list in sync with that for Ist_Exit above */ 4820 case Ijk_ClientReq: 4821 case Ijk_EmWarn: 4822 case Ijk_NoDecode: 4823 case Ijk_NoRedir: 4824 case Ijk_SigSEGV: 4825 case Ijk_SigTRAP: 4826 case Ijk_Sys_syscall: 4827 case Ijk_InvalICache: 4828 case Ijk_Yield: { 4829 HReg r = iselIntExpr_R(env, next); 4830 AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP()); 4831 addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, jk)); 4832 return; 4833 } 4834 default: 4835 break; 4836 } 4837 4838 vex_printf( "\n-- PUT(%d) = ", offsIP); 4839 ppIRExpr( next ); 4840 vex_printf( "; exit-"); 4841 ppIRJumpKind(jk); 4842 vex_printf( "\n"); 4843 vassert(0); // are we expecting any other kind? 4844} 4845 4846 4847/*---------------------------------------------------------*/ 4848/*--- Insn selector top-level ---*/ 4849/*---------------------------------------------------------*/ 4850 4851/* Translate an entire SB to amd64 code. */ 4852 4853HInstrArray* iselSB_AMD64 ( IRSB* bb, 4854 VexArch arch_host, 4855 VexArchInfo* archinfo_host, 4856 VexAbiInfo* vbi/*UNUSED*/, 4857 Int offs_Host_EvC_Counter, 4858 Int offs_Host_EvC_FailAddr, 4859 Bool chainingAllowed, 4860 Bool addProfInc, 4861 Addr64 max_ga ) 4862{ 4863 Int i, j; 4864 HReg hreg, hregHI; 4865 ISelEnv* env; 4866 UInt hwcaps_host = archinfo_host->hwcaps; 4867 AMD64AMode *amCounter, *amFailAddr; 4868 4869 /* sanity ... */ 4870 vassert(arch_host == VexArchAMD64); 4871 vassert(0 == (hwcaps_host 4872 & ~(VEX_HWCAPS_AMD64_SSE3 4873 | VEX_HWCAPS_AMD64_CX16 4874 | VEX_HWCAPS_AMD64_LZCNT 4875 | VEX_HWCAPS_AMD64_AVX 4876 | VEX_HWCAPS_AMD64_RDTSCP 4877 | VEX_HWCAPS_AMD64_BMI 4878 | VEX_HWCAPS_AMD64_AVX2))); 4879 4880 /* Make up an initial environment to use. */ 4881 env = LibVEX_Alloc(sizeof(ISelEnv)); 4882 env->vreg_ctr = 0; 4883 4884 /* Set up output code array. */ 4885 env->code = newHInstrArray(); 4886 4887 /* Copy BB's type env. */ 4888 env->type_env = bb->tyenv; 4889 4890 /* Make up an IRTemp -> virtual HReg mapping. This doesn't 4891 change as we go along. */ 4892 env->n_vregmap = bb->tyenv->types_used; 4893 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); 4894 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); 4895 4896 /* and finally ... */ 4897 env->chainingAllowed = chainingAllowed; 4898 env->hwcaps = hwcaps_host; 4899 env->max_ga = max_ga; 4900 4901 /* For each IR temporary, allocate a suitably-kinded virtual 4902 register. */ 4903 j = 0; 4904 for (i = 0; i < env->n_vregmap; i++) { 4905 hregHI = hreg = INVALID_HREG; 4906 switch (bb->tyenv->types[i]) { 4907 case Ity_I1: 4908 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64: 4909 hreg = mkHReg(j++, HRcInt64, True); 4910 break; 4911 case Ity_I128: 4912 hreg = mkHReg(j++, HRcInt64, True); 4913 hregHI = mkHReg(j++, HRcInt64, True); 4914 break; 4915 case Ity_F32: 4916 case Ity_F64: 4917 case Ity_V128: 4918 hreg = mkHReg(j++, HRcVec128, True); 4919 break; 4920 case Ity_V256: 4921 hreg = mkHReg(j++, HRcVec128, True); 4922 hregHI = mkHReg(j++, HRcVec128, True); 4923 break; 4924 default: 4925 ppIRType(bb->tyenv->types[i]); 4926 vpanic("iselBB(amd64): IRTemp type"); 4927 } 4928 env->vregmap[i] = hreg; 4929 env->vregmapHI[i] = hregHI; 4930 } 4931 env->vreg_ctr = j; 4932 4933 /* The very first instruction must be an event check. */ 4934 amCounter = AMD64AMode_IR(offs_Host_EvC_Counter, hregAMD64_RBP()); 4935 amFailAddr = AMD64AMode_IR(offs_Host_EvC_FailAddr, hregAMD64_RBP()); 4936 addInstr(env, AMD64Instr_EvCheck(amCounter, amFailAddr)); 4937 4938 /* Possibly a block counter increment (for profiling). At this 4939 point we don't know the address of the counter, so just pretend 4940 it is zero. It will have to be patched later, but before this 4941 translation is used, by a call to LibVEX_patchProfCtr. */ 4942 if (addProfInc) { 4943 addInstr(env, AMD64Instr_ProfInc()); 4944 } 4945 4946 /* Ok, finally we can iterate over the statements. */ 4947 for (i = 0; i < bb->stmts_used; i++) 4948 if (bb->stmts[i]) 4949 iselStmt(env, bb->stmts[i]); 4950 4951 iselNext(env, bb->next, bb->jumpkind, bb->offsIP); 4952 4953 /* record the number of vregs we used. */ 4954 env->code->n_vregs = env->vreg_ctr; 4955 return env->code; 4956} 4957 4958 4959/*---------------------------------------------------------------*/ 4960/*--- end host_amd64_isel.c ---*/ 4961/*---------------------------------------------------------------*/ 4962