/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_generic_simd256.h"
#include "host_generic_maddf.h"
#include "host_amd64_defs.h"


/*---------------------------------------------------------*/
/*--- x87/SSE control word stuff                         ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged at exit.
*/

#define DEFAULT_FPUCW 0x027F

#define DEFAULT_MXCSR 0x1F80

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
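/* (For reference, a sketch of how those values decompose, following
   the usual x87/SSE control-register layouts: in 0x027F the low six
   bits mask all x87 exceptions, bits 9:8 = 10b select 53-bit
   precision, and bits 11:10 = 00b select round-to-nearest; in 0x1F80
   bits 12:7 mask all SSE exceptions and bits 14:13 = 00b select
   round-to-nearest.) */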
/*---------------------------------------------------------*/
/*--- misc helpers                                        ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-amd64/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}

static Bool isZeroU8 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U8
          && e->Iex.Const.con->Ico.U8 == 0;
}


/*---------------------------------------------------------*/
/*--- ISelEnv                                             ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp temporary.
     This is computed before insn selection starts, and does not
     change.  We expect this mapping to map precisely the same set of
     IRTemps as the type mapping does.

     - vregmap   holds the primary register for the IRTemp.
     - vregmapHI is only used for 128-bit integer-typed
          IRTemps.  It holds the identity of a second
          64-bit virtual HReg, which holds the high half
          of the value.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     event check.

   Note, this is all host-independent.  (JRS 20050201: well, kinda
   ... not completely.  Compare with ISelEnv for X86.)
*/

typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr64       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;
   }
   ISelEnv;


static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
                               ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, AMD64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppAMD64Instr(instr, True);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                          ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static AMD64RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e );
static AMD64RMI*     iselIntExpr_RMI     ( ISelEnv* env, const IRExpr* e );

static AMD64RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, const IRExpr* e );
static AMD64RI*      iselIntExpr_RI      ( ISelEnv* env, const IRExpr* e );

static AMD64RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, const IRExpr* e );
static AMD64RM*      iselIntExpr_RM      ( ISelEnv* env, const IRExpr* e );

static HReg          iselIntExpr_R_wrk   ( ISelEnv* env, const IRExpr* e );
static HReg          iselIntExpr_R       ( ISelEnv* env, const IRExpr* e );

static AMD64AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e );
static AMD64AMode*   iselIntExpr_AMode     ( ISelEnv* env, const IRExpr* e );

static void          iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, const IRExpr* e );
static void          iselInt128Expr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, const IRExpr* e );

static AMD64CondCode iselCondCode_wrk  ( ISelEnv* env, const IRExpr* e );
static AMD64CondCode iselCondCode      ( ISelEnv* env, const IRExpr* e );

static HReg          iselDblExpr_wrk   ( ISelEnv* env, const IRExpr* e );
static HReg          iselDblExpr       ( ISelEnv* env, const IRExpr* e );

static HReg          iselFltExpr_wrk   ( ISelEnv* env, const IRExpr* e );
static HReg          iselFltExpr       ( ISelEnv* env, const IRExpr* e );

static HReg          iselVecExpr_wrk   ( ISelEnv* env, const IRExpr* e );
static HReg          iselVecExpr       ( ISelEnv* env, const IRExpr* e );

static void          iselDVecExpr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, const IRExpr* e );
static void          iselDVecExpr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, const IRExpr* e );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                  ---*/
/*---------------------------------------------------------*/

static Bool sane_AMode ( AMD64AMode* am )
{
   switch (am->tag) {
      case Aam_IR:
         return
            toBool( hregClass(am->Aam.IR.reg) == HRcInt64
                    && (hregIsVirtual(am->Aam.IR.reg)
                        || sameHReg(am->Aam.IR.reg, hregAMD64_RBP())) );
      case Aam_IRRS:
         return
            toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.base)
                    && hregClass(am->Aam.IRRS.index) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown amd64 amode tag");
   }
}


/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y1;
   y1 = x << 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}
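/* (For example, 0xFFFFFFFF80000000 fits, since it is the sign
   extension of its lower 32 bits, whereas 0x0000000080000000 does
   not.) */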
/* Is this a 64-bit zero expression? */

static Bool isZeroU64 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U64
          && e->Iex.Const.con->Ico.U64 == 0ULL;
}

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}

/* Make an int reg-reg move. */

static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}

/* Make a vector (128 bit) reg-reg move. */

static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return AMD64Instr_SseReRg(Asse_MOV, src, dst);
}

/* Advance/retreat %rsp by n. */

static void add_to_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

static void sub_from_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

/* Push 64-bit constants on the stack. */
static void push_uimm64( ISelEnv* env, ULong uimm64 )
{
   /* If uimm64 can be expressed as the sign extension of its
      lower 32 bits, we can do it the easy way. */
   Long simm64 = (Long)uimm64;
   if ( simm64 == ((Long)(uimm64 << 32) >> 32) ) {
      addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) );
   } else {
      HReg tmp = newVRegI(env);
      addInstr( env, AMD64Instr_Imm64(uimm64, tmp) );
      addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) );
   }
}


/* Used only in doHelperCall.  If possible, produce a single
   instruction which computes 'e' into 'dst'.  If not possible, return
   NULL. */

static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
                                                    HReg     dst,
                                                    IRExpr*  e )
{
   /* Per comments in doHelperCall below, appearance of
      Iex_VECRET implies ill-formed IR. */
   vassert(e->tag != Iex_VECRET);

   /* In this case we give out a copy of the BaseBlock pointer. */
   if (UNLIKELY(e->tag == Iex_GSPTR)) {
      return mk_iMOVsd_RR( hregAMD64_RBP(), dst );
   }

   vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);

   if (e->tag == Iex_Const) {
      vassert(e->Iex.Const.con->tag == Ico_U64);
      if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
         return AMD64Instr_Alu64R(
                   Aalu_MOV,
                   AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
                   dst
                );
      } else {
         return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
      }
   }

   if (e->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      return mk_iMOVsd_RR(src, dst);
   }

   if (e->tag == Iex_Get) {
      vassert(e->Iex.Get.ty == Ity_I64);
      return AMD64Instr_Alu64R(
                Aalu_MOV,
                AMD64RMI_Mem(
                   AMD64AMode_IR(e->Iex.Get.offset,
                                 hregAMD64_RBP())),
                dst);
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32Uto64
       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
      return AMD64Instr_MovxLQ(False, src, dst);
   }

   if (0) { ppIRExpr(e); vex_printf("\n"); }

   return NULL;
}


/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done. */

static
void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   AMD64CondCode cc;
   HReg          argregs[6];
   HReg          tmpregs[6];
   AMD64Instr*   fastinstrs[6];
   UInt          n_args, i;

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nGSPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only 6x64 integer
      bits in total can be passed.  In fact the only supported arg
      type is I64.

      The return type can be I{64,32,16,8} or V{128,256}.  In the
      latter two cases, it is expected that |args| will contain the
      special node IRExpr_VECRET(), in which case this routine
      generates code to allocate space on the stack for the vector
      return value.  Since we are not passing any scalars on the
      stack, it is enough to preallocate the return space before
      marshalling any arguments, in this case.

      |args| may also contain IRExpr_GSPTR(), in which case the
      value in %rbp is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.  Note that for simplicity, in the case where
      IRExpr_VECRET() is present, we use the slow scheme.  This is
      motivated by the desire to avoid any possible complexity
      w.r.t. nested calls.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */

   /* Note that the cee->regparms field is meaningless on AMD64 host
      (since there is only one calling convention) and so we always
      ignore it. */
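   /* (Example, as a sketch: for a call f(t1, Get(16)) both arguments
      satisfy iselIntExpr_single_instruction, so the fast scheme can
      compute them straight into %rdi and %rsi.  For f(Add64(t1,t2))
      the argument needs general selection, which might use fixed
      registers, so the slow scheme computes it into a fresh vreg
      first and only moves it to %rdi once all args are done.) */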
   n_args = 0;
   for (i = 0; args[i]; i++)
      n_args++;

   if (n_args > 6)
      vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");

   argregs[0] = hregAMD64_RDI();
   argregs[1] = hregAMD64_RSI();
   argregs[2] = hregAMD64_RDX();
   argregs[3] = hregAMD64_RCX();
   argregs[4] = hregAMD64_R8();
   argregs[5] = hregAMD64_R9();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;

   fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
   fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   /* We'll need space on the stack for the return value.  Avoid
      possible complications with nested calls by using the slow
      scheme. */
   if (retTy == Ity_V128 || retTy == Ity_V256)
      goto slowscheme;

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         goto slowscheme;
      }
   }

   /* Ok, let's try for the fast scheme.  If it doesn't pan out, we'll
      use the slow scheme.  Because this is tentative, we can't call
      addInstr (that is, commit to) any instructions until we've
      handled all the arguments.  So park the resulting instructions
      in a buffer and emit that if we're successful. */

   /* FAST SCHEME */
   /* In this loop, we process args that can be computed into the
      destination (real) register with a single instruction, without
      using any fixed regs.  That also includes IRExpr_GSPTR(), but
      not IRExpr_VECRET().  Indeed, if the IR is well-formed, we can
      never see IRExpr_VECRET() at this point, since the return-type
      check above should ensure all those cases use the slow scheme
      instead. */
   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      }
      fastinstrs[i]
         = iselIntExpr_single_instruction( env, argregs[i], args[i] );
      if (fastinstrs[i] == NULL)
         goto slowscheme;
   }

   /* Looks like we're in luck.  Emit the accumulated instructions and
      move on to doing the call itself. */
   for (i = 0; i < n_args; i++)
      addInstr(env, fastinstrs[i]);

   /* Fast scheme only applies for unconditional calls.  Hence: */
   cc = Acc_ALWAYS;

   goto handle_call;


   /* SLOW SCHEME; move via temporaries */
  slowscheme:
   {}
#  if 0 /* debug only */
   if (n_args > 0) {for (i = 0; args[i]; i++) {
   ppIRExpr(args[i]); vex_printf(" "); }
   vex_printf("\n");}
#  endif

   /* If we have a vector return type, allocate a place for it on the
      stack and record its address. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (retTy == Ity_V128) {
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 16);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }
   else if (retTy == Ity_V256) {
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 32);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }

   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_GSPTR)) {
         tmpregs[i] = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[i]));
         nGSPTRs++;
      }
      else if (UNLIKELY(arg->tag == Iex_VECRET)) {
         /* We stashed the address of the return slot earlier, so just
            retrieve it now. */
         vassert(!hregIsInvalid(r_vecRetAddr));
         tmpregs[i] = r_vecRetAddr;
         nVECRETs++;
      }
      else {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
         tmpregs[i] = iselIntExpr_R(env, args[i]);
      }
   }

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Acc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Move the args to their final destinations. */
   for (i = 0; i < n_args; i++) {
      /* None of these insns, including any spill code that might
         be generated, may alter the condition codes. */
      addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
   }


   /* Do final checks, set the return values, and generate the call
      instruction proper. */
  handle_call:

   if (retTy == Ity_V128 || retTy == Ity_V256) {
      vassert(nVECRETs == 1);
   } else {
      vassert(nVECRETs == 0);
   }

   vassert(nGSPTRs == 0 || nGSPTRs == 1);

   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */
   addInstr(env,
            AMD64Instr_Call(cc, (Addr)cee->addr, n_args, *retloc));
}


/* Given a guest-state array descriptor, an index expression and a
   bias, generate an AMD64AMode holding the relevant guest state
   offset. */

static
AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                  IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;

   /* Throw out any cases not generated by an amd64 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-amd64-guest on amd64 host. */
   if (nElems != 8 || (elemSz != 1 && elemSz != 8))
      vpanic("genGuestArrayOffset(amd64 host)");

   /* Compute off into a reg, %off.  Then return:

        movq %off, %tmp
        addq $bias, %tmp         (if bias != 0)
        andq $7, %tmp
        ... base(%rbp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      /* Make sure the bias is sane, in the sense that there are
         no significant bits above bit 30 in it. */
      vassert(-10000 < bias && bias < 10000);
      addInstr(env,
               AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
   }
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
   vassert(elemSz == 1 || elemSz == 8);
   return
      AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
                                    elemSz==8 ? 3 : 0);
}


/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
static
void set_SSE_rounding_default ( ISelEnv* env )
{
   /* pushq $DEFAULT_MXCSR
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}

/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* movq $DEFAULT_FPUCW, -8(%rsp)
      fldcw -8(%rsp)
   */
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64M(
                    Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}


/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the SSE machinery to
   have the same rounding.
*/
static
void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* Note: this sequence only makes sense because DEFAULT_MXCSR has
      both rounding bits == 0.  If that wasn't the case, we couldn't
      create a new rounding field simply by ORing the new value into
      place. */

   /* movq $3, %reg
      andq [[mode]], %reg  -- shouldn't be needed; paranoia
      shlq $13, %reg
      orq $DEFAULT_MXCSR, %reg
      pushq %reg
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   HReg        reg      = newVRegI(env);
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                   iselIntExpr_RMI(env, mode), reg));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
   addInstr(env, AMD64Instr_Alu64R(
                    Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
   addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}


/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
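/* (Both here and in set_SSE_rounding_mode above, the value can simply
   be shifted into place because IRRoundingMode's 2-bit encoding --
   00 nearest, 01 -inf, 10 +inf, 11 toward zero -- matches the
   hardware's RC field encoding: bits 11:10 of %fpucw here, and bits
   14:13 of %mxcsr above, hence the shifts by 10 and 13.) */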
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());

   /* movq  %rrm, %rrm2
      andq  $3, %rrm2   -- shouldn't be needed; paranoia
      shlq  $10, %rrm2
      orq   $DEFAULT_FPUCW, %rrm2
      movq  %rrm2, -8(%rsp)
      fldcw -8(%rsp)
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                   AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
                                   AMD64RI_Reg(rrm2), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}


/* Generate all-zeroes into a new vector register.
*/
static HReg generate_zeroes_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
   return dst;
}

/* Generate all-ones into a new vector register.
*/
static HReg generate_ones_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst));
   return dst;
}


/* Generate !src into a new vector register.  Amazing that there isn't
   a less crappy way to do this.
*/
static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
{
   HReg dst = generate_ones_V128(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
   return dst;
}


/* Expand the given byte into a 64-bit word, by cloning each bit
   8 times. */
static ULong bitmask8_to_bytemask64 ( UShort w8 )
{
   vassert(w8 == (w8 & 0xFF));
   ULong w64 = 0;
   Int i;
   for (i = 0; i < 8; i++) {
      if (w8 & (1<<i))
         w64 |= (0xFFULL << (8 * i));
   }
   return w64;
}
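/* (For example, 0xA5 -- binary 10100101 -- expands to
   0xFF00FF0000FF00FF.) */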
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32/16/8 bit)          ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64, 32, 16 and 8-bit type.  All
   results are returned in a 64-bit register.  For 32-, 16- and 8-bit
   expressions, the upper 32/48/56 bits are arbitrary, so you should
   mask or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
{
   /* Used for unary/binary SIMD64 ops. */
   HWord fn = 0;
   Bool second_is_UInt;

   MatchInfo mi;
   DECLARE_PATTERN(p_1Uto8_64to1);
   DECLARE_PATTERN(p_LDle8_then_8Uto64);
   DECLARE_PATTERN(p_LDle16_then_16Uto64);

   IRType ty = typeOfIRExpr(env->type_env,e);
   switch (ty) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: break;
      default: vassert(0);
   }

   switch (e->tag) {

   /* --------- TEMP --------- */
   case Iex_RdTmp: {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* --------- LOAD --------- */
   case Iex_Load: {
      HReg dst = newVRegI(env);
      AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

      /* We can't handle big-endian loads, nor load-linked. */
      if (e->Iex.Load.end != Iend_LE)
         goto irreducible;

      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                         AMD64RMI_Mem(amode), dst) );
         return dst;
      }
      if (ty == Ity_I32) {
         addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I16) {
         addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
         return dst;
      }
      break;
   }

   /* --------- BINARY OP --------- */
   case Iex_Binop: {
      AMD64AluOp   aluOp;
      AMD64ShiftOp shOp;

      /* Pattern: Sub64(0,x) */
      /*     and: Sub32(0,x) */
      if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1))
          || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) {
         HReg dst = newVRegI(env);
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
         return dst;
      }

      /* Is it an addition or logical style op? */
      switch (e->Iex.Binop.op) {
         case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
            aluOp = Aalu_ADD; break;
         case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
            aluOp = Aalu_SUB; break;
         case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
            aluOp = Aalu_AND; break;
         case Iop_Or8:  case Iop_Or16:  case Iop_Or32:  case Iop_Or64:
            aluOp = Aalu_OR; break;
         case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
            aluOp = Aalu_XOR; break;
         case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
            aluOp = Aalu_MUL; break;
         default:
            aluOp = Aalu_INVALID; break;
      }
      /* For commutative ops we assume any literal
         values are on the second operand. */
      if (aluOp != Aalu_INVALID) {
         HReg dst      = newVRegI(env);
         HReg reg      = iselIntExpr_R(env, e->Iex.Binop.arg1);
         AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
         return dst;
      }

      /* Perhaps a shift op? */
      switch (e->Iex.Binop.op) {
         case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
            shOp = Ash_SHL; break;
         case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
            shOp = Ash_SHR; break;
         case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
            shOp = Ash_SAR; break;
         default:
            shOp = Ash_INVALID; break;
      }
      if (shOp != Ash_INVALID) {
         HReg dst = newVRegI(env);

         /* regL = the value to be shifted */
         HReg regL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(regL,dst));

         /* Do any necessary widening for 32/16/8 bit operands */
         switch (e->Iex.Binop.op) {
            case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
               break;
            case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
               break;
            case Iop_Shr8:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFF), dst));
               break;
            case Iop_Shr16:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
               break;
            case Iop_Shr32:
               addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
               break;
            case Iop_Sar8:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
               break;
            case Iop_Sar16:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
               break;
            case Iop_Sar32:
               addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
               break;
            default:
               ppIROp(e->Iex.Binop.op);
               vassert(0);
         }

         /* Now consider the shift amount.  If it's a literal, we
            can do a much better job than the general case. */
         if (e->Iex.Binop.arg2->tag == Iex_Const) {
            /* assert that the IR is well-typed */
            Int nshift;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
            vassert(nshift >= 0);
            if (nshift > 0)
               /* Can't allow nshift==0 since that means %cl */
               addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
         } else {
            /* General case; we have to force the amount into %cl. */
            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
            addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
         }
         return dst;
      }

      /* Deal with 64-bit SIMD binary ops */
      second_is_UInt = False;
      switch (e->Iex.Binop.op) {
         case Iop_Add8x8:
            fn = (HWord)h_generic_calc_Add8x8; break;
         case Iop_Add16x4:
            fn = (HWord)h_generic_calc_Add16x4; break;
         case Iop_Add32x2:
            fn = (HWord)h_generic_calc_Add32x2; break;

         case Iop_Avg8Ux8:
            fn = (HWord)h_generic_calc_Avg8Ux8; break;
         case Iop_Avg16Ux4:
            fn = (HWord)h_generic_calc_Avg16Ux4; break;

         case Iop_CmpEQ8x8:
            fn = (HWord)h_generic_calc_CmpEQ8x8; break;
         case Iop_CmpEQ16x4:
            fn = (HWord)h_generic_calc_CmpEQ16x4; break;
         case Iop_CmpEQ32x2:
            fn = (HWord)h_generic_calc_CmpEQ32x2; break;

         case Iop_CmpGT8Sx8:
            fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
         case Iop_CmpGT16Sx4:
            fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
         case Iop_CmpGT32Sx2:
            fn = (HWord)h_generic_calc_CmpGT32Sx2; break;

         case Iop_InterleaveHI8x8:
            fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
         case Iop_InterleaveLO8x8:
            fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
         case Iop_InterleaveHI16x4:
            fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
         case Iop_InterleaveLO16x4:
            fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
         case Iop_InterleaveHI32x2:
            fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
         case Iop_InterleaveLO32x2:
            fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
         case Iop_CatOddLanes16x4:
            fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
         case Iop_CatEvenLanes16x4:
            fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
         case Iop_Perm8x8:
            fn = (HWord)h_generic_calc_Perm8x8; break;

         case Iop_Max8Ux8:
            fn = (HWord)h_generic_calc_Max8Ux8; break;
         case Iop_Max16Sx4:
            fn = (HWord)h_generic_calc_Max16Sx4; break;
         case Iop_Min8Ux8:
            fn = (HWord)h_generic_calc_Min8Ux8; break;
         case Iop_Min16Sx4:
            fn = (HWord)h_generic_calc_Min16Sx4; break;

         case Iop_Mul16x4:
            fn = (HWord)h_generic_calc_Mul16x4; break;
         case Iop_Mul32x2:
            fn = (HWord)h_generic_calc_Mul32x2; break;
         case Iop_MulHi16Sx4:
            fn = (HWord)h_generic_calc_MulHi16Sx4; break;
         case Iop_MulHi16Ux4:
            fn = (HWord)h_generic_calc_MulHi16Ux4; break;

         case Iop_QAdd8Sx8:
            fn = (HWord)h_generic_calc_QAdd8Sx8; break;
         case Iop_QAdd16Sx4:
            fn = (HWord)h_generic_calc_QAdd16Sx4; break;
         case Iop_QAdd8Ux8:
            fn = (HWord)h_generic_calc_QAdd8Ux8; break;
         case Iop_QAdd16Ux4:
            fn = (HWord)h_generic_calc_QAdd16Ux4; break;

         case Iop_QNarrowBin32Sto16Sx4:
            fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; break;
         case Iop_QNarrowBin16Sto8Sx8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; break;
         case Iop_QNarrowBin16Sto8Ux8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; break;
         case Iop_NarrowBin16to8x8:
            fn = (HWord)h_generic_calc_NarrowBin16to8x8; break;
         case Iop_NarrowBin32to16x4:
            fn = (HWord)h_generic_calc_NarrowBin32to16x4; break;

         case Iop_QSub8Sx8:
            fn = (HWord)h_generic_calc_QSub8Sx8; break;
         case Iop_QSub16Sx4:
            fn = (HWord)h_generic_calc_QSub16Sx4; break;
         case Iop_QSub8Ux8:
            fn = (HWord)h_generic_calc_QSub8Ux8; break;
         case Iop_QSub16Ux4:
            fn = (HWord)h_generic_calc_QSub16Ux4; break;

         case Iop_Sub8x8:
            fn = (HWord)h_generic_calc_Sub8x8; break;
         case Iop_Sub16x4:
            fn = (HWord)h_generic_calc_Sub16x4; break;
         case Iop_Sub32x2:
            fn = (HWord)h_generic_calc_Sub32x2; break;

         case Iop_ShlN32x2:
            fn = (HWord)h_generic_calc_ShlN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShlN16x4:
            fn = (HWord)h_generic_calc_ShlN16x4;
            second_is_UInt = True;
            break;
         case Iop_ShlN8x8:
            fn = (HWord)h_generic_calc_ShlN8x8;
            second_is_UInt = True;
            break;
         case Iop_ShrN32x2:
            fn = (HWord)h_generic_calc_ShrN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShrN16x4:
            fn = (HWord)h_generic_calc_ShrN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN32x2:
            fn = (HWord)h_generic_calc_SarN32x2;
            second_is_UInt = True;
            break;
         case Iop_SarN16x4:
            fn = (HWord)h_generic_calc_SarN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN8x8:
            fn = (HWord)h_generic_calc_SarN8x8;
            second_is_UInt = True;
            break;

         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of signature
               ULong fn ( ULong, ULong ), and they are
            not marked as regparm functions.
         */
         HReg dst  = newVRegI(env);
         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         if (second_is_UInt)
            addInstr(env, AMD64Instr_MovxLQ(False, argR, argR));
         addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
         addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2,
                                        mk_RetLoc_simple(RLPri_Int) ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }

      /* Handle misc other ops. */

      if (e->Iex.Binop.op == Iop_Max32U) {
         HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg dst  = newVRegI(env);
         HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(src1, dst));
         addInstr(env, AMD64Instr_Alu32R(Aalu_CMP, AMD64RMI_Reg(src2), dst));
         addInstr(env, AMD64Instr_CMov64(Acc_B, src2, dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_DivModS64to32
          || e->Iex.Binop.op == Iop_DivModU64to32) {
         /* 64 x 32 -> (32(rem),32(div)) division */
         /* Get the 64-bit operand into edx:eax, and the other into
            any old R/M. */
         HReg rax     = hregAMD64_RAX();
         HReg rdx     = hregAMD64_RDX();
         HReg dst     = newVRegI(env);
         Bool syned   = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
         AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
         /* Compute the left operand into a reg, and then
            put the top half in edx and the bottom in eax. */
         HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(left64, rdx));
         addInstr(env, mk_iMOVsd_RR(left64, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
         addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
         addInstr(env, AMD64Instr_MovxLQ(False, rdx, rdx));
         addInstr(env, AMD64Instr_MovxLQ(False, rax, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
         addInstr(env, mk_iMOVsd_RR(rax, dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_32HLto64) {
         HReg hi32  = newVRegI(env);
         HReg lo32  = newVRegI(env);
         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
         addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
         addInstr(env, AMD64Instr_MovxLQ(False, lo32, lo32));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo32), hi32));
         return hi32;
      }

      if (e->Iex.Binop.op == Iop_16HLto32) {
         HReg hi16  = newVRegI(env);
         HReg lo16  = newVRegI(env);
         HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
         addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo16), hi16));
         return hi16;
      }

      if (e->Iex.Binop.op == Iop_8HLto16) {
         HReg hi8  = newVRegI(env);
         HReg lo8  = newVRegI(env);
         HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
         addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo8), hi8));
         return hi8;
      }

      if (e->Iex.Binop.op == Iop_MullS32
          || e->Iex.Binop.op == Iop_MullS16
          || e->Iex.Binop.op == Iop_MullS8
          || e->Iex.Binop.op == Iop_MullU32
          || e->Iex.Binop.op == Iop_MullU16
          || e->Iex.Binop.op == Iop_MullU8) {
         HReg a32   = newVRegI(env);
         HReg b32   = newVRegI(env);
         HReg a32s  = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg b32s  = iselIntExpr_R(env, e->Iex.Binop.arg2);
         Int          shift  = 0;
         AMD64ShiftOp shr_op = Ash_SHR;
         switch (e->Iex.Binop.op) {
            case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
            case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
            case Iop_MullS8:  shr_op = Ash_SAR; shift = 56; break;
            case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
            case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
            case Iop_MullU8:  shr_op = Ash_SHR; shift = 56; break;
            default: vassert(0);
         }

         addInstr(env, mk_iMOVsd_RR(a32s, a32));
         addInstr(env, mk_iMOVsd_RR(b32s, b32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, a32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, b32));
         addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
         return b32;
      }
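      /* (The MullS/MullU cases above obtain the full 2N-bit product by
         first widening each N-bit operand to 64 bits -- shift left by
         64-N, then shift right by the same amount, arithmetically for
         the signed variants and logically for the unsigned ones --
         and then doing an ordinary 64-bit multiply, whose low 2N bits
         are the result.) */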
      if (e->Iex.Binop.op == Iop_CmpF64) {
         HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
         HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
         /* Mask out irrelevant parts of the result so as to conform
            to the CmpF64 definition. */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_F64toI32S
          || e->Iex.Binop.op == Iop_F64toI64S) {
         Int  szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8;
         HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
         addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
         set_SSE_rounding_default(env);
         return dst;
      }

      break;
   }

   /* --------- UNARY OP --------- */
   case Iex_Unop: {

      /* 1Uto8(64to1(expr64)) */
      {
         DEFINE_PATTERN( p_1Uto8_64to1,
                         unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
         if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
            const IRExpr* expr64 = mi.bindee[0];
            HReg    dst    = newVRegI(env);
            HReg    src    = iselIntExpr_R(env, expr64);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm(1), dst));
            return dst;
         }
      }

      /* 8Uto64(LDle(expr64)) */
      {
         DEFINE_PATTERN(p_LDle8_then_8Uto64,
                        unop(Iop_8Uto64,
                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
         if (matchIRExpr(&mi,p_LDle8_then_8Uto64,e)) {
            HReg dst = newVRegI(env);
            AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
            return dst;
         }
      }

      /* 16Uto64(LDle(expr64)) */
      {
         DEFINE_PATTERN(p_LDle16_then_16Uto64,
                        unop(Iop_16Uto64,
                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
         if (matchIRExpr(&mi,p_LDle16_then_16Uto64,e)) {
            HReg dst = newVRegI(env);
            AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
            return dst;
         }
      }

      /* 32Uto64( Add32/Sub32/And32/Or32/Xor32(expr32, expr32) )
         Use 32 bit arithmetic and let the default zero-extend rule
         do the 32Uto64 for free. */
      if (e->Iex.Unop.op == Iop_32Uto64 && e->Iex.Unop.arg->tag == Iex_Binop) {
         IROp    opi  = e->Iex.Unop.arg->Iex.Binop.op; /* inner op */
         IRExpr* argL = e->Iex.Unop.arg->Iex.Binop.arg1;
         IRExpr* argR = e->Iex.Unop.arg->Iex.Binop.arg2;
         AMD64AluOp aluOp = Aalu_INVALID;
         switch (opi) {
            case Iop_Add32: aluOp = Aalu_ADD; break;
            case Iop_Sub32: aluOp = Aalu_SUB; break;
            case Iop_And32: aluOp = Aalu_AND; break;
            case Iop_Or32:  aluOp = Aalu_OR;  break;
            case Iop_Xor32: aluOp = Aalu_XOR; break;
            default: break;
         }
         if (aluOp != Aalu_INVALID) {
            /* For commutative ops we assume any literal values are on
               the second operand. */
            HReg dst      = newVRegI(env);
            HReg reg      = iselIntExpr_R(env, argL);
            AMD64RMI* rmi = iselIntExpr_RMI(env, argR);
            addInstr(env, mk_iMOVsd_RR(reg,dst));
            addInstr(env, AMD64Instr_Alu32R(aluOp, rmi, dst));
            return dst;
         }
         /* just fall through to normal handling for Iop_32Uto64 */
      }

      /* Fallback cases */
      switch (e->Iex.Unop.op) {
         case Iop_32Uto64:
         case Iop_32Sto64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_MovxLQ(e->Iex.Unop.op == Iop_32Sto64,
                                            src, dst) );
            return dst;
         }
         case Iop_128HIto64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rHi; /* and abandon rLo */
         }
         case Iop_128to64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rLo; /* and abandon rHi */
         }
         case Iop_8Uto16:
         case Iop_8Uto32:
         case Iop_8Uto64:
         case Iop_16Uto64:
         case Iop_16Uto32: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
                                   || e->Iex.Unop.op==Iop_16Uto64 );
            UInt mask    = srcIs16 ? 0xFFFF : 0xFF;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm(mask), dst));
            return dst;
         }
         case Iop_8Sto16:
         case Iop_8Sto64:
         case Iop_8Sto32:
         case Iop_16Sto32:
         case Iop_16Sto64: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
                                   || e->Iex.Unop.op==Iop_16Sto64 );
            UInt amt     = srcIs16 ? 48 : 56;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
            return dst;
         }
         case Iop_Not8:
         case Iop_Not16:
         case Iop_Not32:
         case Iop_Not64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
            return dst;
         }
         case Iop_16HIto8:
         case Iop_32HIto16:
         case Iop_64HIto32: {
            HReg dst  = newVRegI(env);
            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
            Int shift = 0;
            switch (e->Iex.Unop.op) {
               case Iop_16HIto8:  shift = 8;  break;
               case Iop_32HIto16: shift = 16; break;
               case Iop_64HIto32: shift = 32; break;
               default: vassert(0);
            }
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
            return dst;
         }
         case Iop_1Uto64:
         case Iop_1Uto32:
         case Iop_1Uto8: {
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            return dst;
         }
         case Iop_1Sto8:
         case Iop_1Sto16:
         case Iop_1Sto32:
         case Iop_1Sto64: {
            /* could do better than this, but for now ... */
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }
         case Iop_Ctz64: {
            /* Count trailing zeroes, implemented by amd64 'bsfq' */
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
            return dst;
         }
         case Iop_Clz64: {
            /* Count leading zeroes.  Do 'bsrq' to establish the index
               of the highest set bit, and subtract that value from
               63. */
            HReg tmp = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
            addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                            AMD64RMI_Imm(63), dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
                                            AMD64RMI_Reg(tmp), dst));
            return dst;
         }

         case Iop_CmpwNEZ64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                            AMD64RMI_Reg(src), dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }

         case Iop_CmpwNEZ32: {
            HReg src = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(pre,src));
            addInstr(env, AMD64Instr_MovxLQ(False, src, src));
            addInstr(env, mk_iMOVsd_RR(src,dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                            AMD64RMI_Reg(src), dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }
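         /* (Both CmpwNEZ cases rely on the identity that the OR of x
            and -x has its sign bit set iff x is nonzero; the
            arithmetic right shift by 63 then smears that bit across
            the whole register, giving all-ones for nonzero x and
            zero otherwise.) */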
         case Iop_Left8:
         case Iop_Left16:
         case Iop_Left32:
         case Iop_Left64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src, dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst));
            return dst;
         }

         case Iop_V128to32: {
            HReg dst     = newVRegI(env);
            HReg vec     = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
            addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
            return dst;
         }

         /* V128{HI}to64 */
         case Iop_V128HIto64:
         case Iop_V128to64: {
            HReg dst = newVRegI(env);
            Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? -8 : -16;
            HReg rsp = hregAMD64_RSP();
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
                                             16, vec, m16_rsp));
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(off_rsp), dst ));
            return dst;
         }

         case Iop_V256to64_0: case Iop_V256to64_1:
         case Iop_V256to64_2: case Iop_V256to64_3: {
            HReg vHi, vLo, vec;
            iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg);
            /* Do the first part of the selection by deciding which of
               the 128-bit registers to look at, and the second part
               using the same scheme as for V128{HI}to64 above. */
            Int off = 0;
            switch (e->Iex.Unop.op) {
               case Iop_V256to64_0: vec = vLo; off = -16; break;
               case Iop_V256to64_1: vec = vLo; off =  -8; break;
               case Iop_V256to64_2: vec = vHi; off = -16; break;
               case Iop_V256to64_3: vec = vHi; off =  -8; break;
               default: vassert(0);
            }
            HReg dst = newVRegI(env);
            HReg rsp = hregAMD64_RSP();
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
                                             16, vec, m16_rsp));
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(off_rsp), dst ));
            return dst;
         }

         /* ReinterpF64asI64(e) */
         /* Given an IEEE754 double, produce an I64 with the same bit
            pattern. */
         case Iop_ReinterpF64asI64: {
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegI(env);
            HReg        src    = iselDblExpr(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
            addInstr(env, AMD64Instr_Alu64R(
                             Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
            return dst;
         }

         /* ReinterpF32asI32(e) */
         /* Given an IEEE754 single, produce an I64 with the same bit
            pattern in the lower half. */
         case Iop_ReinterpF32asI32: {
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegI(env);
            HReg        src    = iselFltExpr(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp));
            addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst ));
            return dst;
         }

         case Iop_16to8:
         case Iop_32to8:
         case Iop_64to8:
         case Iop_32to16:
         case Iop_64to16:
         case Iop_64to32:
            /* These are no-ops. */
            return iselIntExpr_R(env, e->Iex.Unop.arg);

         case Iop_GetMSBs8x8: {
            /* Note: the following assumes the helper is of
               signature
                  UInt fn ( ULong ), and is not a regparm fn.
            */
            HReg dst = newVRegI(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            fn = (HWord)h_generic_calc_GetMSBs8x8;
            addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
                                           1, mk_RetLoc_simple(RLPri_Int) ));
            /* MovxLQ is not exactly the right thing here.  We just
               need to get the bottom 8 bits of RAX into dst, and zero
               out everything else.  Assuming that the helper returns
               a UInt with the top 24 bits zeroed out, it'll do,
               though. */
            addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
            return dst;
         }

         case Iop_GetMSBs8x16: {
            /* Note: the following assumes the helper is of signature
                  UInt fn ( ULong w64hi, ULong w64Lo ),
               and is not a regparm fn. */
*/ 1715 HReg dst = newVRegI(env); 1716 HReg vec = iselVecExpr(env, e->Iex.Unop.arg); 1717 HReg rsp = hregAMD64_RSP(); 1718 fn = (HWord)h_generic_calc_GetMSBs8x16; 1719 AMD64AMode* m8_rsp = AMD64AMode_IR( -8, rsp); 1720 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp); 1721 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 1722 16, vec, m16_rsp)); 1723 /* hi 64 bits into RDI -- the first arg */ 1724 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, 1725 AMD64RMI_Mem(m8_rsp), 1726 hregAMD64_RDI() )); /* 1st arg */ 1727 /* lo 64 bits into RSI -- the 2nd arg */ 1728 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, 1729 AMD64RMI_Mem(m16_rsp), 1730 hregAMD64_RSI() )); /* 2nd arg */ 1731 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1732 2, mk_RetLoc_simple(RLPri_Int) )); 1733 /* MovxLQ is not exactly the right thing here. We just 1734 need to get the bottom 16 bits of RAX into dst, and zero 1735 out everything else. Assuming that the helper returns 1736 a UInt with the top 16 bits zeroed out, it'll do, 1737 though. */ 1738 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst)); 1739 return dst; 1740 } 1741 1742 default: 1743 break; 1744 } 1745 1746 /* Deal with unary 64-bit SIMD ops. */ 1747 switch (e->Iex.Unop.op) { 1748 case Iop_CmpNEZ32x2: 1749 fn = (HWord)h_generic_calc_CmpNEZ32x2; break; 1750 case Iop_CmpNEZ16x4: 1751 fn = (HWord)h_generic_calc_CmpNEZ16x4; break; 1752 case Iop_CmpNEZ8x8: 1753 fn = (HWord)h_generic_calc_CmpNEZ8x8; break; 1754 default: 1755 fn = (HWord)0; break; 1756 } 1757 if (fn != (HWord)0) { 1758 /* Note: the following assumes all helpers are of 1759 signature 1760 ULong fn ( ULong ), and they are 1761 not marked as regparm functions. 1762 */ 1763 HReg dst = newVRegI(env); 1764 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); 1765 addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) ); 1766 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1, 1767 mk_RetLoc_simple(RLPri_Int) )); 1768 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst)); 1769 return dst; 1770 } 1771 1772 break; 1773 } 1774 1775 /* --------- GET --------- */ 1776 case Iex_Get: { 1777 if (ty == Ity_I64) { 1778 HReg dst = newVRegI(env); 1779 addInstr(env, AMD64Instr_Alu64R( 1780 Aalu_MOV, 1781 AMD64RMI_Mem( 1782 AMD64AMode_IR(e->Iex.Get.offset, 1783 hregAMD64_RBP())), 1784 dst)); 1785 return dst; 1786 } 1787 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) { 1788 HReg dst = newVRegI(env); 1789 addInstr(env, AMD64Instr_LoadEX( 1790 toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)), 1791 False, 1792 AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()), 1793 dst)); 1794 return dst; 1795 } 1796 break; 1797 } 1798 1799 case Iex_GetI: { 1800 AMD64AMode* am 1801 = genGuestArrayOffset( 1802 env, e->Iex.GetI.descr, 1803 e->Iex.GetI.ix, e->Iex.GetI.bias ); 1804 HReg dst = newVRegI(env); 1805 if (ty == Ity_I8) { 1806 addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst )); 1807 return dst; 1808 } 1809 if (ty == Ity_I64) { 1810 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst )); 1811 return dst; 1812 } 1813 break; 1814 } 1815 1816 /* --------- CCALL --------- */ 1817 case Iex_CCall: { 1818 HReg dst = newVRegI(env); 1819 vassert(ty == e->Iex.CCall.retty); 1820 1821 /* be very restrictive for now. Only 64-bit ints allowed for 1822 args, and 64 or 32 bits for return type. */ 1823 if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32) 1824 goto irreducible; 1825 1826 /* Marshal args, do the call. 
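         Below, doHelperCall marshals the arguments into the integer
         argument registers per the standard AMD64 calling convention,
         and the result comes back in %rax; that is why only RLPri_Int
         with addToSp == 0 is expected here.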
*/ 1827 UInt addToSp = 0; 1828 RetLoc rloc = mk_RetLoc_INVALID(); 1829 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, 1830 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args ); 1831 vassert(is_sane_RetLoc(rloc)); 1832 vassert(rloc.pri == RLPri_Int); 1833 vassert(addToSp == 0); 1834 1835 /* Move to dst, and zero out the top 32 bits if the result type is 1836 Ity_I32. Probably overkill, but still .. */ 1837 if (e->Iex.CCall.retty == Ity_I64) 1838 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst)); 1839 else 1840 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst)); 1841 1842 return dst; 1843 } 1844 1845 /* --------- LITERAL --------- */ 1846 /* 64/32/16/8-bit literals */ 1847 case Iex_Const: 1848 if (ty == Ity_I64) { 1849 HReg r = newVRegI(env); 1850 addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r)); 1851 return r; 1852 } else { 1853 AMD64RMI* rmi = iselIntExpr_RMI ( env, e ); 1854 HReg r = newVRegI(env); 1855 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r)); 1856 return r; 1857 } 1858 1859 /* --------- MULTIPLEX --------- */ 1860 case Iex_ITE: { // VFD 1861 if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) 1862 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) { 1863 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue); 1864 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse); 1865 HReg dst = newVRegI(env); 1866 addInstr(env, mk_iMOVsd_RR(r1,dst)); 1867 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond); 1868 addInstr(env, AMD64Instr_CMov64(cc ^ 1, r0, dst)); 1869 return dst; 1870 } 1871 break; 1872 } 1873 1874 /* --------- TERNARY OP --------- */ 1875 case Iex_Triop: { 1876 IRTriop *triop = e->Iex.Triop.details; 1877 /* C3210 flags following FPU partial remainder (fprem), both 1878 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */ 1879 if (triop->op == Iop_PRemC3210F64 1880 || triop->op == Iop_PRem1C3210F64) { 1881 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 1882 HReg arg1 = iselDblExpr(env, triop->arg2); 1883 HReg arg2 = iselDblExpr(env, triop->arg3); 1884 HReg dst = newVRegI(env); 1885 addInstr(env, AMD64Instr_A87Free(2)); 1886 1887 /* one arg -> top of x87 stack */ 1888 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp)); 1889 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 1890 1891 /* other arg -> top of x87 stack */ 1892 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp)); 1893 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 1894 1895 switch (triop->op) { 1896 case Iop_PRemC3210F64: 1897 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM)); 1898 break; 1899 case Iop_PRem1C3210F64: 1900 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1)); 1901 break; 1902 default: 1903 vassert(0); 1904 } 1905 /* Ignore the result, and instead make off with the FPU's 1906 C3210 flags (in the status word). */ 1907 addInstr(env, AMD64Instr_A87StSW(m8_rsp)); 1908 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst)); 1909 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst)); 1910 return dst; 1911 } 1912 break; 1913 } 1914 1915 default: 1916 break; 1917 } /* switch (e->tag) */ 1918 1919 /* We get here if no pattern matched. 
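      This is also the landing point for the goto in the Iex_CCall case
      above, for return types this selector does not handle.  Either way
      the offending tree is printed and we panic, since reaching here
      means the IR is outside the subset this selector supports.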
*/
   irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R(amd64): cannot reduce tree");
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expression auxiliaries              ---*/
/*---------------------------------------------------------*/

/* --------------------- AMODEs --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 64-bit one.
*/

static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e )
{
   AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
   vassert(sane_AMode(am));
   return am;
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e )
{
   MatchInfo mi;
   DECLARE_PATTERN(p_complex);
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64);

   /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
   /*              bind0        bind1  bind2   bind3   */
   DEFINE_PATTERN(p_complex,
      binop( Iop_Add64,
             binop( Iop_Add64,
                    bind(0),
                    binop(Iop_Shl64, bind(1), bind(2))
                  ),
             bind(3)
           )
   );
   if (matchIRExpr(&mi, p_complex, e)) {
      const IRExpr* expr1  = mi.bindee[0];
      const IRExpr* expr2  = mi.bindee[1];
      const IRExpr* imm8   = mi.bindee[2];
      const IRExpr* simm32 = mi.bindee[3];
      if (imm8->tag == Iex_Const
          && imm8->Iex.Const.con->tag == Ico_U8
          && imm8->Iex.Const.con->Ico.U8 < 4
          /* imm8 is OK, now check simm32 */
          && simm32->tag == Iex_Const
          && simm32->Iex.Const.con->tag == Ico_U64
          && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
         UInt shift = imm8->Iex.Const.con->Ico.U8;
         UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64);
         HReg r1 = iselIntExpr_R(env, expr1);
         HReg r2 = iselIntExpr_R(env, expr2);
         vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
         return AMD64AMode_IRRS(offset, r1, r2, shift);
      }
   }

   /* Add64(expr1, Shl64(expr2, imm)) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
       && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
         return AMD64AMode_IRRS(0, r1, r2, shift);
      }
   }

   /* Add64(expr,i) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
       && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      return AMD64AMode_IR(
                toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
                r1
             );
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   {
      HReg r1 = iselIntExpr_R(env, e);
      return AMD64AMode_IR(0, r1);
   }
}


/* --------------------- RMIs --------------------- */

/* Similarly, calculate an expression into an AMD64RMI operand.
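   An RMI is whichever of an immediate, a register or a memory
   reference fits the expression best; the _wrk routine below prefers
   the immediate and memory forms where it can, so that the eventual
   ALU instruction can usually fold the operand directly.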
As with 2024 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */ 2025 2026static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e ) 2027{ 2028 AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e); 2029 /* sanity checks ... */ 2030 switch (rmi->tag) { 2031 case Armi_Imm: 2032 return rmi; 2033 case Armi_Reg: 2034 vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64); 2035 vassert(hregIsVirtual(rmi->Armi.Reg.reg)); 2036 return rmi; 2037 case Armi_Mem: 2038 vassert(sane_AMode(rmi->Armi.Mem.am)); 2039 return rmi; 2040 default: 2041 vpanic("iselIntExpr_RMI: unknown amd64 RMI tag"); 2042 } 2043} 2044 2045/* DO NOT CALL THIS DIRECTLY ! */ 2046static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e ) 2047{ 2048 IRType ty = typeOfIRExpr(env->type_env,e); 2049 vassert(ty == Ity_I64 || ty == Ity_I32 2050 || ty == Ity_I16 || ty == Ity_I8); 2051 2052 /* special case: immediate 64/32/16/8 */ 2053 if (e->tag == Iex_Const) { 2054 switch (e->Iex.Const.con->tag) { 2055 case Ico_U64: 2056 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) { 2057 return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)); 2058 } 2059 break; 2060 case Ico_U32: 2061 return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32); break; 2062 case Ico_U16: 2063 return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16); break; 2064 case Ico_U8: 2065 return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8); break; 2066 default: 2067 vpanic("iselIntExpr_RMI.Iex_Const(amd64)"); 2068 } 2069 } 2070 2071 /* special case: 64-bit GET */ 2072 if (e->tag == Iex_Get && ty == Ity_I64) { 2073 return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset, 2074 hregAMD64_RBP())); 2075 } 2076 2077 /* special case: 64-bit load from memory */ 2078 if (e->tag == Iex_Load && ty == Ity_I64 2079 && e->Iex.Load.end == Iend_LE) { 2080 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2081 return AMD64RMI_Mem(am); 2082 } 2083 2084 /* default case: calculate into a register and return that */ 2085 { 2086 HReg r = iselIntExpr_R ( env, e ); 2087 return AMD64RMI_Reg(r); 2088 } 2089} 2090 2091 2092/* --------------------- RIs --------------------- */ 2093 2094/* Calculate an expression into an AMD64RI operand. As with 2095 iselIntExpr_R, the expression can have type 64, 32, 16 or 8 2096 bits. */ 2097 2098static AMD64RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e ) 2099{ 2100 AMD64RI* ri = iselIntExpr_RI_wrk(env, e); 2101 /* sanity checks ... */ 2102 switch (ri->tag) { 2103 case Ari_Imm: 2104 return ri; 2105 case Ari_Reg: 2106 vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64); 2107 vassert(hregIsVirtual(ri->Ari.Reg.reg)); 2108 return ri; 2109 default: 2110 vpanic("iselIntExpr_RI: unknown amd64 RI tag"); 2111 } 2112} 2113 2114/* DO NOT CALL THIS DIRECTLY ! 
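   Same structure as iselIntExpr_RMI_wrk above, except that an AMD64RI
   has no memory form, so only the immediate and register cases appear.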
*/
static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32
           || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U64:
            if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
               return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
            }
            break;
         case Ico_U32:
            return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
         case Ico_U16:
            return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
         case Ico_U8:
            return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
         default:
            vpanic("iselIntExpr_RI.Iex_Const(amd64)");
      }
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RI_Reg(r);
   }
}


/* --------------------- RMs --------------------- */

/* Similarly, calculate an expression into an AMD64RM operand.  As
   with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits. */

static AMD64RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e )
{
   AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
   /* sanity checks ... */
   switch (rm->tag) {
      case Arm_Reg:
         vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
         vassert(hregIsVirtual(rm->Arm.Reg.reg));
         return rm;
      case Arm_Mem:
         vassert(sane_AMode(rm->Arm.Mem.am));
         return rm;
      default:
         vpanic("iselIntExpr_RM: unknown amd64 RM tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: 64-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I64) {
      return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
                                       hregAMD64_RBP()));
   }

   /* special case: load from memory -- not currently handled; such
      loads fall through to the default case below */

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RM_Reg(r);
   }
}


/* --------------------- CONDCODE --------------------- */

/* Generate code to evaluate a bit-typed expression, returning the
   condition code which will be true exactly when the expression would
   notionally have returned 1. */

static AMD64CondCode iselCondCode ( ISelEnv* env, const IRExpr* e )
{
   /* Uh, there's nothing we can sanity check here, unfortunately. */
   return iselCondCode_wrk(env,e);
}

/* DO NOT CALL THIS DIRECTLY !
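   Roughly, the scheme throughout is to emit a flag-setting instruction
   (a testq, cmpq or cmpl, or an andq $1 for a plain bit-valued
   temporary) and return the Acc_ condition under which the flags read
   as "the expression was 1"; callers then feed that condition to
   AMD64Instr_Set64, _CMov64, _SseCMov and friends.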
*/ 2206static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e ) 2207{ 2208 MatchInfo mi; 2209 2210 vassert(e); 2211 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1); 2212 2213 /* var */ 2214 if (e->tag == Iex_RdTmp) { 2215 HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp); 2216 HReg dst = newVRegI(env); 2217 addInstr(env, mk_iMOVsd_RR(r64,dst)); 2218 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst)); 2219 return Acc_NZ; 2220 } 2221 2222 /* Constant 1:Bit */ 2223 if (e->tag == Iex_Const) { 2224 HReg r; 2225 vassert(e->Iex.Const.con->tag == Ico_U1); 2226 vassert(e->Iex.Const.con->Ico.U1 == True 2227 || e->Iex.Const.con->Ico.U1 == False); 2228 r = newVRegI(env); 2229 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r)); 2230 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r)); 2231 return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ; 2232 } 2233 2234 /* Not1(...) */ 2235 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) { 2236 /* Generate code for the arg, and negate the test condition */ 2237 return 1 ^ iselCondCode(env, e->Iex.Unop.arg); 2238 } 2239 2240 /* --- patterns rooted at: 64to1 --- */ 2241 2242 /* 64to1 */ 2243 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) { 2244 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg); 2245 addInstr(env, AMD64Instr_Test64(1,reg)); 2246 return Acc_NZ; 2247 } 2248 2249 /* --- patterns rooted at: 32to1 --- */ 2250 2251 /* 32to1 */ 2252 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_32to1) { 2253 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg); 2254 addInstr(env, AMD64Instr_Test64(1,reg)); 2255 return Acc_NZ; 2256 } 2257 2258 /* --- patterns rooted at: CmpNEZ8 --- */ 2259 2260 /* CmpNEZ8(x) */ 2261 if (e->tag == Iex_Unop 2262 && e->Iex.Unop.op == Iop_CmpNEZ8) { 2263 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg); 2264 addInstr(env, AMD64Instr_Test64(0xFF,r)); 2265 return Acc_NZ; 2266 } 2267 2268 /* --- patterns rooted at: CmpNEZ16 --- */ 2269 2270 /* CmpNEZ16(x) */ 2271 if (e->tag == Iex_Unop 2272 && e->Iex.Unop.op == Iop_CmpNEZ16) { 2273 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg); 2274 addInstr(env, AMD64Instr_Test64(0xFFFF,r)); 2275 return Acc_NZ; 2276 } 2277 2278 /* --- patterns rooted at: CmpNEZ32 --- */ 2279 2280 /* CmpNEZ32(x) */ 2281 if (e->tag == Iex_Unop 2282 && e->Iex.Unop.op == Iop_CmpNEZ32) { 2283 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 2284 AMD64RMI* rmi2 = AMD64RMI_Imm(0); 2285 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1)); 2286 return Acc_NZ; 2287 } 2288 2289 /* --- patterns rooted at: CmpNEZ64 --- */ 2290 2291 /* CmpNEZ64(Or64(x,y)) */ 2292 { 2293 DECLARE_PATTERN(p_CmpNEZ64_Or64); 2294 DEFINE_PATTERN(p_CmpNEZ64_Or64, 2295 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1)))); 2296 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) { 2297 HReg r0 = iselIntExpr_R(env, mi.bindee[0]); 2298 AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]); 2299 HReg tmp = newVRegI(env); 2300 addInstr(env, mk_iMOVsd_RR(r0, tmp)); 2301 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp)); 2302 return Acc_NZ; 2303 } 2304 } 2305 2306 /* CmpNEZ64(x) */ 2307 if (e->tag == Iex_Unop 2308 && e->Iex.Unop.op == Iop_CmpNEZ64) { 2309 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 2310 AMD64RMI* rmi2 = AMD64RMI_Imm(0); 2311 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1)); 2312 return Acc_NZ; 2313 } 2314 2315 /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */ 2316 2317 /* CmpEQ8 / CmpNE8 */ 2318 if (e->tag == Iex_Binop 2319 && (e->Iex.Binop.op == Iop_CmpEQ8 2320 || e->Iex.Binop.op == Iop_CmpNE8 2321 || e->Iex.Binop.op 
== Iop_CasCmpEQ8 2322 || e->Iex.Binop.op == Iop_CasCmpNE8)) { 2323 if (isZeroU8(e->Iex.Binop.arg2)) { 2324 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2325 addInstr(env, AMD64Instr_Test64(0xFF,r1)); 2326 switch (e->Iex.Binop.op) { 2327 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z; 2328 case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ; 2329 default: vpanic("iselCondCode(amd64): CmpXX8(expr,0:I8)"); 2330 } 2331 } else { 2332 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2333 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2334 HReg r = newVRegI(env); 2335 addInstr(env, mk_iMOVsd_RR(r1,r)); 2336 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r)); 2337 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r)); 2338 switch (e->Iex.Binop.op) { 2339 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z; 2340 case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ; 2341 default: vpanic("iselCondCode(amd64): CmpXX8(expr,expr)"); 2342 } 2343 } 2344 } 2345 2346 /* CmpEQ16 / CmpNE16 */ 2347 if (e->tag == Iex_Binop 2348 && (e->Iex.Binop.op == Iop_CmpEQ16 2349 || e->Iex.Binop.op == Iop_CmpNE16 2350 || e->Iex.Binop.op == Iop_CasCmpEQ16 2351 || e->Iex.Binop.op == Iop_CasCmpNE16)) { 2352 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2353 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2354 HReg r = newVRegI(env); 2355 addInstr(env, mk_iMOVsd_RR(r1,r)); 2356 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r)); 2357 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r)); 2358 switch (e->Iex.Binop.op) { 2359 case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z; 2360 case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ; 2361 default: vpanic("iselCondCode(amd64): CmpXX16"); 2362 } 2363 } 2364 2365 /* CmpNE64(ccall, 64-bit constant) (--smc-check=all optimisation). 2366 Saves a "movq %rax, %tmp" compared to the default route. */ 2367 if (e->tag == Iex_Binop 2368 && e->Iex.Binop.op == Iop_CmpNE64 2369 && e->Iex.Binop.arg1->tag == Iex_CCall 2370 && e->Iex.Binop.arg2->tag == Iex_Const) { 2371 IRExpr* cal = e->Iex.Binop.arg1; 2372 IRExpr* con = e->Iex.Binop.arg2; 2373 HReg tmp = newVRegI(env); 2374 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */ 2375 vassert(cal->Iex.CCall.retty == Ity_I64); /* else ill-typed IR */ 2376 vassert(con->Iex.Const.con->tag == Ico_U64); 2377 /* Marshal args, do the call. 
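      As in the general Iex_CCall case, the result arrives in %rax; the
      difference is that instead of copying %rax into a fresh temporary
      and then comparing, we materialise the 64-bit constant into tmp
      and compare %rax against it directly -- that is where the saved
      movq comes from.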
*/ 2378 UInt addToSp = 0; 2379 RetLoc rloc = mk_RetLoc_INVALID(); 2380 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, 2381 cal->Iex.CCall.cee, 2382 cal->Iex.CCall.retty, cal->Iex.CCall.args ); 2383 vassert(is_sane_RetLoc(rloc)); 2384 vassert(rloc.pri == RLPri_Int); 2385 vassert(addToSp == 0); 2386 /* */ 2387 addInstr(env, AMD64Instr_Imm64(con->Iex.Const.con->Ico.U64, tmp)); 2388 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP, 2389 AMD64RMI_Reg(hregAMD64_RAX()), tmp)); 2390 return Acc_NZ; 2391 } 2392 2393 /* Cmp*64*(x,y) */ 2394 if (e->tag == Iex_Binop 2395 && (e->Iex.Binop.op == Iop_CmpEQ64 2396 || e->Iex.Binop.op == Iop_CmpNE64 2397 || e->Iex.Binop.op == Iop_CmpLT64S 2398 || e->Iex.Binop.op == Iop_CmpLT64U 2399 || e->Iex.Binop.op == Iop_CmpLE64S 2400 || e->Iex.Binop.op == Iop_CmpLE64U 2401 || e->Iex.Binop.op == Iop_CasCmpEQ64 2402 || e->Iex.Binop.op == Iop_CasCmpNE64 2403 || e->Iex.Binop.op == Iop_ExpCmpNE64)) { 2404 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2405 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2406 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1)); 2407 switch (e->Iex.Binop.op) { 2408 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z; 2409 case Iop_CmpNE64: 2410 case Iop_CasCmpNE64: case Iop_ExpCmpNE64: return Acc_NZ; 2411 case Iop_CmpLT64S: return Acc_L; 2412 case Iop_CmpLT64U: return Acc_B; 2413 case Iop_CmpLE64S: return Acc_LE; 2414 case Iop_CmpLE64U: return Acc_BE; 2415 default: vpanic("iselCondCode(amd64): CmpXX64"); 2416 } 2417 } 2418 2419 /* Cmp*32*(x,y) */ 2420 if (e->tag == Iex_Binop 2421 && (e->Iex.Binop.op == Iop_CmpEQ32 2422 || e->Iex.Binop.op == Iop_CmpNE32 2423 || e->Iex.Binop.op == Iop_CmpLT32S 2424 || e->Iex.Binop.op == Iop_CmpLT32U 2425 || e->Iex.Binop.op == Iop_CmpLE32S 2426 || e->Iex.Binop.op == Iop_CmpLE32U 2427 || e->Iex.Binop.op == Iop_CasCmpEQ32 2428 || e->Iex.Binop.op == Iop_CasCmpNE32 2429 || e->Iex.Binop.op == Iop_ExpCmpNE32)) { 2430 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2431 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2432 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1)); 2433 switch (e->Iex.Binop.op) { 2434 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z; 2435 case Iop_CmpNE32: 2436 case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Acc_NZ; 2437 case Iop_CmpLT32S: return Acc_L; 2438 case Iop_CmpLT32U: return Acc_B; 2439 case Iop_CmpLE32S: return Acc_LE; 2440 case Iop_CmpLE32U: return Acc_BE; 2441 default: vpanic("iselCondCode(amd64): CmpXX32"); 2442 } 2443 } 2444 2445 ppIRExpr(e); 2446 vpanic("iselCondCode(amd64)"); 2447} 2448 2449 2450/*---------------------------------------------------------*/ 2451/*--- ISEL: Integer expressions (128 bit) ---*/ 2452/*---------------------------------------------------------*/ 2453 2454/* Compute a 128-bit value into a register pair, which is returned as 2455 the first two parameters. As with iselIntExpr_R, these may be 2456 either real or virtual regs; in any case they must not be changed 2457 by subsequent code emitted by the caller. */ 2458 2459static void iselInt128Expr ( HReg* rHi, HReg* rLo, 2460 ISelEnv* env, const IRExpr* e ) 2461{ 2462 iselInt128Expr_wrk(rHi, rLo, env, e); 2463# if 0 2464 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2465# endif 2466 vassert(hregClass(*rHi) == HRcInt64); 2467 vassert(hregIsVirtual(*rHi)); 2468 vassert(hregClass(*rLo) == HRcInt64); 2469 vassert(hregIsVirtual(*rLo)); 2470} 2471 2472/* DO NOT CALL THIS DIRECTLY ! 
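   The two interesting cases below map directly onto the hardware
   idioms: a widening 64x64->128 multiply is done with one operand in
   %rax and the product delivered in %rdx:%rax, and the 128/64 DivMod
   likewise expects the dividend in %rdx:%rax, leaving the quotient in
   %rax and the remainder in %rdx.  (Sketch; the precise encodings live
   with AMD64Instr_MulL and AMD64Instr_Div.)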
*/ 2473static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, 2474 ISelEnv* env, const IRExpr* e ) 2475{ 2476 vassert(e); 2477 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128); 2478 2479 /* read 128-bit IRTemp */ 2480 if (e->tag == Iex_RdTmp) { 2481 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp); 2482 return; 2483 } 2484 2485 /* --------- BINARY ops --------- */ 2486 if (e->tag == Iex_Binop) { 2487 switch (e->Iex.Binop.op) { 2488 /* 64 x 64 -> 128 multiply */ 2489 case Iop_MullU64: 2490 case Iop_MullS64: { 2491 /* get one operand into %rax, and the other into a R/M. 2492 Need to make an educated guess about which is better in 2493 which. */ 2494 HReg tLo = newVRegI(env); 2495 HReg tHi = newVRegI(env); 2496 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64); 2497 AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1); 2498 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2); 2499 addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX())); 2500 addInstr(env, AMD64Instr_MulL(syned, rmLeft)); 2501 /* Result is now in RDX:RAX. Tell the caller. */ 2502 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi)); 2503 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo)); 2504 *rHi = tHi; 2505 *rLo = tLo; 2506 return; 2507 } 2508 2509 /* 128 x 64 -> (64(rem),64(div)) division */ 2510 case Iop_DivModU128to64: 2511 case Iop_DivModS128to64: { 2512 /* Get the 128-bit operand into rdx:rax, and the other into 2513 any old R/M. */ 2514 HReg sHi, sLo; 2515 HReg tLo = newVRegI(env); 2516 HReg tHi = newVRegI(env); 2517 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS128to64); 2518 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2); 2519 iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2520 addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX())); 2521 addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX())); 2522 addInstr(env, AMD64Instr_Div(syned, 8, rmRight)); 2523 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi)); 2524 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo)); 2525 *rHi = tHi; 2526 *rLo = tLo; 2527 return; 2528 } 2529 2530 /* 64HLto128(e1,e2) */ 2531 case Iop_64HLto128: 2532 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2533 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2534 return; 2535 2536 default: 2537 break; 2538 } 2539 } /* if (e->tag == Iex_Binop) */ 2540 2541 ppIRExpr(e); 2542 vpanic("iselInt128Expr"); 2543} 2544 2545 2546/*---------------------------------------------------------*/ 2547/*--- ISEL: Floating point expressions (32 bit) ---*/ 2548/*---------------------------------------------------------*/ 2549 2550/* Nothing interesting here; really just wrappers for 2551 64-bit stuff. 
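   As with doubles, a 32-bit float is kept in the low 4 bytes of a
   128-bit vector register (the wrapper below insists on HRcVec128);
   most cases are therefore just 4-byte SSE loads and stores, plus
   AMD64Instr_SseSDSS for widening/narrowing against F64.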
*/ 2552 2553static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e ) 2554{ 2555 HReg r = iselFltExpr_wrk( env, e ); 2556# if 0 2557 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2558# endif 2559 vassert(hregClass(r) == HRcVec128); 2560 vassert(hregIsVirtual(r)); 2561 return r; 2562} 2563 2564/* DO NOT CALL THIS DIRECTLY */ 2565static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e ) 2566{ 2567 IRType ty = typeOfIRExpr(env->type_env,e); 2568 vassert(ty == Ity_F32); 2569 2570 if (e->tag == Iex_RdTmp) { 2571 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2572 } 2573 2574 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2575 AMD64AMode* am; 2576 HReg res = newVRegV(env); 2577 vassert(e->Iex.Load.ty == Ity_F32); 2578 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2579 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am)); 2580 return res; 2581 } 2582 2583 if (e->tag == Iex_Binop 2584 && e->Iex.Binop.op == Iop_F64toF32) { 2585 /* Although the result is still held in a standard SSE register, 2586 we need to round it to reflect the loss of accuracy/range 2587 entailed in casting it to a 32-bit float. */ 2588 HReg dst = newVRegV(env); 2589 HReg src = iselDblExpr(env, e->Iex.Binop.arg2); 2590 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 ); 2591 addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst)); 2592 set_SSE_rounding_default( env ); 2593 return dst; 2594 } 2595 2596 if (e->tag == Iex_Get) { 2597 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset, 2598 hregAMD64_RBP() ); 2599 HReg res = newVRegV(env); 2600 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am )); 2601 return res; 2602 } 2603 2604 if (e->tag == Iex_Unop 2605 && e->Iex.Unop.op == Iop_ReinterpI32asF32) { 2606 /* Given an I32, produce an IEEE754 float with the same bit 2607 pattern. */ 2608 HReg dst = newVRegV(env); 2609 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2610 AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP()); 2611 addInstr(env, AMD64Instr_Store(4, src, m4_rsp)); 2612 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp )); 2613 return dst; 2614 } 2615 2616 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) { 2617 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 2618 HReg arg = iselFltExpr(env, e->Iex.Binop.arg2); 2619 HReg dst = newVRegV(env); 2620 2621 /* rf now holds the value to be rounded. The first thing to do 2622 is set the FPU's rounding mode accordingly. */ 2623 2624 /* Set host x87 rounding mode */ 2625 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2626 2627 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp)); 2628 addInstr(env, AMD64Instr_A87Free(1)); 2629 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4)); 2630 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND)); 2631 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4)); 2632 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp)); 2633 2634 /* Restore default x87 rounding. */ 2635 set_FPU_rounding_default( env ); 2636 2637 return dst; 2638 } 2639 2640 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_NegF32) { 2641 /* Sigh ... very rough code. Could do much better. */ 2642 /* Get the 128-bit literal 00---0 10---0 into a register 2643 and xor it with the value to be negated. 
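      Concretely: push an 8-byte zero and then an 8-byte
      0x0000000080000000, so the 16 bytes now at (%rsp) have only bit 31
      set; loading that as a vector and XORing flips just the sign bit
      of the float in the low 32-bit lane and leaves everything else
      untouched.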
*/ 2644 HReg r1 = newVRegI(env); 2645 HReg dst = newVRegV(env); 2646 HReg tmp = newVRegV(env); 2647 HReg src = iselFltExpr(env, e->Iex.Unop.arg); 2648 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 2649 addInstr(env, mk_vMOVsd_RR(src,tmp)); 2650 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0))); 2651 addInstr(env, AMD64Instr_Imm64( 1ULL<<31, r1 )); 2652 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1))); 2653 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0)); 2654 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst)); 2655 add_to_rsp(env, 16); 2656 return dst; 2657 } 2658 2659 if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF32) { 2660 IRQop *qop = e->Iex.Qop.details; 2661 HReg dst = newVRegV(env); 2662 HReg argX = iselFltExpr(env, qop->arg2); 2663 HReg argY = iselFltExpr(env, qop->arg3); 2664 HReg argZ = iselFltExpr(env, qop->arg4); 2665 /* XXXROUNDINGFIXME */ 2666 /* set roundingmode here */ 2667 /* subq $16, %rsp -- make a space*/ 2668 sub_from_rsp(env, 16); 2669 /* Prepare 4 arg regs: 2670 leaq 0(%rsp), %rdi 2671 leaq 4(%rsp), %rsi 2672 leaq 8(%rsp), %rdx 2673 leaq 12(%rsp), %rcx 2674 */ 2675 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()), 2676 hregAMD64_RDI())); 2677 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(4, hregAMD64_RSP()), 2678 hregAMD64_RSI())); 2679 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()), 2680 hregAMD64_RDX())); 2681 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(12, hregAMD64_RSP()), 2682 hregAMD64_RCX())); 2683 /* Store the three args, at (%rsi), (%rdx) and (%rcx): 2684 movss %argX, 0(%rsi) 2685 movss %argY, 0(%rdx) 2686 movss %argZ, 0(%rcx) 2687 */ 2688 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argX, 2689 AMD64AMode_IR(0, hregAMD64_RSI()))); 2690 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argY, 2691 AMD64AMode_IR(0, hregAMD64_RDX()))); 2692 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argZ, 2693 AMD64AMode_IR(0, hregAMD64_RCX()))); 2694 /* call the helper */ 2695 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, 2696 (ULong)(HWord)h_generic_calc_MAddF32, 2697 4, mk_RetLoc_simple(RLPri_None) )); 2698 /* fetch the result from memory, using %r_argp, which the 2699 register allocator will keep alive across the call. */ 2700 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 4, dst, 2701 AMD64AMode_IR(0, hregAMD64_RSP()))); 2702 /* and finally, clear the space */ 2703 add_to_rsp(env, 16); 2704 return dst; 2705 } 2706 2707 ppIRExpr(e); 2708 vpanic("iselFltExpr_wrk"); 2709} 2710 2711 2712/*---------------------------------------------------------*/ 2713/*--- ISEL: Floating point expressions (64 bit) ---*/ 2714/*---------------------------------------------------------*/ 2715 2716/* Compute a 64-bit floating point value into the lower half of an xmm 2717 register, the identity of which is returned. As with 2718 iselIntExpr_R, the returned reg will be virtual, and it must not be 2719 changed by subsequent code emitted by the caller. 2720*/ 2721 2722/* IEEE 754 formats. 
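   The detail that matters below is that the sign is the top bit of the
   64-bit pattern: the NegF64/AbsF64 cases build a 128-bit mask with
   only bit 63 set and either XOR it in (negation) or use it to clear
   that bit (absolute value).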
From http://www.freesoft.org/CIE/RFC/1832/32.htm: 2723 2724 Type S (1 bit) E (11 bits) F (52 bits) 2725 ---- --------- ----------- ----------- 2726 signalling NaN u 2047 (max) .0uuuuu---u 2727 (with at least 2728 one 1 bit) 2729 quiet NaN u 2047 (max) .1uuuuu---u 2730 2731 negative infinity 1 2047 (max) .000000---0 2732 2733 positive infinity 0 2047 (max) .000000---0 2734 2735 negative zero 1 0 .000000---0 2736 2737 positive zero 0 0 .000000---0 2738*/ 2739 2740static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e ) 2741{ 2742 HReg r = iselDblExpr_wrk( env, e ); 2743# if 0 2744 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2745# endif 2746 vassert(hregClass(r) == HRcVec128); 2747 vassert(hregIsVirtual(r)); 2748 return r; 2749} 2750 2751/* DO NOT CALL THIS DIRECTLY */ 2752static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e ) 2753{ 2754 IRType ty = typeOfIRExpr(env->type_env,e); 2755 vassert(e); 2756 vassert(ty == Ity_F64); 2757 2758 if (e->tag == Iex_RdTmp) { 2759 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2760 } 2761 2762 if (e->tag == Iex_Const) { 2763 union { ULong u64; Double f64; } u; 2764 HReg res = newVRegV(env); 2765 HReg tmp = newVRegI(env); 2766 vassert(sizeof(u) == 8); 2767 vassert(sizeof(u.u64) == 8); 2768 vassert(sizeof(u.f64) == 8); 2769 2770 if (e->Iex.Const.con->tag == Ico_F64) { 2771 u.f64 = e->Iex.Const.con->Ico.F64; 2772 } 2773 else if (e->Iex.Const.con->tag == Ico_F64i) { 2774 u.u64 = e->Iex.Const.con->Ico.F64i; 2775 } 2776 else 2777 vpanic("iselDblExpr(amd64): const"); 2778 2779 addInstr(env, AMD64Instr_Imm64(u.u64, tmp)); 2780 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp))); 2781 addInstr(env, AMD64Instr_SseLdSt( 2782 True/*load*/, 8, res, 2783 AMD64AMode_IR(0, hregAMD64_RSP()) 2784 )); 2785 add_to_rsp(env, 8); 2786 return res; 2787 } 2788 2789 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2790 AMD64AMode* am; 2791 HReg res = newVRegV(env); 2792 vassert(e->Iex.Load.ty == Ity_F64); 2793 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2794 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am )); 2795 return res; 2796 } 2797 2798 if (e->tag == Iex_Get) { 2799 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset, 2800 hregAMD64_RBP() ); 2801 HReg res = newVRegV(env); 2802 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am )); 2803 return res; 2804 } 2805 2806 if (e->tag == Iex_GetI) { 2807 AMD64AMode* am 2808 = genGuestArrayOffset( 2809 env, e->Iex.GetI.descr, 2810 e->Iex.GetI.ix, e->Iex.GetI.bias ); 2811 HReg res = newVRegV(env); 2812 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am )); 2813 return res; 2814 } 2815 2816 if (e->tag == Iex_Triop) { 2817 IRTriop *triop = e->Iex.Triop.details; 2818 AMD64SseOp op = Asse_INVALID; 2819 switch (triop->op) { 2820 case Iop_AddF64: op = Asse_ADDF; break; 2821 case Iop_SubF64: op = Asse_SUBF; break; 2822 case Iop_MulF64: op = Asse_MULF; break; 2823 case Iop_DivF64: op = Asse_DIVF; break; 2824 default: break; 2825 } 2826 if (op != Asse_INVALID) { 2827 HReg dst = newVRegV(env); 2828 HReg argL = iselDblExpr(env, triop->arg2); 2829 HReg argR = iselDblExpr(env, triop->arg3); 2830 addInstr(env, mk_vMOVsd_RR(argL, dst)); 2831 /* XXXROUNDINGFIXME */ 2832 /* set roundingmode here */ 2833 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst)); 2834 return dst; 2835 } 2836 } 2837 2838 if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF64) { 2839 IRQop *qop = e->Iex.Qop.details; 2840 HReg dst = newVRegV(env); 2841 HReg argX = iselDblExpr(env, qop->arg2); 2842 HReg argY = iselDblExpr(env, 
qop->arg3); 2843 HReg argZ = iselDblExpr(env, qop->arg4); 2844 /* XXXROUNDINGFIXME */ 2845 /* set roundingmode here */ 2846 /* subq $32, %rsp -- make a space*/ 2847 sub_from_rsp(env, 32); 2848 /* Prepare 4 arg regs: 2849 leaq 0(%rsp), %rdi 2850 leaq 8(%rsp), %rsi 2851 leaq 16(%rsp), %rdx 2852 leaq 24(%rsp), %rcx 2853 */ 2854 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()), 2855 hregAMD64_RDI())); 2856 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()), 2857 hregAMD64_RSI())); 2858 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, hregAMD64_RSP()), 2859 hregAMD64_RDX())); 2860 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(24, hregAMD64_RSP()), 2861 hregAMD64_RCX())); 2862 /* Store the three args, at (%rsi), (%rdx) and (%rcx): 2863 movsd %argX, 0(%rsi) 2864 movsd %argY, 0(%rdx) 2865 movsd %argZ, 0(%rcx) 2866 */ 2867 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argX, 2868 AMD64AMode_IR(0, hregAMD64_RSI()))); 2869 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argY, 2870 AMD64AMode_IR(0, hregAMD64_RDX()))); 2871 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argZ, 2872 AMD64AMode_IR(0, hregAMD64_RCX()))); 2873 /* call the helper */ 2874 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, 2875 (ULong)(HWord)h_generic_calc_MAddF64, 2876 4, mk_RetLoc_simple(RLPri_None) )); 2877 /* fetch the result from memory, using %r_argp, which the 2878 register allocator will keep alive across the call. */ 2879 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 8, dst, 2880 AMD64AMode_IR(0, hregAMD64_RSP()))); 2881 /* and finally, clear the space */ 2882 add_to_rsp(env, 32); 2883 return dst; 2884 } 2885 2886 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) { 2887 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 2888 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2); 2889 HReg dst = newVRegV(env); 2890 2891 /* rf now holds the value to be rounded. The first thing to do 2892 is set the FPU's rounding mode accordingly. */ 2893 2894 /* Set host x87 rounding mode */ 2895 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2896 2897 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp)); 2898 addInstr(env, AMD64Instr_A87Free(1)); 2899 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 2900 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND)); 2901 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); 2902 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 2903 2904 /* Restore default x87 rounding. */ 2905 set_FPU_rounding_default( env ); 2906 2907 return dst; 2908 } 2909 2910 IRTriop *triop = e->Iex.Triop.details; 2911 if (e->tag == Iex_Triop 2912 && (triop->op == Iop_ScaleF64 2913 || triop->op == Iop_AtanF64 2914 || triop->op == Iop_Yl2xF64 2915 || triop->op == Iop_Yl2xp1F64 2916 || triop->op == Iop_PRemF64 2917 || triop->op == Iop_PRem1F64) 2918 ) { 2919 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 2920 HReg arg1 = iselDblExpr(env, triop->arg2); 2921 HReg arg2 = iselDblExpr(env, triop->arg3); 2922 HReg dst = newVRegV(env); 2923 Bool arg2first = toBool(triop->op == Iop_ScaleF64 2924 || triop->op == Iop_PRemF64 2925 || triop->op == Iop_PRem1F64); 2926 addInstr(env, AMD64Instr_A87Free(2)); 2927 2928 /* one arg -> top of x87 stack */ 2929 addInstr(env, AMD64Instr_SseLdSt( 2930 False/*store*/, 8, arg2first ? 
arg2 : arg1, m8_rsp)); 2931 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 2932 2933 /* other arg -> top of x87 stack */ 2934 addInstr(env, AMD64Instr_SseLdSt( 2935 False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp)); 2936 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 2937 2938 /* do it */ 2939 /* XXXROUNDINGFIXME */ 2940 /* set roundingmode here */ 2941 switch (triop->op) { 2942 case Iop_ScaleF64: 2943 addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE)); 2944 break; 2945 case Iop_AtanF64: 2946 addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN)); 2947 break; 2948 case Iop_Yl2xF64: 2949 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X)); 2950 break; 2951 case Iop_Yl2xp1F64: 2952 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1)); 2953 break; 2954 case Iop_PRemF64: 2955 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM)); 2956 break; 2957 case Iop_PRem1F64: 2958 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1)); 2959 break; 2960 default: 2961 vassert(0); 2962 } 2963 2964 /* save result */ 2965 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); 2966 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 2967 return dst; 2968 } 2969 2970 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) { 2971 HReg dst = newVRegV(env); 2972 HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2); 2973 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 ); 2974 addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst )); 2975 set_SSE_rounding_default( env ); 2976 return dst; 2977 } 2978 2979 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32StoF64) { 2980 HReg dst = newVRegV(env); 2981 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2982 set_SSE_rounding_default( env ); 2983 addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst )); 2984 return dst; 2985 } 2986 2987 if (e->tag == Iex_Unop 2988 && (e->Iex.Unop.op == Iop_NegF64 2989 || e->Iex.Unop.op == Iop_AbsF64)) { 2990 /* Sigh ... very rough code. Could do much better. */ 2991 /* Get the 128-bit literal 00---0 10---0 into a register 2992 and xor/nand it with the value to be negated. */ 2993 HReg r1 = newVRegI(env); 2994 HReg dst = newVRegV(env); 2995 HReg tmp = newVRegV(env); 2996 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 2997 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 2998 addInstr(env, mk_vMOVsd_RR(src,tmp)); 2999 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0))); 3000 addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 )); 3001 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1))); 3002 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0)); 3003 3004 if (e->Iex.Unop.op == Iop_NegF64) 3005 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst)); 3006 else 3007 addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst)); 3008 3009 add_to_rsp(env, 16); 3010 return dst; 3011 } 3012 3013 if (e->tag == Iex_Binop) { 3014 A87FpOp fpop = Afp_INVALID; 3015 switch (e->Iex.Binop.op) { 3016 case Iop_SqrtF64: fpop = Afp_SQRT; break; 3017 case Iop_SinF64: fpop = Afp_SIN; break; 3018 case Iop_CosF64: fpop = Afp_COS; break; 3019 case Iop_TanF64: fpop = Afp_TAN; break; 3020 case Iop_2xm1F64: fpop = Afp_2XM1; break; 3021 default: break; 3022 } 3023 if (fpop != Afp_INVALID) { 3024 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 3025 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2); 3026 HReg dst = newVRegV(env); 3027 Int nNeeded = e->Iex.Binop.op==Iop_TanF64 ? 
2 : 1; 3028 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp)); 3029 addInstr(env, AMD64Instr_A87Free(nNeeded)); 3030 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 3031 /* XXXROUNDINGFIXME */ 3032 /* set roundingmode here */ 3033 /* Note that AMD64Instr_A87FpOp(Afp_TAN) sets the condition 3034 codes. I don't think that matters, since this insn 3035 selector never generates such an instruction intervening 3036 between an flag-setting instruction and a flag-using 3037 instruction. */ 3038 addInstr(env, AMD64Instr_A87FpOp(fpop)); 3039 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); 3040 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 3041 return dst; 3042 } 3043 } 3044 3045 if (e->tag == Iex_Unop) { 3046 switch (e->Iex.Unop.op) { 3047//.. case Iop_I32toF64: { 3048//.. HReg dst = newVRegF(env); 3049//.. HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg); 3050//.. addInstr(env, X86Instr_Push(X86RMI_Reg(ri))); 3051//.. set_FPU_rounding_default(env); 3052//.. addInstr(env, X86Instr_FpLdStI( 3053//.. True/*load*/, 4, dst, 3054//.. X86AMode_IR(0, hregX86_ESP()))); 3055//.. add_to_esp(env, 4); 3056//.. return dst; 3057//.. } 3058 case Iop_ReinterpI64asF64: { 3059 /* Given an I64, produce an IEEE754 double with the same 3060 bit pattern. */ 3061 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 3062 HReg dst = newVRegV(env); 3063 AMD64RI* src = iselIntExpr_RI(env, e->Iex.Unop.arg); 3064 /* paranoia */ 3065 set_SSE_rounding_default(env); 3066 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp)); 3067 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 3068 return dst; 3069 } 3070 case Iop_F32toF64: { 3071 HReg f32; 3072 HReg f64 = newVRegV(env); 3073 /* this shouldn't be necessary, but be paranoid ... */ 3074 set_SSE_rounding_default(env); 3075 f32 = iselFltExpr(env, e->Iex.Unop.arg); 3076 addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64)); 3077 return f64; 3078 } 3079 default: 3080 break; 3081 } 3082 } 3083 3084 /* --------- MULTIPLEX --------- */ 3085 if (e->tag == Iex_ITE) { // VFD 3086 HReg r1, r0, dst; 3087 vassert(ty == Ity_F64); 3088 vassert(typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1); 3089 r1 = iselDblExpr(env, e->Iex.ITE.iftrue); 3090 r0 = iselDblExpr(env, e->Iex.ITE.iffalse); 3091 dst = newVRegV(env); 3092 addInstr(env, mk_vMOVsd_RR(r1,dst)); 3093 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond); 3094 addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst)); 3095 return dst; 3096 } 3097 3098 ppIRExpr(e); 3099 vpanic("iselDblExpr_wrk"); 3100} 3101 3102 3103/*---------------------------------------------------------*/ 3104/*--- ISEL: SIMD (Vector) expressions, 128 bit. 
---*/ 3105/*---------------------------------------------------------*/ 3106 3107static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e ) 3108{ 3109 HReg r = iselVecExpr_wrk( env, e ); 3110# if 0 3111 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3112# endif 3113 vassert(hregClass(r) == HRcVec128); 3114 vassert(hregIsVirtual(r)); 3115 return r; 3116} 3117 3118 3119/* DO NOT CALL THIS DIRECTLY */ 3120static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e ) 3121{ 3122 HWord fn = 0; /* address of helper fn, if required */ 3123 Bool arg1isEReg = False; 3124 AMD64SseOp op = Asse_INVALID; 3125 IRType ty = typeOfIRExpr(env->type_env,e); 3126 vassert(e); 3127 vassert(ty == Ity_V128); 3128 3129 if (e->tag == Iex_RdTmp) { 3130 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 3131 } 3132 3133 if (e->tag == Iex_Get) { 3134 HReg dst = newVRegV(env); 3135 addInstr(env, AMD64Instr_SseLdSt( 3136 True/*load*/, 3137 16, 3138 dst, 3139 AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP()) 3140 ) 3141 ); 3142 return dst; 3143 } 3144 3145 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 3146 HReg dst = newVRegV(env); 3147 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 3148 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am )); 3149 return dst; 3150 } 3151 3152 if (e->tag == Iex_Const) { 3153 HReg dst = newVRegV(env); 3154 vassert(e->Iex.Const.con->tag == Ico_V128); 3155 switch (e->Iex.Const.con->Ico.V128) { 3156 case 0x0000: 3157 dst = generate_zeroes_V128(env); 3158 break; 3159 case 0xFFFF: 3160 dst = generate_ones_V128(env); 3161 break; 3162 default: { 3163 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 3164 /* do push_uimm64 twice, first time for the high-order half. */ 3165 push_uimm64(env, bitmask8_to_bytemask64( 3166 (e->Iex.Const.con->Ico.V128 >> 8) & 0xFF 3167 )); 3168 push_uimm64(env, bitmask8_to_bytemask64( 3169 (e->Iex.Const.con->Ico.V128 >> 0) & 0xFF 3170 )); 3171 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 )); 3172 add_to_rsp(env, 16); 3173 break; 3174 } 3175 } 3176 return dst; 3177 } 3178 3179 if (e->tag == Iex_Unop) { 3180 switch (e->Iex.Unop.op) { 3181 3182 case Iop_NotV128: { 3183 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3184 return do_sse_NotV128(env, arg); 3185 } 3186 3187 case Iop_CmpNEZ64x2: { 3188 /* We can use SSE2 instructions for this. */ 3189 /* Ideally, we want to do a 64Ix2 comparison against zero of 3190 the operand. Problem is no such insn exists. Solution 3191 therefore is to do a 32Ix4 comparison instead, and bitwise- 3192 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and 3193 let the not'd result of this initial comparison be a:b:c:d. 3194 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use 3195 pshufd to create a value b:a:d:c, and OR that with a:b:c:d, 3196 giving the required result. 
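            (Worked example: if after the NOT only lane b is all-ones,
            the shuffle moves that value into the a position, and the OR
            therefore makes both a and b all-ones, so the whole 64-bit
            half reads as "nonzero", as required.)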
3197 3198 The required selection sequence is 2,3,0,1, which 3199 according to Intel's documentation means the pshufd 3200 literal value is 0xB1, that is, 3201 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0) 3202 */ 3203 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3204 HReg tmp = generate_zeroes_V128(env); 3205 HReg dst = newVRegV(env); 3206 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp)); 3207 tmp = do_sse_NotV128(env, tmp); 3208 addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst)); 3209 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst)); 3210 return dst; 3211 } 3212 3213 case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector; 3214 case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector; 3215 case Iop_CmpNEZ8x16: op = Asse_CMPEQ8; goto do_CmpNEZ_vector; 3216 do_CmpNEZ_vector: 3217 { 3218 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3219 HReg tmp = newVRegV(env); 3220 HReg zero = generate_zeroes_V128(env); 3221 HReg dst; 3222 addInstr(env, mk_vMOVsd_RR(arg, tmp)); 3223 addInstr(env, AMD64Instr_SseReRg(op, zero, tmp)); 3224 dst = do_sse_NotV128(env, tmp); 3225 return dst; 3226 } 3227 3228 case Iop_RecipEst32Fx4: op = Asse_RCPF; goto do_32Fx4_unary; 3229 case Iop_RSqrtEst32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary; 3230 do_32Fx4_unary: 3231 { 3232 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3233 HReg dst = newVRegV(env); 3234 addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst)); 3235 return dst; 3236 } 3237 3238 case Iop_RecipEst32F0x4: op = Asse_RCPF; goto do_32F0x4_unary; 3239 case Iop_RSqrtEst32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary; 3240 case Iop_Sqrt32F0x4: op = Asse_SQRTF; goto do_32F0x4_unary; 3241 do_32F0x4_unary: 3242 { 3243 /* A bit subtle. We have to copy the arg to the result 3244 register first, because actually doing the SSE scalar insn 3245 leaves the upper 3/4 of the destination register 3246 unchanged. Whereas the required semantics of these 3247 primops is that the upper 3/4 is simply copied in from the 3248 argument. */ 3249 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3250 HReg dst = newVRegV(env); 3251 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3252 addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst)); 3253 return dst; 3254 } 3255 3256 case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary; 3257 do_64F0x2_unary: 3258 { 3259 /* A bit subtle. We have to copy the arg to the result 3260 register first, because actually doing the SSE scalar insn 3261 leaves the upper half of the destination register 3262 unchanged. Whereas the required semantics of these 3263 primops is that the upper half is simply copied in from the 3264 argument. 
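            Copying arg to dst first and then applying the scalar op
            with arg as the source gives exactly that: the upper half
            comes from the initial copy and only the low half is
            overwritten by the operation.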
*/ 3265 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3266 HReg dst = newVRegV(env); 3267 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3268 addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst)); 3269 return dst; 3270 } 3271 3272 case Iop_32UtoV128: { 3273 HReg dst = newVRegV(env); 3274 AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP()); 3275 AMD64RI* ri = iselIntExpr_RI(env, e->Iex.Unop.arg); 3276 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32)); 3277 addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32)); 3278 return dst; 3279 } 3280 3281 case Iop_64UtoV128: { 3282 HReg dst = newVRegV(env); 3283 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 3284 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg); 3285 addInstr(env, AMD64Instr_Push(rmi)); 3286 addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0)); 3287 add_to_rsp(env, 8); 3288 return dst; 3289 } 3290 3291 case Iop_V256toV128_0: 3292 case Iop_V256toV128_1: { 3293 HReg vHi, vLo; 3294 iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg); 3295 return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo; 3296 } 3297 3298 default: 3299 break; 3300 } /* switch (e->Iex.Unop.op) */ 3301 } /* if (e->tag == Iex_Unop) */ 3302 3303 if (e->tag == Iex_Binop) { 3304 switch (e->Iex.Binop.op) { 3305 3306 case Iop_Sqrt64Fx2: 3307 case Iop_Sqrt32Fx4: { 3308 /* :: (rmode, vec) -> vec */ 3309 HReg arg = iselVecExpr(env, e->Iex.Binop.arg2); 3310 HReg dst = newVRegV(env); 3311 /* XXXROUNDINGFIXME */ 3312 /* set roundingmode here */ 3313 addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2 3314 ? AMD64Instr_Sse64Fx2 : AMD64Instr_Sse32Fx4) 3315 (Asse_SQRTF, arg, dst)); 3316 return dst; 3317 } 3318 3319 /* FIXME: could we generate MOVQ here? */ 3320 case Iop_SetV128lo64: { 3321 HReg dst = newVRegV(env); 3322 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3323 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); 3324 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP()); 3325 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16)); 3326 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16)); 3327 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16)); 3328 return dst; 3329 } 3330 3331 /* FIXME: could we generate MOVD here? 
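         As it stands we take the same round trip through memory as
         SetV128lo64 just above: spill the vector to -16(%rsp),
         overwrite its low 4 bytes with the integer, and reload the
         whole 16 bytes.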
*/ 3332 case Iop_SetV128lo32: { 3333 HReg dst = newVRegV(env); 3334 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3335 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); 3336 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP()); 3337 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16)); 3338 addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16)); 3339 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16)); 3340 return dst; 3341 } 3342 3343 case Iop_64HLtoV128: { 3344 HReg rsp = hregAMD64_RSP(); 3345 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, rsp); 3346 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp); 3347 AMD64RI* qHi = iselIntExpr_RI(env, e->Iex.Binop.arg1); 3348 AMD64RI* qLo = iselIntExpr_RI(env, e->Iex.Binop.arg2); 3349 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qHi, m8_rsp)); 3350 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qLo, m16_rsp)); 3351 HReg dst = newVRegV(env); 3352 /* One store-forwarding stall coming up, oh well :-( */ 3353 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, m16_rsp)); 3354 return dst; 3355 } 3356 3357 case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4; 3358 case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4; 3359 case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4; 3360 case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4; 3361 case Iop_Max32Fx4: op = Asse_MAXF; goto do_32Fx4; 3362 case Iop_Min32Fx4: op = Asse_MINF; goto do_32Fx4; 3363 do_32Fx4: 3364 { 3365 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3366 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3367 HReg dst = newVRegV(env); 3368 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3369 addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst)); 3370 return dst; 3371 } 3372 3373 case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2; 3374 case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2; 3375 case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2; 3376 case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2; 3377 case Iop_Max64Fx2: op = Asse_MAXF; goto do_64Fx2; 3378 case Iop_Min64Fx2: op = Asse_MINF; goto do_64Fx2; 3379 do_64Fx2: 3380 { 3381 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3382 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3383 HReg dst = newVRegV(env); 3384 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3385 addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst)); 3386 return dst; 3387 } 3388 3389 case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4; 3390 case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4; 3391 case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4; 3392 case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4; 3393 case Iop_Add32F0x4: op = Asse_ADDF; goto do_32F0x4; 3394 case Iop_Div32F0x4: op = Asse_DIVF; goto do_32F0x4; 3395 case Iop_Max32F0x4: op = Asse_MAXF; goto do_32F0x4; 3396 case Iop_Min32F0x4: op = Asse_MINF; goto do_32F0x4; 3397 case Iop_Mul32F0x4: op = Asse_MULF; goto do_32F0x4; 3398 case Iop_Sub32F0x4: op = Asse_SUBF; goto do_32F0x4; 3399 do_32F0x4: { 3400 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3401 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3402 HReg dst = newVRegV(env); 3403 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3404 addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst)); 3405 return dst; 3406 } 3407 3408 case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2; 3409 case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2; 3410 case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2; 3411 case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2; 3412 case Iop_Add64F0x2: op = Asse_ADDF; goto do_64F0x2; 3413 case Iop_Div64F0x2: op = 
Asse_DIVF; goto do_64F0x2; 3414 case Iop_Max64F0x2: op = Asse_MAXF; goto do_64F0x2; 3415 case Iop_Min64F0x2: op = Asse_MINF; goto do_64F0x2; 3416 case Iop_Mul64F0x2: op = Asse_MULF; goto do_64F0x2; 3417 case Iop_Sub64F0x2: op = Asse_SUBF; goto do_64F0x2; 3418 do_64F0x2: { 3419 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3420 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3421 HReg dst = newVRegV(env); 3422 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3423 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst)); 3424 return dst; 3425 } 3426 3427 case Iop_QNarrowBin32Sto16Sx8: 3428 op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg; 3429 case Iop_QNarrowBin16Sto8Sx16: 3430 op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg; 3431 case Iop_QNarrowBin16Sto8Ux16: 3432 op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg; 3433 3434 case Iop_InterleaveHI8x16: 3435 op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg; 3436 case Iop_InterleaveHI16x8: 3437 op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg; 3438 case Iop_InterleaveHI32x4: 3439 op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg; 3440 case Iop_InterleaveHI64x2: 3441 op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg; 3442 3443 case Iop_InterleaveLO8x16: 3444 op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg; 3445 case Iop_InterleaveLO16x8: 3446 op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg; 3447 case Iop_InterleaveLO32x4: 3448 op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg; 3449 case Iop_InterleaveLO64x2: 3450 op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg; 3451 3452 case Iop_AndV128: op = Asse_AND; goto do_SseReRg; 3453 case Iop_OrV128: op = Asse_OR; goto do_SseReRg; 3454 case Iop_XorV128: op = Asse_XOR; goto do_SseReRg; 3455 case Iop_Add8x16: op = Asse_ADD8; goto do_SseReRg; 3456 case Iop_Add16x8: op = Asse_ADD16; goto do_SseReRg; 3457 case Iop_Add32x4: op = Asse_ADD32; goto do_SseReRg; 3458 case Iop_Add64x2: op = Asse_ADD64; goto do_SseReRg; 3459 case Iop_QAdd8Sx16: op = Asse_QADD8S; goto do_SseReRg; 3460 case Iop_QAdd16Sx8: op = Asse_QADD16S; goto do_SseReRg; 3461 case Iop_QAdd8Ux16: op = Asse_QADD8U; goto do_SseReRg; 3462 case Iop_QAdd16Ux8: op = Asse_QADD16U; goto do_SseReRg; 3463 case Iop_Avg8Ux16: op = Asse_AVG8U; goto do_SseReRg; 3464 case Iop_Avg16Ux8: op = Asse_AVG16U; goto do_SseReRg; 3465 case Iop_CmpEQ8x16: op = Asse_CMPEQ8; goto do_SseReRg; 3466 case Iop_CmpEQ16x8: op = Asse_CMPEQ16; goto do_SseReRg; 3467 case Iop_CmpEQ32x4: op = Asse_CMPEQ32; goto do_SseReRg; 3468 case Iop_CmpGT8Sx16: op = Asse_CMPGT8S; goto do_SseReRg; 3469 case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg; 3470 case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg; 3471 case Iop_Max16Sx8: op = Asse_MAX16S; goto do_SseReRg; 3472 case Iop_Max8Ux16: op = Asse_MAX8U; goto do_SseReRg; 3473 case Iop_Min16Sx8: op = Asse_MIN16S; goto do_SseReRg; 3474 case Iop_Min8Ux16: op = Asse_MIN8U; goto do_SseReRg; 3475 case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg; 3476 case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg; 3477 case Iop_Mul16x8: op = Asse_MUL16; goto do_SseReRg; 3478 case Iop_Sub8x16: op = Asse_SUB8; goto do_SseReRg; 3479 case Iop_Sub16x8: op = Asse_SUB16; goto do_SseReRg; 3480 case Iop_Sub32x4: op = Asse_SUB32; goto do_SseReRg; 3481 case Iop_Sub64x2: op = Asse_SUB64; goto do_SseReRg; 3482 case Iop_QSub8Sx16: op = Asse_QSUB8S; goto do_SseReRg; 3483 case Iop_QSub16Sx8: op = Asse_QSUB16S; goto do_SseReRg; 3484 case Iop_QSub8Ux16: op = Asse_QSUB8U; goto do_SseReRg; 3485 case Iop_QSub16Ux8: op = 
Asse_QSUB16U; goto do_SseReRg; 3486 do_SseReRg: { 3487 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1); 3488 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2); 3489 HReg dst = newVRegV(env); 3490 if (arg1isEReg) { 3491 addInstr(env, mk_vMOVsd_RR(arg2, dst)); 3492 addInstr(env, AMD64Instr_SseReRg(op, arg1, dst)); 3493 } else { 3494 addInstr(env, mk_vMOVsd_RR(arg1, dst)); 3495 addInstr(env, AMD64Instr_SseReRg(op, arg2, dst)); 3496 } 3497 return dst; 3498 } 3499 3500 case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift; 3501 case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift; 3502 case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift; 3503 case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift; 3504 case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift; 3505 case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift; 3506 case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift; 3507 case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift; 3508 do_SseShift: { 3509 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1); 3510 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 3511 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 3512 HReg ereg = newVRegV(env); 3513 HReg dst = newVRegV(env); 3514 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0))); 3515 addInstr(env, AMD64Instr_Push(rmi)); 3516 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0)); 3517 addInstr(env, mk_vMOVsd_RR(greg, dst)); 3518 addInstr(env, AMD64Instr_SseReRg(op, ereg, dst)); 3519 add_to_rsp(env, 16); 3520 return dst; 3521 } 3522 3523 case Iop_Mul32x4: fn = (HWord)h_generic_calc_Mul32x4; 3524 goto do_SseAssistedBinary; 3525 case Iop_Max32Sx4: fn = (HWord)h_generic_calc_Max32Sx4; 3526 goto do_SseAssistedBinary; 3527 case Iop_Min32Sx4: fn = (HWord)h_generic_calc_Min32Sx4; 3528 goto do_SseAssistedBinary; 3529 case Iop_Max32Ux4: fn = (HWord)h_generic_calc_Max32Ux4; 3530 goto do_SseAssistedBinary; 3531 case Iop_Min32Ux4: fn = (HWord)h_generic_calc_Min32Ux4; 3532 goto do_SseAssistedBinary; 3533 case Iop_Max16Ux8: fn = (HWord)h_generic_calc_Max16Ux8; 3534 goto do_SseAssistedBinary; 3535 case Iop_Min16Ux8: fn = (HWord)h_generic_calc_Min16Ux8; 3536 goto do_SseAssistedBinary; 3537 case Iop_Max8Sx16: fn = (HWord)h_generic_calc_Max8Sx16; 3538 goto do_SseAssistedBinary; 3539 case Iop_Min8Sx16: fn = (HWord)h_generic_calc_Min8Sx16; 3540 goto do_SseAssistedBinary; 3541 case Iop_CmpEQ64x2: fn = (HWord)h_generic_calc_CmpEQ64x2; 3542 goto do_SseAssistedBinary; 3543 case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2; 3544 goto do_SseAssistedBinary; 3545 case Iop_Perm32x4: fn = (HWord)h_generic_calc_Perm32x4; 3546 goto do_SseAssistedBinary; 3547 case Iop_QNarrowBin32Sto16Ux8: 3548 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Ux8; 3549 goto do_SseAssistedBinary; 3550 case Iop_NarrowBin16to8x16: 3551 fn = (HWord)h_generic_calc_NarrowBin16to8x16; 3552 goto do_SseAssistedBinary; 3553 case Iop_NarrowBin32to16x8: 3554 fn = (HWord)h_generic_calc_NarrowBin32to16x8; 3555 goto do_SseAssistedBinary; 3556 do_SseAssistedBinary: { 3557 /* RRRufff! RRRufff code is what we're generating here. Oh 3558 well. 
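            The scheme: carve a 112-byte scratch area out of the stack,
            16-align a pointer into it, park both 128-bit operands there,
            and hand the generic helper three pointers (result, argL, argR)
            in %rdi/%rsi/%rdx.  Roughly:
               subq $112, %rsp ; leaq 48(%rsp), %r_argp ; andq $-16, %r_argp
               movupd the two operands into the %rsi and %rdx slots
               call fn ; movupd 0(%r_argp), result ; addq $112, %rsp
            Everything goes through memory, which is slow, but it keeps the
            helpers generic.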
*/ 3559 vassert(fn != 0); 3560 HReg dst = newVRegV(env); 3561 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3562 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3563 HReg argp = newVRegI(env); 3564 /* subq $112, %rsp -- make a space*/ 3565 sub_from_rsp(env, 112); 3566 /* leaq 48(%rsp), %r_argp -- point into it */ 3567 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()), 3568 argp)); 3569 /* andq $-16, %r_argp -- 16-align the pointer */ 3570 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, 3571 AMD64RMI_Imm( ~(UInt)15 ), 3572 argp)); 3573 /* Prepare 3 arg regs: 3574 leaq 0(%r_argp), %rdi 3575 leaq 16(%r_argp), %rsi 3576 leaq 32(%r_argp), %rdx 3577 */ 3578 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp), 3579 hregAMD64_RDI())); 3580 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp), 3581 hregAMD64_RSI())); 3582 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp), 3583 hregAMD64_RDX())); 3584 /* Store the two args, at (%rsi) and (%rdx): 3585 movupd %argL, 0(%rsi) 3586 movupd %argR, 0(%rdx) 3587 */ 3588 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL, 3589 AMD64AMode_IR(0, hregAMD64_RSI()))); 3590 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argR, 3591 AMD64AMode_IR(0, hregAMD64_RDX()))); 3592 /* call the helper */ 3593 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3594 3, mk_RetLoc_simple(RLPri_None) )); 3595 /* fetch the result from memory, using %r_argp, which the 3596 register allocator will keep alive across the call. */ 3597 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst, 3598 AMD64AMode_IR(0, argp))); 3599 /* and finally, clear the space */ 3600 add_to_rsp(env, 112); 3601 return dst; 3602 } 3603 3604 case Iop_SarN64x2: fn = (HWord)h_generic_calc_SarN64x2; 3605 goto do_SseAssistedVectorAndScalar; 3606 case Iop_SarN8x16: fn = (HWord)h_generic_calc_SarN8x16; 3607 goto do_SseAssistedVectorAndScalar; 3608 do_SseAssistedVectorAndScalar: { 3609 /* RRRufff! RRRufff code is what we're generating here. Oh 3610 well. */ 3611 vassert(fn != 0); 3612 HReg dst = newVRegV(env); 3613 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3614 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 3615 HReg argp = newVRegI(env); 3616 /* subq $112, %rsp -- make a space*/ 3617 sub_from_rsp(env, 112); 3618 /* leaq 48(%rsp), %r_argp -- point into it */ 3619 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()), 3620 argp)); 3621 /* andq $-16, %r_argp -- 16-align the pointer */ 3622 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, 3623 AMD64RMI_Imm( ~(UInt)15 ), 3624 argp)); 3625 /* Prepare 2 vector arg regs: 3626 leaq 0(%r_argp), %rdi 3627 leaq 16(%r_argp), %rsi 3628 */ 3629 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp), 3630 hregAMD64_RDI())); 3631 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp), 3632 hregAMD64_RSI())); 3633 /* Store the vector arg, at (%rsi): 3634 movupd %argL, 0(%rsi) 3635 */ 3636 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL, 3637 AMD64AMode_IR(0, hregAMD64_RSI()))); 3638 /* And get the scalar value into rdx */ 3639 addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RDX())); 3640 3641 /* call the helper */ 3642 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3643 3, mk_RetLoc_simple(RLPri_None) )); 3644 /* fetch the result from memory, using %r_argp, which the 3645 register allocator will keep alive across the call. 
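            (%r_argp is a virtual register, unlike %rdi/%rsi/%rdx, which are
            real caller-saved registers and hence trashed by the call.)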
*/ 3646 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst, 3647 AMD64AMode_IR(0, argp))); 3648 /* and finally, clear the space */ 3649 add_to_rsp(env, 112); 3650 return dst; 3651 } 3652 3653 default: 3654 break; 3655 } /* switch (e->Iex.Binop.op) */ 3656 } /* if (e->tag == Iex_Binop) */ 3657 3658 if (e->tag == Iex_Triop) { 3659 IRTriop *triop = e->Iex.Triop.details; 3660 switch (triop->op) { 3661 3662 case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2_w_rm; 3663 case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2_w_rm; 3664 case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2_w_rm; 3665 case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2_w_rm; 3666 do_64Fx2_w_rm: 3667 { 3668 HReg argL = iselVecExpr(env, triop->arg2); 3669 HReg argR = iselVecExpr(env, triop->arg3); 3670 HReg dst = newVRegV(env); 3671 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3672 /* XXXROUNDINGFIXME */ 3673 /* set roundingmode here */ 3674 addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst)); 3675 return dst; 3676 } 3677 3678 case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4_w_rm; 3679 case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4_w_rm; 3680 case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4_w_rm; 3681 case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4_w_rm; 3682 do_32Fx4_w_rm: 3683 { 3684 HReg argL = iselVecExpr(env, triop->arg2); 3685 HReg argR = iselVecExpr(env, triop->arg3); 3686 HReg dst = newVRegV(env); 3687 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3688 /* XXXROUNDINGFIXME */ 3689 /* set roundingmode here */ 3690 addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst)); 3691 return dst; 3692 } 3693 3694 default: 3695 break; 3696 } /* switch (triop->op) */ 3697 } /* if (e->tag == Iex_Triop) */ 3698 3699 if (e->tag == Iex_ITE) { // VFD 3700 HReg r1 = iselVecExpr(env, e->Iex.ITE.iftrue); 3701 HReg r0 = iselVecExpr(env, e->Iex.ITE.iffalse); 3702 HReg dst = newVRegV(env); 3703 addInstr(env, mk_vMOVsd_RR(r1,dst)); 3704 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond); 3705 addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst)); 3706 return dst; 3707 } 3708 3709 //vec_fail: 3710 vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n", 3711 LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps)); 3712 ppIRExpr(e); 3713 vpanic("iselVecExpr_wrk"); 3714} 3715 3716 3717/*---------------------------------------------------------*/ 3718/*--- ISEL: SIMD (V256) expressions, into 2 XMM regs. 
--*/ 3719/*---------------------------------------------------------*/ 3720 3721static void iselDVecExpr ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo, 3722 ISelEnv* env, const IRExpr* e ) 3723{ 3724 iselDVecExpr_wrk( rHi, rLo, env, e ); 3725# if 0 3726 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3727# endif 3728 vassert(hregClass(*rHi) == HRcVec128); 3729 vassert(hregClass(*rLo) == HRcVec128); 3730 vassert(hregIsVirtual(*rHi)); 3731 vassert(hregIsVirtual(*rLo)); 3732} 3733 3734 3735/* DO NOT CALL THIS DIRECTLY */ 3736static void iselDVecExpr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo, 3737 ISelEnv* env, const IRExpr* e ) 3738{ 3739 HWord fn = 0; /* address of helper fn, if required */ 3740 vassert(e); 3741 IRType ty = typeOfIRExpr(env->type_env,e); 3742 vassert(ty == Ity_V256); 3743 3744 AMD64SseOp op = Asse_INVALID; 3745 3746 /* read 256-bit IRTemp */ 3747 if (e->tag == Iex_RdTmp) { 3748 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp); 3749 return; 3750 } 3751 3752 if (e->tag == Iex_Get) { 3753 HReg vHi = newVRegV(env); 3754 HReg vLo = newVRegV(env); 3755 HReg rbp = hregAMD64_RBP(); 3756 AMD64AMode* am0 = AMD64AMode_IR(e->Iex.Get.offset + 0, rbp); 3757 AMD64AMode* am16 = AMD64AMode_IR(e->Iex.Get.offset + 16, rbp); 3758 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0)); 3759 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16)); 3760 *rHi = vHi; 3761 *rLo = vLo; 3762 return; 3763 } 3764 3765 if (e->tag == Iex_Load) { 3766 HReg vHi = newVRegV(env); 3767 HReg vLo = newVRegV(env); 3768 HReg rA = iselIntExpr_R(env, e->Iex.Load.addr); 3769 AMD64AMode* am0 = AMD64AMode_IR(0, rA); 3770 AMD64AMode* am16 = AMD64AMode_IR(16, rA); 3771 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0)); 3772 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16)); 3773 *rHi = vHi; 3774 *rLo = vLo; 3775 return; 3776 } 3777 3778 if (e->tag == Iex_Const) { 3779 vassert(e->Iex.Const.con->tag == Ico_V256); 3780 switch (e->Iex.Const.con->Ico.V256) { 3781 case 0x00000000: { 3782 HReg vHi = generate_zeroes_V128(env); 3783 HReg vLo = newVRegV(env); 3784 addInstr(env, mk_vMOVsd_RR(vHi, vLo)); 3785 *rHi = vHi; 3786 *rLo = vLo; 3787 return; 3788 } 3789 default: 3790 break; /* give up. Until such time as is necessary. 
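                  Any other V256 literal falls through to the vpanic at the
                  end of this function.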
*/ 3791 } 3792 } 3793 3794 if (e->tag == Iex_Unop) { 3795 switch (e->Iex.Unop.op) { 3796 3797 case Iop_NotV256: { 3798 HReg argHi, argLo; 3799 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg); 3800 *rHi = do_sse_NotV128(env, argHi); 3801 *rLo = do_sse_NotV128(env, argLo); 3802 return; 3803 } 3804 3805 case Iop_RecipEst32Fx8: op = Asse_RCPF; goto do_32Fx8_unary; 3806 case Iop_Sqrt32Fx8: op = Asse_SQRTF; goto do_32Fx8_unary; 3807 case Iop_RSqrtEst32Fx8: op = Asse_RSQRTF; goto do_32Fx8_unary; 3808 do_32Fx8_unary: 3809 { 3810 HReg argHi, argLo; 3811 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg); 3812 HReg dstHi = newVRegV(env); 3813 HReg dstLo = newVRegV(env); 3814 addInstr(env, AMD64Instr_Sse32Fx4(op, argHi, dstHi)); 3815 addInstr(env, AMD64Instr_Sse32Fx4(op, argLo, dstLo)); 3816 *rHi = dstHi; 3817 *rLo = dstLo; 3818 return; 3819 } 3820 3821 case Iop_Sqrt64Fx4: op = Asse_SQRTF; goto do_64Fx4_unary; 3822 do_64Fx4_unary: 3823 { 3824 HReg argHi, argLo; 3825 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg); 3826 HReg dstHi = newVRegV(env); 3827 HReg dstLo = newVRegV(env); 3828 addInstr(env, AMD64Instr_Sse64Fx2(op, argHi, dstHi)); 3829 addInstr(env, AMD64Instr_Sse64Fx2(op, argLo, dstLo)); 3830 *rHi = dstHi; 3831 *rLo = dstLo; 3832 return; 3833 } 3834 3835 case Iop_CmpNEZ64x4: { 3836 /* We can use SSE2 instructions for this. */ 3837 /* Same scheme as Iop_CmpNEZ64x2, except twice as wide 3838 (obviously). See comment on Iop_CmpNEZ64x2 for 3839 explanation of what's going on here. */ 3840 HReg argHi, argLo; 3841 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg); 3842 HReg tmpHi = generate_zeroes_V128(env); 3843 HReg tmpLo = newVRegV(env); 3844 addInstr(env, mk_vMOVsd_RR(tmpHi, tmpLo)); 3845 HReg dstHi = newVRegV(env); 3846 HReg dstLo = newVRegV(env); 3847 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argHi, tmpHi)); 3848 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argLo, tmpLo)); 3849 tmpHi = do_sse_NotV128(env, tmpHi); 3850 tmpLo = do_sse_NotV128(env, tmpLo); 3851 addInstr(env, AMD64Instr_SseShuf(0xB1, tmpHi, dstHi)); 3852 addInstr(env, AMD64Instr_SseShuf(0xB1, tmpLo, dstLo)); 3853 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpHi, dstHi)); 3854 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpLo, dstLo)); 3855 *rHi = dstHi; 3856 *rLo = dstLo; 3857 return; 3858 } 3859 3860 case Iop_CmpNEZ32x8: op = Asse_CMPEQ32; goto do_CmpNEZ_vector; 3861 case Iop_CmpNEZ16x16: op = Asse_CMPEQ16; goto do_CmpNEZ_vector; 3862 case Iop_CmpNEZ8x32: op = Asse_CMPEQ8; goto do_CmpNEZ_vector; 3863 do_CmpNEZ_vector: 3864 { 3865 HReg argHi, argLo; 3866 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg); 3867 HReg tmpHi = newVRegV(env); 3868 HReg tmpLo = newVRegV(env); 3869 HReg zero = generate_zeroes_V128(env); 3870 HReg dstHi, dstLo; 3871 addInstr(env, mk_vMOVsd_RR(argHi, tmpHi)); 3872 addInstr(env, mk_vMOVsd_RR(argLo, tmpLo)); 3873 addInstr(env, AMD64Instr_SseReRg(op, zero, tmpHi)); 3874 addInstr(env, AMD64Instr_SseReRg(op, zero, tmpLo)); 3875 dstHi = do_sse_NotV128(env, tmpHi); 3876 dstLo = do_sse_NotV128(env, tmpLo); 3877 *rHi = dstHi; 3878 *rLo = dstLo; 3879 return; 3880 } 3881 3882 default: 3883 break; 3884 } /* switch (e->Iex.Unop.op) */ 3885 } /* if (e->tag == Iex_Unop) */ 3886 3887 if (e->tag == Iex_Binop) { 3888 switch (e->Iex.Binop.op) { 3889 3890 case Iop_Max64Fx4: op = Asse_MAXF; goto do_64Fx4; 3891 case Iop_Min64Fx4: op = Asse_MINF; goto do_64Fx4; 3892 do_64Fx4: 3893 { 3894 HReg argLhi, argLlo, argRhi, argRlo; 3895 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1); 3896 iselDVecExpr(&argRhi, 
&argRlo, env, e->Iex.Binop.arg2); 3897 HReg dstHi = newVRegV(env); 3898 HReg dstLo = newVRegV(env); 3899 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi)); 3900 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo)); 3901 addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi)); 3902 addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo)); 3903 *rHi = dstHi; 3904 *rLo = dstLo; 3905 return; 3906 } 3907 3908 case Iop_Max32Fx8: op = Asse_MAXF; goto do_32Fx8; 3909 case Iop_Min32Fx8: op = Asse_MINF; goto do_32Fx8; 3910 do_32Fx8: 3911 { 3912 HReg argLhi, argLlo, argRhi, argRlo; 3913 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1); 3914 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2); 3915 HReg dstHi = newVRegV(env); 3916 HReg dstLo = newVRegV(env); 3917 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi)); 3918 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo)); 3919 addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi)); 3920 addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo)); 3921 *rHi = dstHi; 3922 *rLo = dstLo; 3923 return; 3924 } 3925 3926 case Iop_AndV256: op = Asse_AND; goto do_SseReRg; 3927 case Iop_OrV256: op = Asse_OR; goto do_SseReRg; 3928 case Iop_XorV256: op = Asse_XOR; goto do_SseReRg; 3929 case Iop_Add8x32: op = Asse_ADD8; goto do_SseReRg; 3930 case Iop_Add16x16: op = Asse_ADD16; goto do_SseReRg; 3931 case Iop_Add32x8: op = Asse_ADD32; goto do_SseReRg; 3932 case Iop_Add64x4: op = Asse_ADD64; goto do_SseReRg; 3933 case Iop_QAdd8Sx32: op = Asse_QADD8S; goto do_SseReRg; 3934 case Iop_QAdd16Sx16: op = Asse_QADD16S; goto do_SseReRg; 3935 case Iop_QAdd8Ux32: op = Asse_QADD8U; goto do_SseReRg; 3936 case Iop_QAdd16Ux16: op = Asse_QADD16U; goto do_SseReRg; 3937 case Iop_Avg8Ux32: op = Asse_AVG8U; goto do_SseReRg; 3938 case Iop_Avg16Ux16: op = Asse_AVG16U; goto do_SseReRg; 3939 case Iop_CmpEQ8x32: op = Asse_CMPEQ8; goto do_SseReRg; 3940 case Iop_CmpEQ16x16: op = Asse_CMPEQ16; goto do_SseReRg; 3941 case Iop_CmpEQ32x8: op = Asse_CMPEQ32; goto do_SseReRg; 3942 case Iop_CmpGT8Sx32: op = Asse_CMPGT8S; goto do_SseReRg; 3943 case Iop_CmpGT16Sx16: op = Asse_CMPGT16S; goto do_SseReRg; 3944 case Iop_CmpGT32Sx8: op = Asse_CMPGT32S; goto do_SseReRg; 3945 case Iop_Max16Sx16: op = Asse_MAX16S; goto do_SseReRg; 3946 case Iop_Max8Ux32: op = Asse_MAX8U; goto do_SseReRg; 3947 case Iop_Min16Sx16: op = Asse_MIN16S; goto do_SseReRg; 3948 case Iop_Min8Ux32: op = Asse_MIN8U; goto do_SseReRg; 3949 case Iop_MulHi16Ux16: op = Asse_MULHI16U; goto do_SseReRg; 3950 case Iop_MulHi16Sx16: op = Asse_MULHI16S; goto do_SseReRg; 3951 case Iop_Mul16x16: op = Asse_MUL16; goto do_SseReRg; 3952 case Iop_Sub8x32: op = Asse_SUB8; goto do_SseReRg; 3953 case Iop_Sub16x16: op = Asse_SUB16; goto do_SseReRg; 3954 case Iop_Sub32x8: op = Asse_SUB32; goto do_SseReRg; 3955 case Iop_Sub64x4: op = Asse_SUB64; goto do_SseReRg; 3956 case Iop_QSub8Sx32: op = Asse_QSUB8S; goto do_SseReRg; 3957 case Iop_QSub16Sx16: op = Asse_QSUB16S; goto do_SseReRg; 3958 case Iop_QSub8Ux32: op = Asse_QSUB8U; goto do_SseReRg; 3959 case Iop_QSub16Ux16: op = Asse_QSUB16U; goto do_SseReRg; 3960 do_SseReRg: 3961 { 3962 HReg argLhi, argLlo, argRhi, argRlo; 3963 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1); 3964 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2); 3965 HReg dstHi = newVRegV(env); 3966 HReg dstLo = newVRegV(env); 3967 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi)); 3968 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo)); 3969 addInstr(env, AMD64Instr_SseReRg(op, argRhi, dstHi)); 3970 addInstr(env, AMD64Instr_SseReRg(op, argRlo, dstLo)); 3971 *rHi = dstHi; 3972 *rLo = dstLo; 
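         /* Note the copies into dstHi/dstLo above: the two-operand SSE
            forms overwrite their destination, and the vregs holding the
            argument halves must not be trashed, since they may be used
            again elsewhere. */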
         return;
      }

      case Iop_ShlN16x16: op = Asse_SHL16; goto do_SseShift;
      case Iop_ShlN32x8:  op = Asse_SHL32; goto do_SseShift;
      case Iop_ShlN64x4:  op = Asse_SHL64; goto do_SseShift;
      case Iop_SarN16x16: op = Asse_SAR16; goto do_SseShift;
      case Iop_SarN32x8:  op = Asse_SAR32; goto do_SseShift;
      case Iop_ShrN16x16: op = Asse_SHR16; goto do_SseShift;
      case Iop_ShrN32x8:  op = Asse_SHR32; goto do_SseShift;
      case Iop_ShrN64x4:  op = Asse_SHR64; goto do_SseShift;
      do_SseShift: {
         HReg gregHi, gregLo;
         iselDVecExpr(&gregHi, &gregLo, env, e->Iex.Binop.arg1);
         AMD64RMI*   rmi   = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         AMD64AMode* rsp0  = AMD64AMode_IR(0, hregAMD64_RSP());
         HReg        ereg  = newVRegV(env);
         HReg        dstHi = newVRegV(env);
         HReg        dstLo = newVRegV(env);
         addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
         addInstr(env, AMD64Instr_Push(rmi));
         addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
         addInstr(env, mk_vMOVsd_RR(gregHi, dstHi));
         addInstr(env, AMD64Instr_SseReRg(op, ereg, dstHi));
         addInstr(env, mk_vMOVsd_RR(gregLo, dstLo));
         addInstr(env, AMD64Instr_SseReRg(op, ereg, dstLo));
         add_to_rsp(env, 16);
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      case Iop_V128HLtoV256: {
         *rHi = iselVecExpr(env, e->Iex.Binop.arg1);
         *rLo = iselVecExpr(env, e->Iex.Binop.arg2);
         return;
      }

      case Iop_Mul32x8:    fn = (HWord)h_generic_calc_Mul32x4;
                           goto do_SseAssistedBinary;
      case Iop_Max32Sx8:   fn = (HWord)h_generic_calc_Max32Sx4;
                           goto do_SseAssistedBinary;
      case Iop_Min32Sx8:   fn = (HWord)h_generic_calc_Min32Sx4;
                           goto do_SseAssistedBinary;
      case Iop_Max32Ux8:   fn = (HWord)h_generic_calc_Max32Ux4;
                           goto do_SseAssistedBinary;
      case Iop_Min32Ux8:   fn = (HWord)h_generic_calc_Min32Ux4;
                           goto do_SseAssistedBinary;
      case Iop_Max16Ux16:  fn = (HWord)h_generic_calc_Max16Ux8;
                           goto do_SseAssistedBinary;
      case Iop_Min16Ux16:  fn = (HWord)h_generic_calc_Min16Ux8;
                           goto do_SseAssistedBinary;
      case Iop_Max8Sx32:   fn = (HWord)h_generic_calc_Max8Sx16;
                           goto do_SseAssistedBinary;
      case Iop_Min8Sx32:   fn = (HWord)h_generic_calc_Min8Sx16;
                           goto do_SseAssistedBinary;
      case Iop_CmpEQ64x4:  fn = (HWord)h_generic_calc_CmpEQ64x2;
                           goto do_SseAssistedBinary;
      case Iop_CmpGT64Sx4: fn = (HWord)h_generic_calc_CmpGT64Sx2;
                           goto do_SseAssistedBinary;
      do_SseAssistedBinary: {
         /* RRRufff! RRRufff code is what we're generating here.  Oh
            well.
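            Same scheme as the 128-bit do_SseAssistedBinary above, except
            that the scratch area is 160 bytes and the 128-bit helper is
            called twice, once per half of the V256 operands.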
*/ 4036 vassert(fn != 0); 4037 HReg dstHi = newVRegV(env); 4038 HReg dstLo = newVRegV(env); 4039 HReg argLhi, argLlo, argRhi, argRlo; 4040 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1); 4041 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2); 4042 HReg argp = newVRegI(env); 4043 /* subq $160, %rsp -- make a space*/ 4044 sub_from_rsp(env, 160); 4045 /* leaq 48(%rsp), %r_argp -- point into it */ 4046 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()), 4047 argp)); 4048 /* andq $-16, %r_argp -- 16-align the pointer */ 4049 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, 4050 AMD64RMI_Imm( ~(UInt)15 ), 4051 argp)); 4052 /* Prepare 3 arg regs: 4053 leaq 0(%r_argp), %rdi 4054 leaq 16(%r_argp), %rsi 4055 leaq 32(%r_argp), %rdx 4056 */ 4057 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp), 4058 hregAMD64_RDI())); 4059 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp), 4060 hregAMD64_RSI())); 4061 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp), 4062 hregAMD64_RDX())); 4063 /* Store the two high args, at (%rsi) and (%rdx): 4064 movupd %argLhi, 0(%rsi) 4065 movupd %argRhi, 0(%rdx) 4066 */ 4067 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi, 4068 AMD64AMode_IR(0, hregAMD64_RSI()))); 4069 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi, 4070 AMD64AMode_IR(0, hregAMD64_RDX()))); 4071 /* Store the two low args, at 48(%rsi) and 48(%rdx): 4072 movupd %argLlo, 48(%rsi) 4073 movupd %argRlo, 48(%rdx) 4074 */ 4075 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo, 4076 AMD64AMode_IR(48, hregAMD64_RSI()))); 4077 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo, 4078 AMD64AMode_IR(48, hregAMD64_RDX()))); 4079 /* call the helper */ 4080 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3, 4081 mk_RetLoc_simple(RLPri_None) )); 4082 /* Prepare 3 arg regs: 4083 leaq 48(%r_argp), %rdi 4084 leaq 64(%r_argp), %rsi 4085 leaq 80(%r_argp), %rdx 4086 */ 4087 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, argp), 4088 hregAMD64_RDI())); 4089 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp), 4090 hregAMD64_RSI())); 4091 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(80, argp), 4092 hregAMD64_RDX())); 4093 /* call the helper */ 4094 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3, 4095 mk_RetLoc_simple(RLPri_None) )); 4096 /* fetch the result from memory, using %r_argp, which the 4097 register allocator will keep alive across the call. */ 4098 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi, 4099 AMD64AMode_IR(0, argp))); 4100 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo, 4101 AMD64AMode_IR(48, argp))); 4102 /* and finally, clear the space */ 4103 add_to_rsp(env, 160); 4104 *rHi = dstHi; 4105 *rLo = dstLo; 4106 return; 4107 } 4108 4109 case Iop_Perm32x8: fn = (HWord)h_generic_calc_Perm32x8; 4110 goto do_SseAssistedBinary256; 4111 do_SseAssistedBinary256: { 4112 /* RRRufff! RRRufff code is what we're generating here. Oh 4113 well. 
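            Unlike the case above, the helper here operates on whole
            256-bit values, so the two halves of each operand are stored
            contiguously and the helper is called only once.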
*/ 4114 vassert(fn != 0); 4115 HReg dstHi = newVRegV(env); 4116 HReg dstLo = newVRegV(env); 4117 HReg argLhi, argLlo, argRhi, argRlo; 4118 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1); 4119 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2); 4120 HReg argp = newVRegI(env); 4121 /* subq $160, %rsp -- make a space*/ 4122 sub_from_rsp(env, 160); 4123 /* leaq 48(%rsp), %r_argp -- point into it */ 4124 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()), 4125 argp)); 4126 /* andq $-16, %r_argp -- 16-align the pointer */ 4127 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, 4128 AMD64RMI_Imm( ~(UInt)15 ), 4129 argp)); 4130 /* Prepare 3 arg regs: 4131 leaq 0(%r_argp), %rdi 4132 leaq 32(%r_argp), %rsi 4133 leaq 64(%r_argp), %rdx 4134 */ 4135 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp), 4136 hregAMD64_RDI())); 4137 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp), 4138 hregAMD64_RSI())); 4139 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp), 4140 hregAMD64_RDX())); 4141 /* Store the two args, at (%rsi) and (%rdx): 4142 movupd %argLlo, 0(%rsi) 4143 movupd %argLhi, 16(%rsi) 4144 movupd %argRlo, 0(%rdx) 4145 movupd %argRhi, 16(%rdx) 4146 */ 4147 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo, 4148 AMD64AMode_IR(0, hregAMD64_RSI()))); 4149 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi, 4150 AMD64AMode_IR(16, hregAMD64_RSI()))); 4151 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo, 4152 AMD64AMode_IR(0, hregAMD64_RDX()))); 4153 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi, 4154 AMD64AMode_IR(16, hregAMD64_RDX()))); 4155 /* call the helper */ 4156 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3, 4157 mk_RetLoc_simple(RLPri_None) )); 4158 /* fetch the result from memory, using %r_argp, which the 4159 register allocator will keep alive across the call. 
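            The 256-bit result comes back as two 16-byte loads: the low
            half from 0(%r_argp) and the high half from 16(%r_argp).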
*/ 4160 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo, 4161 AMD64AMode_IR(0, argp))); 4162 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi, 4163 AMD64AMode_IR(16, argp))); 4164 /* and finally, clear the space */ 4165 add_to_rsp(env, 160); 4166 *rHi = dstHi; 4167 *rLo = dstLo; 4168 return; 4169 } 4170 4171 default: 4172 break; 4173 } /* switch (e->Iex.Binop.op) */ 4174 } /* if (e->tag == Iex_Binop) */ 4175 4176 if (e->tag == Iex_Triop) { 4177 IRTriop *triop = e->Iex.Triop.details; 4178 switch (triop->op) { 4179 4180 case Iop_Add64Fx4: op = Asse_ADDF; goto do_64Fx4_w_rm; 4181 case Iop_Sub64Fx4: op = Asse_SUBF; goto do_64Fx4_w_rm; 4182 case Iop_Mul64Fx4: op = Asse_MULF; goto do_64Fx4_w_rm; 4183 case Iop_Div64Fx4: op = Asse_DIVF; goto do_64Fx4_w_rm; 4184 do_64Fx4_w_rm: 4185 { 4186 HReg argLhi, argLlo, argRhi, argRlo; 4187 iselDVecExpr(&argLhi, &argLlo, env, triop->arg2); 4188 iselDVecExpr(&argRhi, &argRlo, env, triop->arg3); 4189 HReg dstHi = newVRegV(env); 4190 HReg dstLo = newVRegV(env); 4191 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi)); 4192 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo)); 4193 /* XXXROUNDINGFIXME */ 4194 /* set roundingmode here */ 4195 addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi)); 4196 addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo)); 4197 *rHi = dstHi; 4198 *rLo = dstLo; 4199 return; 4200 } 4201 4202 case Iop_Add32Fx8: op = Asse_ADDF; goto do_32Fx8_w_rm; 4203 case Iop_Sub32Fx8: op = Asse_SUBF; goto do_32Fx8_w_rm; 4204 case Iop_Mul32Fx8: op = Asse_MULF; goto do_32Fx8_w_rm; 4205 case Iop_Div32Fx8: op = Asse_DIVF; goto do_32Fx8_w_rm; 4206 do_32Fx8_w_rm: 4207 { 4208 HReg argLhi, argLlo, argRhi, argRlo; 4209 iselDVecExpr(&argLhi, &argLlo, env, triop->arg2); 4210 iselDVecExpr(&argRhi, &argRlo, env, triop->arg3); 4211 HReg dstHi = newVRegV(env); 4212 HReg dstLo = newVRegV(env); 4213 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi)); 4214 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo)); 4215 /* XXXROUNDINGFIXME */ 4216 /* set roundingmode here */ 4217 addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi)); 4218 addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo)); 4219 *rHi = dstHi; 4220 *rLo = dstLo; 4221 return; 4222 } 4223 4224 default: 4225 break; 4226 } /* switch (triop->op) */ 4227 } /* if (e->tag == Iex_Triop) */ 4228 4229 4230 if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_64x4toV256) { 4231 HReg rsp = hregAMD64_RSP(); 4232 HReg vHi = newVRegV(env); 4233 HReg vLo = newVRegV(env); 4234 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, rsp); 4235 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp); 4236 /* arg1 is the most significant (Q3), arg4 the least (Q0) */ 4237 /* Get all the args into regs, before messing with the stack. */ 4238 AMD64RI* q3 = iselIntExpr_RI(env, e->Iex.Qop.details->arg1); 4239 AMD64RI* q2 = iselIntExpr_RI(env, e->Iex.Qop.details->arg2); 4240 AMD64RI* q1 = iselIntExpr_RI(env, e->Iex.Qop.details->arg3); 4241 AMD64RI* q0 = iselIntExpr_RI(env, e->Iex.Qop.details->arg4); 4242 /* less significant lane (Q2) at the lower address (-16(rsp)) */ 4243 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q3, m8_rsp)); 4244 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q2, m16_rsp)); 4245 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, m16_rsp)); 4246 /* and then the lower half .. 
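         reusing the same two stack slots: Q1 goes to -8(%rsp), Q0 to
         -16(%rsp), and vLo is then loaded as 16 bytes from -16(%rsp).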
*/ 4247 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q1, m8_rsp)); 4248 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q0, m16_rsp)); 4249 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, m16_rsp)); 4250 *rHi = vHi; 4251 *rLo = vLo; 4252 return; 4253 } 4254 4255 if (e->tag == Iex_ITE) { 4256 HReg r1Hi, r1Lo, r0Hi, r0Lo; 4257 iselDVecExpr(&r1Hi, &r1Lo, env, e->Iex.ITE.iftrue); 4258 iselDVecExpr(&r0Hi, &r0Lo, env, e->Iex.ITE.iffalse); 4259 HReg dstHi = newVRegV(env); 4260 HReg dstLo = newVRegV(env); 4261 addInstr(env, mk_vMOVsd_RR(r1Hi,dstHi)); 4262 addInstr(env, mk_vMOVsd_RR(r1Lo,dstLo)); 4263 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond); 4264 addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Hi, dstHi)); 4265 addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Lo, dstLo)); 4266 *rHi = dstHi; 4267 *rLo = dstLo; 4268 return; 4269 } 4270 4271 //avx_fail: 4272 vex_printf("iselDVecExpr (amd64, subarch = %s): can't reduce\n", 4273 LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps)); 4274 ppIRExpr(e); 4275 vpanic("iselDVecExpr_wrk"); 4276} 4277 4278 4279/*---------------------------------------------------------*/ 4280/*--- ISEL: Statements ---*/ 4281/*---------------------------------------------------------*/ 4282 4283static void iselStmt ( ISelEnv* env, IRStmt* stmt ) 4284{ 4285 if (vex_traceflags & VEX_TRACE_VCODE) { 4286 vex_printf("\n-- "); 4287 ppIRStmt(stmt); 4288 vex_printf("\n"); 4289 } 4290 4291 switch (stmt->tag) { 4292 4293 /* --------- LOADG (guarded load) --------- */ 4294 case Ist_LoadG: { 4295 IRLoadG* lg = stmt->Ist.LoadG.details; 4296 if (lg->end != Iend_LE) 4297 goto stmt_fail; 4298 4299 UChar szB = 0; /* invalid */ 4300 switch (lg->cvt) { 4301 case ILGop_Ident32: szB = 4; break; 4302 case ILGop_Ident64: szB = 8; break; 4303 case ILGop_IdentV128: szB = 16; break; 4304 default: break; 4305 } 4306 if (szB == 0) 4307 goto stmt_fail; 4308 4309 AMD64AMode* amAddr 4310 = iselIntExpr_AMode(env, lg->addr); 4311 HReg rAlt 4312 = szB == 16 ? iselVecExpr(env, lg->alt) 4313 : iselIntExpr_R(env, lg->alt); 4314 HReg rDst 4315 = lookupIRTemp(env, lg->dst); 4316 4317 /* Get the alt value into the dst. We'll do a conditional load 4318 which overwrites it -- or not -- with loaded data. */ 4319 if (szB == 16) { 4320 addInstr(env, mk_vMOVsd_RR(rAlt, rDst)); 4321 } else { 4322 addInstr(env, mk_iMOVsd_RR(rAlt, rDst)); 4323 } 4324 AMD64CondCode cc = iselCondCode(env, lg->guard); 4325 if (szB == 16) { 4326 addInstr(env, AMD64Instr_SseCLoad(cc, amAddr, rDst)); 4327 } else { 4328 addInstr(env, AMD64Instr_CLoad(cc, szB, amAddr, rDst)); 4329 } 4330 return; 4331 } 4332 4333 /* --------- STOREG (guarded store) --------- */ 4334 case Ist_StoreG: { 4335 IRStoreG* sg = stmt->Ist.StoreG.details; 4336 if (sg->end != Iend_LE) 4337 goto stmt_fail; 4338 4339 UChar szB = 0; /* invalid */ 4340 switch (typeOfIRExpr(env->type_env, sg->data)) { 4341 case Ity_I32: szB = 4; break; 4342 case Ity_I64: szB = 8; break; 4343 case Ity_V128: szB = 16; break; 4344 default: break; 4345 } 4346 if (szB == 0) 4347 goto stmt_fail; 4348 4349 AMD64AMode* amAddr 4350 = iselIntExpr_AMode(env, sg->addr); 4351 HReg rSrc 4352 = szB == 16 ? 
iselVecExpr(env, sg->data) 4353 : iselIntExpr_R(env, sg->data); 4354 AMD64CondCode cc 4355 = iselCondCode(env, sg->guard); 4356 if (szB == 16) { 4357 addInstr(env, AMD64Instr_SseCStore(cc, rSrc, amAddr)); 4358 } else { 4359 addInstr(env, AMD64Instr_CStore(cc, szB, rSrc, amAddr)); 4360 } 4361 return; 4362 } 4363 4364 /* --------- STORE --------- */ 4365 case Ist_Store: { 4366 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr); 4367 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); 4368 IREndness end = stmt->Ist.Store.end; 4369 4370 if (tya != Ity_I64 || end != Iend_LE) 4371 goto stmt_fail; 4372 4373 if (tyd == Ity_I64) { 4374 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 4375 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data); 4376 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am)); 4377 return; 4378 } 4379 if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) { 4380 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 4381 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data); 4382 addInstr(env, AMD64Instr_Store( 4383 toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)), 4384 r,am)); 4385 return; 4386 } 4387 if (tyd == Ity_F64) { 4388 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 4389 HReg r = iselDblExpr(env, stmt->Ist.Store.data); 4390 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am)); 4391 return; 4392 } 4393 if (tyd == Ity_F32) { 4394 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 4395 HReg r = iselFltExpr(env, stmt->Ist.Store.data); 4396 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am)); 4397 return; 4398 } 4399 if (tyd == Ity_V128) { 4400 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 4401 HReg r = iselVecExpr(env, stmt->Ist.Store.data); 4402 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am)); 4403 return; 4404 } 4405 if (tyd == Ity_V256) { 4406 HReg rA = iselIntExpr_R(env, stmt->Ist.Store.addr); 4407 AMD64AMode* am0 = AMD64AMode_IR(0, rA); 4408 AMD64AMode* am16 = AMD64AMode_IR(16, rA); 4409 HReg vHi, vLo; 4410 iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Store.data); 4411 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0)); 4412 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16)); 4413 return; 4414 } 4415 break; 4416 } 4417 4418 /* --------- PUT --------- */ 4419 case Ist_Put: { 4420 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); 4421 if (ty == Ity_I64) { 4422 /* We're going to write to memory, so compute the RHS into an 4423 AMD64RI. */ 4424 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data); 4425 addInstr(env, 4426 AMD64Instr_Alu64M( 4427 Aalu_MOV, 4428 ri, 4429 AMD64AMode_IR(stmt->Ist.Put.offset, 4430 hregAMD64_RBP()) 4431 )); 4432 return; 4433 } 4434 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) { 4435 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data); 4436 addInstr(env, AMD64Instr_Store( 4437 toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 
2 : 4)), 4438 r, 4439 AMD64AMode_IR(stmt->Ist.Put.offset, 4440 hregAMD64_RBP()))); 4441 return; 4442 } 4443 if (ty == Ity_F32) { 4444 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data); 4445 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP()); 4446 set_SSE_rounding_default(env); /* paranoia */ 4447 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am )); 4448 return; 4449 } 4450 if (ty == Ity_F64) { 4451 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data); 4452 AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset, 4453 hregAMD64_RBP() ); 4454 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am )); 4455 return; 4456 } 4457 if (ty == Ity_V128) { 4458 HReg vec = iselVecExpr(env, stmt->Ist.Put.data); 4459 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, 4460 hregAMD64_RBP()); 4461 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am)); 4462 return; 4463 } 4464 if (ty == Ity_V256) { 4465 HReg vHi, vLo; 4466 iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Put.data); 4467 HReg rbp = hregAMD64_RBP(); 4468 AMD64AMode* am0 = AMD64AMode_IR(stmt->Ist.Put.offset + 0, rbp); 4469 AMD64AMode* am16 = AMD64AMode_IR(stmt->Ist.Put.offset + 16, rbp); 4470 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0)); 4471 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16)); 4472 return; 4473 } 4474 break; 4475 } 4476 4477 /* --------- Indexed PUT --------- */ 4478 case Ist_PutI: { 4479 IRPutI *puti = stmt->Ist.PutI.details; 4480 4481 AMD64AMode* am 4482 = genGuestArrayOffset( 4483 env, puti->descr, 4484 puti->ix, puti->bias ); 4485 4486 IRType ty = typeOfIRExpr(env->type_env, puti->data); 4487 if (ty == Ity_F64) { 4488 HReg val = iselDblExpr(env, puti->data); 4489 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am )); 4490 return; 4491 } 4492 if (ty == Ity_I8) { 4493 HReg r = iselIntExpr_R(env, puti->data); 4494 addInstr(env, AMD64Instr_Store( 1, r, am )); 4495 return; 4496 } 4497 if (ty == Ity_I64) { 4498 AMD64RI* ri = iselIntExpr_RI(env, puti->data); 4499 addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am )); 4500 return; 4501 } 4502 break; 4503 } 4504 4505 /* --------- TMP --------- */ 4506 case Ist_WrTmp: { 4507 IRTemp tmp = stmt->Ist.WrTmp.tmp; 4508 IRType ty = typeOfIRTemp(env->type_env, tmp); 4509 4510 /* optimisation: if stmt->Ist.WrTmp.data is Add64(..,..), 4511 compute it into an AMode and then use LEA. This usually 4512 produces fewer instructions, often because (for memcheck 4513 created IR) we get t = address-expression, (t is later used 4514 twice) and so doing this naturally turns address-expression 4515 back into an AMD64 amode. */ 4516 if (ty == Ity_I64 4517 && stmt->Ist.WrTmp.data->tag == Iex_Binop 4518 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add64) { 4519 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data); 4520 HReg dst = lookupIRTemp(env, tmp); 4521 if (am->tag == Aam_IR && am->Aam.IR.imm == 0) { 4522 /* Hmm, iselIntExpr_AMode wimped out and just computed the 4523 value into a register. Just emit a normal reg-reg move 4524 so reg-alloc can coalesce it away in the usual way. 
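            (This is the case where the Add64 collapsed to a bare register
            with a zero offset, so there is nothing useful for a LEA to
            compute.)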
*/ 4525 HReg src = am->Aam.IR.reg; 4526 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst)); 4527 } else { 4528 addInstr(env, AMD64Instr_Lea64(am,dst)); 4529 } 4530 return; 4531 } 4532 4533 if (ty == Ity_I64 || ty == Ity_I32 4534 || ty == Ity_I16 || ty == Ity_I8) { 4535 AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data); 4536 HReg dst = lookupIRTemp(env, tmp); 4537 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst)); 4538 return; 4539 } 4540 if (ty == Ity_I128) { 4541 HReg rHi, rLo, dstHi, dstLo; 4542 iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data); 4543 lookupIRTempPair( &dstHi, &dstLo, env, tmp); 4544 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) ); 4545 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) ); 4546 return; 4547 } 4548 if (ty == Ity_I1) { 4549 AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data); 4550 HReg dst = lookupIRTemp(env, tmp); 4551 addInstr(env, AMD64Instr_Set64(cond, dst)); 4552 return; 4553 } 4554 if (ty == Ity_F64) { 4555 HReg dst = lookupIRTemp(env, tmp); 4556 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data); 4557 addInstr(env, mk_vMOVsd_RR(src, dst)); 4558 return; 4559 } 4560 if (ty == Ity_F32) { 4561 HReg dst = lookupIRTemp(env, tmp); 4562 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); 4563 addInstr(env, mk_vMOVsd_RR(src, dst)); 4564 return; 4565 } 4566 if (ty == Ity_V128) { 4567 HReg dst = lookupIRTemp(env, tmp); 4568 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data); 4569 addInstr(env, mk_vMOVsd_RR(src, dst)); 4570 return; 4571 } 4572 if (ty == Ity_V256) { 4573 HReg rHi, rLo, dstHi, dstLo; 4574 iselDVecExpr(&rHi,&rLo, env, stmt->Ist.WrTmp.data); 4575 lookupIRTempPair( &dstHi, &dstLo, env, tmp); 4576 addInstr(env, mk_vMOVsd_RR(rHi,dstHi) ); 4577 addInstr(env, mk_vMOVsd_RR(rLo,dstLo) ); 4578 return; 4579 } 4580 break; 4581 } 4582 4583 /* --------- Call to DIRTY helper --------- */ 4584 case Ist_Dirty: { 4585 IRDirty* d = stmt->Ist.Dirty.details; 4586 4587 /* Figure out the return type, if any. */ 4588 IRType retty = Ity_INVALID; 4589 if (d->tmp != IRTemp_INVALID) 4590 retty = typeOfIRTemp(env->type_env, d->tmp); 4591 4592 /* Throw out any return types we don't know about. */ 4593 Bool retty_ok = False; 4594 switch (retty) { 4595 case Ity_INVALID: /* function doesn't return anything */ 4596 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: 4597 case Ity_V128: case Ity_V256: 4598 retty_ok = True; break; 4599 default: 4600 break; 4601 } 4602 if (!retty_ok) 4603 break; /* will go to stmt_fail: */ 4604 4605 /* Marshal args, do the call, and set the return value to 4606 0x555..555 if this is a conditional call that returns a value 4607 and the call is skipped. */ 4608 UInt addToSp = 0; 4609 RetLoc rloc = mk_RetLoc_INVALID(); 4610 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args ); 4611 vassert(is_sane_RetLoc(rloc)); 4612 4613 /* Now figure out what to do with the returned value, if any. */ 4614 switch (retty) { 4615 case Ity_INVALID: { 4616 /* No return value. Nothing to do. */ 4617 vassert(d->tmp == IRTemp_INVALID); 4618 vassert(rloc.pri == RLPri_None); 4619 vassert(addToSp == 0); 4620 return; 4621 } 4622 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: { 4623 /* The returned value is in %rax. Park it in the register 4624 associated with tmp. 
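            The full 64-bit register is copied; for I32/I16/I8 results only
            the low-order bits of the destination vreg are meaningful, in
            line with how narrow integer values are handled elsewhere in
            this file.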
*/ 4625 vassert(rloc.pri == RLPri_Int); 4626 vassert(addToSp == 0); 4627 HReg dst = lookupIRTemp(env, d->tmp); 4628 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) ); 4629 return; 4630 } 4631 case Ity_V128: { 4632 /* The returned value is on the stack, and rloc.spOff 4633 tells us where. Fish it off the stack and then move 4634 the stack pointer upwards to clear it, as directed by 4635 doHelperCall. */ 4636 vassert(rloc.pri == RLPri_V128SpRel); 4637 vassert(addToSp >= 16); 4638 HReg dst = lookupIRTemp(env, d->tmp); 4639 AMD64AMode* am = AMD64AMode_IR(rloc.spOff, hregAMD64_RSP()); 4640 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am )); 4641 add_to_rsp(env, addToSp); 4642 return; 4643 } 4644 case Ity_V256: { 4645 /* See comments for Ity_V128. */ 4646 vassert(rloc.pri == RLPri_V256SpRel); 4647 vassert(addToSp >= 32); 4648 HReg dstLo, dstHi; 4649 lookupIRTempPair(&dstHi, &dstLo, env, d->tmp); 4650 AMD64AMode* amLo = AMD64AMode_IR(rloc.spOff, hregAMD64_RSP()); 4651 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dstLo, amLo )); 4652 AMD64AMode* amHi = AMD64AMode_IR(rloc.spOff+16, hregAMD64_RSP()); 4653 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dstHi, amHi )); 4654 add_to_rsp(env, addToSp); 4655 return; 4656 } 4657 default: 4658 /*NOTREACHED*/ 4659 vassert(0); 4660 } 4661 break; 4662 } 4663 4664 /* --------- MEM FENCE --------- */ 4665 case Ist_MBE: 4666 switch (stmt->Ist.MBE.event) { 4667 case Imbe_Fence: 4668 addInstr(env, AMD64Instr_MFence()); 4669 return; 4670 default: 4671 break; 4672 } 4673 break; 4674 4675 /* --------- ACAS --------- */ 4676 case Ist_CAS: 4677 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) { 4678 /* "normal" singleton CAS */ 4679 UChar sz; 4680 IRCAS* cas = stmt->Ist.CAS.details; 4681 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); 4682 /* get: cas->expd into %rax, and cas->data into %rbx */ 4683 AMD64AMode* am = iselIntExpr_AMode(env, cas->addr); 4684 HReg rData = iselIntExpr_R(env, cas->dataLo); 4685 HReg rExpd = iselIntExpr_R(env, cas->expdLo); 4686 HReg rOld = lookupIRTemp(env, cas->oldLo); 4687 vassert(cas->expdHi == NULL); 4688 vassert(cas->dataHi == NULL); 4689 addInstr(env, mk_iMOVsd_RR(rExpd, rOld)); 4690 addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX())); 4691 addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX())); 4692 switch (ty) { 4693 case Ity_I64: sz = 8; break; 4694 case Ity_I32: sz = 4; break; 4695 case Ity_I16: sz = 2; break; 4696 case Ity_I8: sz = 1; break; 4697 default: goto unhandled_cas; 4698 } 4699 addInstr(env, AMD64Instr_ACAS(am, sz)); 4700 addInstr(env, AMD64Instr_CMov64(Acc_NZ, hregAMD64_RAX(), rOld)); 4701 return; 4702 } else { 4703 /* double CAS */ 4704 UChar sz; 4705 IRCAS* cas = stmt->Ist.CAS.details; 4706 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); 4707 /* only 32-bit and 64-bit allowed in this case */ 4708 /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */ 4709 /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */ 4710 AMD64AMode* am = iselIntExpr_AMode(env, cas->addr); 4711 HReg rDataHi = iselIntExpr_R(env, cas->dataHi); 4712 HReg rDataLo = iselIntExpr_R(env, cas->dataLo); 4713 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi); 4714 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo); 4715 HReg rOldHi = lookupIRTemp(env, cas->oldHi); 4716 HReg rOldLo = lookupIRTemp(env, cas->oldLo); 4717 switch (ty) { 4718 case Ity_I64: 4719 if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16)) 4720 goto unhandled_cas; /* we'd have to generate 4721 cmpxchg16b, but the host 4722 doesn't support that */ 4723 sz = 
8; 4724 break; 4725 case Ity_I32: 4726 sz = 4; 4727 break; 4728 default: 4729 goto unhandled_cas; 4730 } 4731 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi)); 4732 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo)); 4733 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX())); 4734 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX())); 4735 addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX())); 4736 addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX())); 4737 addInstr(env, AMD64Instr_DACAS(am, sz)); 4738 addInstr(env, AMD64Instr_CMov64(Acc_NZ, hregAMD64_RDX(), rOldHi)); 4739 addInstr(env, AMD64Instr_CMov64(Acc_NZ, hregAMD64_RAX(), rOldLo)); 4740 return; 4741 } 4742 unhandled_cas: 4743 break; 4744 4745 /* --------- INSTR MARK --------- */ 4746 /* Doesn't generate any executable code ... */ 4747 case Ist_IMark: 4748 return; 4749 4750 /* --------- ABI HINT --------- */ 4751 /* These have no meaning (denotation in the IR) and so we ignore 4752 them ... if any actually made it this far. */ 4753 case Ist_AbiHint: 4754 return; 4755 4756 /* --------- NO-OP --------- */ 4757 case Ist_NoOp: 4758 return; 4759 4760 /* --------- EXIT --------- */ 4761 case Ist_Exit: { 4762 if (stmt->Ist.Exit.dst->tag != Ico_U64) 4763 vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value"); 4764 4765 AMD64CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard); 4766 AMD64AMode* amRIP = AMD64AMode_IR(stmt->Ist.Exit.offsIP, 4767 hregAMD64_RBP()); 4768 4769 /* Case: boring transfer to known address */ 4770 if (stmt->Ist.Exit.jk == Ijk_Boring) { 4771 if (env->chainingAllowed) { 4772 /* .. almost always true .. */ 4773 /* Skip the event check at the dst if this is a forwards 4774 edge. */ 4775 Bool toFastEP 4776 = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga; 4777 if (0) vex_printf("%s", toFastEP ? "Y" : ","); 4778 addInstr(env, AMD64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64, 4779 amRIP, cc, toFastEP)); 4780 } else { 4781 /* .. very occasionally .. */ 4782 /* We can't use chaining, so ask for an assisted transfer, 4783 as that's the only alternative that is allowable. */ 4784 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 4785 addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, Ijk_Boring)); 4786 } 4787 return; 4788 } 4789 4790 /* Case: assisted transfer to arbitrary address */ 4791 switch (stmt->Ist.Exit.jk) { 4792 /* Keep this list in sync with that in iselNext below */ 4793 case Ijk_ClientReq: 4794 case Ijk_EmWarn: 4795 case Ijk_NoDecode: 4796 case Ijk_NoRedir: 4797 case Ijk_SigSEGV: 4798 case Ijk_SigTRAP: 4799 case Ijk_Sys_syscall: 4800 case Ijk_Sys_int210: 4801 case Ijk_InvalICache: 4802 case Ijk_Yield: 4803 { 4804 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 4805 addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, stmt->Ist.Exit.jk)); 4806 return; 4807 } 4808 default: 4809 break; 4810 } 4811 4812 /* Do we ever expect to see any other kind? 
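         If one ever shows up, we drop through to stmt_fail below and panic
         rather than guess at its semantics.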
*/ 4813 goto stmt_fail; 4814 } 4815 4816 default: break; 4817 } 4818 stmt_fail: 4819 ppIRStmt(stmt); 4820 vpanic("iselStmt(amd64)"); 4821} 4822 4823 4824/*---------------------------------------------------------*/ 4825/*--- ISEL: Basic block terminators (Nexts) ---*/ 4826/*---------------------------------------------------------*/ 4827 4828static void iselNext ( ISelEnv* env, 4829 IRExpr* next, IRJumpKind jk, Int offsIP ) 4830{ 4831 if (vex_traceflags & VEX_TRACE_VCODE) { 4832 vex_printf( "\n-- PUT(%d) = ", offsIP); 4833 ppIRExpr( next ); 4834 vex_printf( "; exit-"); 4835 ppIRJumpKind(jk); 4836 vex_printf( "\n"); 4837 } 4838 4839 /* Case: boring transfer to known address */ 4840 if (next->tag == Iex_Const) { 4841 IRConst* cdst = next->Iex.Const.con; 4842 vassert(cdst->tag == Ico_U64); 4843 if (jk == Ijk_Boring || jk == Ijk_Call) { 4844 /* Boring transfer to known address */ 4845 AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP()); 4846 if (env->chainingAllowed) { 4847 /* .. almost always true .. */ 4848 /* Skip the event check at the dst if this is a forwards 4849 edge. */ 4850 Bool toFastEP 4851 = ((Addr64)cdst->Ico.U64) > env->max_ga; 4852 if (0) vex_printf("%s", toFastEP ? "X" : "."); 4853 addInstr(env, AMD64Instr_XDirect(cdst->Ico.U64, 4854 amRIP, Acc_ALWAYS, 4855 toFastEP)); 4856 } else { 4857 /* .. very occasionally .. */ 4858 /* We can't use chaining, so ask for an indirect transfer, 4859 as that's the cheapest alternative that is 4860 allowable. */ 4861 HReg r = iselIntExpr_R(env, next); 4862 addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, 4863 Ijk_Boring)); 4864 } 4865 return; 4866 } 4867 } 4868 4869 /* Case: call/return (==boring) transfer to any address */ 4870 switch (jk) { 4871 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: { 4872 HReg r = iselIntExpr_R(env, next); 4873 AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP()); 4874 if (env->chainingAllowed) { 4875 addInstr(env, AMD64Instr_XIndir(r, amRIP, Acc_ALWAYS)); 4876 } else { 4877 addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, 4878 Ijk_Boring)); 4879 } 4880 return; 4881 } 4882 default: 4883 break; 4884 } 4885 4886 /* Case: assisted transfer to arbitrary address */ 4887 switch (jk) { 4888 /* Keep this list in sync with that for Ist_Exit above */ 4889 case Ijk_ClientReq: 4890 case Ijk_EmWarn: 4891 case Ijk_NoDecode: 4892 case Ijk_NoRedir: 4893 case Ijk_SigSEGV: 4894 case Ijk_SigTRAP: 4895 case Ijk_Sys_syscall: 4896 case Ijk_Sys_int210: 4897 case Ijk_InvalICache: 4898 case Ijk_Yield: { 4899 HReg r = iselIntExpr_R(env, next); 4900 AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP()); 4901 addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, jk)); 4902 return; 4903 } 4904 default: 4905 break; 4906 } 4907 4908 vex_printf( "\n-- PUT(%d) = ", offsIP); 4909 ppIRExpr( next ); 4910 vex_printf( "; exit-"); 4911 ppIRJumpKind(jk); 4912 vex_printf( "\n"); 4913 vassert(0); // are we expecting any other kind? 4914} 4915 4916 4917/*---------------------------------------------------------*/ 4918/*--- Insn selector top-level ---*/ 4919/*---------------------------------------------------------*/ 4920 4921/* Translate an entire SB to amd64 code. 
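   The flow is: sanity-check the host (AMD64, little-endian, only
   recognised hwcaps), build the ISelEnv and the IRTemp-to-vreg maps, emit
   the mandatory event check (plus an optional profile-counter increment),
   select code for each statement in order, and finish with the block's
   Next.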
*/ 4922 4923HInstrArray* iselSB_AMD64 ( const IRSB* bb, 4924 VexArch arch_host, 4925 const VexArchInfo* archinfo_host, 4926 const VexAbiInfo* vbi/*UNUSED*/, 4927 Int offs_Host_EvC_Counter, 4928 Int offs_Host_EvC_FailAddr, 4929 Bool chainingAllowed, 4930 Bool addProfInc, 4931 Addr max_ga ) 4932{ 4933 Int i, j; 4934 HReg hreg, hregHI; 4935 ISelEnv* env; 4936 UInt hwcaps_host = archinfo_host->hwcaps; 4937 AMD64AMode *amCounter, *amFailAddr; 4938 4939 /* sanity ... */ 4940 vassert(arch_host == VexArchAMD64); 4941 vassert(0 == (hwcaps_host 4942 & ~(VEX_HWCAPS_AMD64_SSE3 4943 | VEX_HWCAPS_AMD64_CX16 4944 | VEX_HWCAPS_AMD64_LZCNT 4945 | VEX_HWCAPS_AMD64_AVX 4946 | VEX_HWCAPS_AMD64_RDTSCP 4947 | VEX_HWCAPS_AMD64_BMI 4948 | VEX_HWCAPS_AMD64_AVX2))); 4949 4950 /* Check that the host's endianness is as expected. */ 4951 vassert(archinfo_host->endness == VexEndnessLE); 4952 4953 /* Make up an initial environment to use. */ 4954 env = LibVEX_Alloc_inline(sizeof(ISelEnv)); 4955 env->vreg_ctr = 0; 4956 4957 /* Set up output code array. */ 4958 env->code = newHInstrArray(); 4959 4960 /* Copy BB's type env. */ 4961 env->type_env = bb->tyenv; 4962 4963 /* Make up an IRTemp -> virtual HReg mapping. This doesn't 4964 change as we go along. */ 4965 env->n_vregmap = bb->tyenv->types_used; 4966 env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg)); 4967 env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg)); 4968 4969 /* and finally ... */ 4970 env->chainingAllowed = chainingAllowed; 4971 env->hwcaps = hwcaps_host; 4972 env->max_ga = max_ga; 4973 4974 /* For each IR temporary, allocate a suitably-kinded virtual 4975 register. */ 4976 j = 0; 4977 for (i = 0; i < env->n_vregmap; i++) { 4978 hregHI = hreg = INVALID_HREG; 4979 switch (bb->tyenv->types[i]) { 4980 case Ity_I1: 4981 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64: 4982 hreg = mkHReg(True, HRcInt64, 0, j++); 4983 break; 4984 case Ity_I128: 4985 hreg = mkHReg(True, HRcInt64, 0, j++); 4986 hregHI = mkHReg(True, HRcInt64, 0, j++); 4987 break; 4988 case Ity_F32: 4989 case Ity_F64: 4990 case Ity_V128: 4991 hreg = mkHReg(True, HRcVec128, 0, j++); 4992 break; 4993 case Ity_V256: 4994 hreg = mkHReg(True, HRcVec128, 0, j++); 4995 hregHI = mkHReg(True, HRcVec128, 0, j++); 4996 break; 4997 default: 4998 ppIRType(bb->tyenv->types[i]); 4999 vpanic("iselBB(amd64): IRTemp type"); 5000 } 5001 env->vregmap[i] = hreg; 5002 env->vregmapHI[i] = hregHI; 5003 } 5004 env->vreg_ctr = j; 5005 5006 /* The very first instruction must be an event check. */ 5007 amCounter = AMD64AMode_IR(offs_Host_EvC_Counter, hregAMD64_RBP()); 5008 amFailAddr = AMD64AMode_IR(offs_Host_EvC_FailAddr, hregAMD64_RBP()); 5009 addInstr(env, AMD64Instr_EvCheck(amCounter, amFailAddr)); 5010 5011 /* Possibly a block counter increment (for profiling). At this 5012 point we don't know the address of the counter, so just pretend 5013 it is zero. It will have to be patched later, but before this 5014 translation is used, by a call to LibVEX_patchProfCtr. */ 5015 if (addProfInc) { 5016 addInstr(env, AMD64Instr_ProfInc()); 5017 } 5018 5019 /* Ok, finally we can iterate over the statements. */ 5020 for (i = 0; i < bb->stmts_used; i++) 5021 if (bb->stmts[i]) 5022 iselStmt(env, bb->stmts[i]); 5023 5024 iselNext(env, bb->next, bb->jumpkind, bb->offsIP); 5025 5026 /* record the number of vregs we used. 
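      (The register allocator presumably uses this count to size its
      per-vreg state.)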
   */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                   host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/