1 2/*---------------------------------------------------------------*/ 3/*--- begin host_arm64_isel.c ---*/ 4/*---------------------------------------------------------------*/ 5 6/* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2013-2013 OpenWorks 11 info@open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29*/ 30 31#include "libvex_basictypes.h" 32#include "libvex_ir.h" 33#include "libvex.h" 34#include "ir_match.h" 35 36#include "main_util.h" 37#include "main_globals.h" 38#include "host_generic_regs.h" 39#include "host_generic_simd64.h" // for 32-bit SIMD helpers 40#include "host_arm64_defs.h" 41 42 43/*---------------------------------------------------------*/ 44/*--- ISelEnv ---*/ 45/*---------------------------------------------------------*/ 46 47/* This carries around: 48 49 - A mapping from IRTemp to IRType, giving the type of any IRTemp we 50 might encounter. This is computed before insn selection starts, 51 and does not change. 52 53 - A mapping from IRTemp to HReg. This tells the insn selector 54 which virtual register is associated with each IRTemp temporary. 55 This is computed before insn selection starts, and does not 56 change. 
We expect this mapping to map precisely the same set of 57 IRTemps as the type mapping does. 58 59 |vregmap| holds the primary register for the IRTemp. 60 |vregmapHI| is only used for 128-bit integer-typed 61 IRTemps. It holds the identity of a second 62 64-bit virtual HReg, which holds the high half 63 of the value. 64 65 - The code array, that is, the insns selected so far. 66 67 - A counter, for generating new virtual registers. 68 69 - The host hardware capabilities word. This is set at the start 70 and does not change. 71 72 - A Bool for indicating whether we may generate chain-me 73 instructions for control flow transfers, or whether we must use 74 XAssisted. 75 76 - The maximum guest address of any guest insn in this block. 77 Actually, the address of the highest-addressed byte from any insn 78 in this block. Is set at the start and does not change. This is 79 used for detecting jumps which are definitely forward-edges from 80 this block, and therefore can be made (chained) to the fast entry 81 point of the destination, thereby avoiding the destination's 82 event check. 83 84 - An IRExpr*, which may be NULL, holding the IR expression (an 85 IRRoundingMode-encoded value) to which the FPU's rounding mode 86 was most recently set. Setting to NULL is always safe. Used to 87 avoid redundant settings of the FPU's rounding mode, as 88 described in set_FPCR_rounding_mode below. 89 90 Note, this is all (well, mostly) host-independent. 91*/ 92 93typedef 94 struct { 95 /* Constant -- are set at the start and do not change. */ 96 IRTypeEnv* type_env; 97 98 HReg* vregmap; 99 HReg* vregmapHI; 100 Int n_vregmap; 101 102 UInt hwcaps; 103 104 Bool chainingAllowed; 105 Addr64 max_ga; 106 107 /* These are modified as we go along. 
*/ 108 HInstrArray* code; 109 Int vreg_ctr; 110 111 IRExpr* previous_rm; 112 } 113 ISelEnv; 114 115static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp ) 116{ 117 vassert(tmp >= 0); 118 vassert(tmp < env->n_vregmap); 119 return env->vregmap[tmp]; 120} 121 122static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO, 123 ISelEnv* env, IRTemp tmp ) 124{ 125 vassert(tmp >= 0); 126 vassert(tmp < env->n_vregmap); 127 vassert(! hregIsInvalid(env->vregmapHI[tmp])); 128 *vrLO = env->vregmap[tmp]; 129 *vrHI = env->vregmapHI[tmp]; 130} 131 132static void addInstr ( ISelEnv* env, ARM64Instr* instr ) 133{ 134 addHInstr(env->code, instr); 135 if (vex_traceflags & VEX_TRACE_VCODE) { 136 ppARM64Instr(instr); 137 vex_printf("\n"); 138 } 139} 140 141static HReg newVRegI ( ISelEnv* env ) 142{ 143 HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0, env->vreg_ctr); 144 env->vreg_ctr++; 145 return reg; 146} 147 148static HReg newVRegD ( ISelEnv* env ) 149{ 150 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0, env->vreg_ctr); 151 env->vreg_ctr++; 152 return reg; 153} 154 155static HReg newVRegV ( ISelEnv* env ) 156{ 157 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0, env->vreg_ctr); 158 env->vreg_ctr++; 159 return reg; 160} 161 162 163/*---------------------------------------------------------*/ 164/*--- ISEL: Forward declarations ---*/ 165/*---------------------------------------------------------*/ 166 167/* These are organised as iselXXX and iselXXX_wrk pairs. The 168 iselXXX_wrk do the real work, but are not to be called directly. 169 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then 170 checks that all returned registers are virtual. You should not 171 call the _wrk version directly. 172 173 Because some forms of ARM64 memory amodes are implicitly scaled by 174 the access size, iselIntExpr_AMode takes an IRType which tells it 175 the type of the access for which the amode is to be used. This 176 type needs to be correct, else you'll get incorrect code. 
177*/ 178static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, 179 IRExpr* e, IRType dty ); 180static ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, 181 IRExpr* e, IRType dty ); 182 183static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e ); 184static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e ); 185 186static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e ); 187static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e ); 188 189static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e ); 190static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e ); 191 192static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ); 193static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e ); 194 195static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ); 196static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ); 197 198static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo, 199 ISelEnv* env, IRExpr* e ); 200static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo, 201 ISelEnv* env, IRExpr* e ); 202 203static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ); 204static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ); 205 206static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ); 207static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ); 208 209static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e ); 210static HReg iselF16Expr ( ISelEnv* env, IRExpr* e ); 211 212static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ); 213static HReg iselV128Expr ( ISelEnv* env, IRExpr* e ); 214 215static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo, 216 ISelEnv* env, IRExpr* e ); 217static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo, 218 ISelEnv* env, IRExpr* e ); 219 220static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 ); 221 222 223/*---------------------------------------------------------*/ 224/*--- ISEL: Misc helpers ---*/ 225/*---------------------------------------------------------*/ 226 227/* Generate an amode suitable for a 64-bit 
sized access relative to 228 the baseblock register (X21). This generates an RI12 amode, which 229 means its scaled by the access size, which is why the access size 230 -- 64 bit -- is stated explicitly here. Consequently |off| needs 231 to be divisible by 8. */ 232static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off ) 233{ 234 vassert(off < (8 << 12)); /* otherwise it's unrepresentable */ 235 vassert((off & 7) == 0); /* ditto */ 236 return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/); 237} 238 239/* Ditto, for 32 bit accesses. */ 240static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off ) 241{ 242 vassert(off < (4 << 12)); /* otherwise it's unrepresentable */ 243 vassert((off & 3) == 0); /* ditto */ 244 return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/); 245} 246 247/* Ditto, for 16 bit accesses. */ 248static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off ) 249{ 250 vassert(off < (2 << 12)); /* otherwise it's unrepresentable */ 251 vassert((off & 1) == 0); /* ditto */ 252 return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/); 253} 254 255/* Ditto, for 8 bit accesses. */ 256static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off ) 257{ 258 vassert(off < (1 << 12)); /* otherwise it's unrepresentable */ 259 return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/); 260} 261 262static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off ) 263{ 264 vassert(off < (1<<12)); 265 HReg r = newVRegI(env); 266 addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(), 267 ARM64RIA_I12(off,0), True/*isAdd*/)); 268 return r; 269} 270 271static HReg get_baseblock_register ( void ) 272{ 273 return hregARM64_X21(); 274} 275 276/* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in 277 a new register, and return the new register. 
*/ 278static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src ) 279{ 280 HReg dst = newVRegI(env); 281 ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */ 282 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND)); 283 return dst; 284} 285 286/* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in 287 a new register, and return the new register. */ 288static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src ) 289{ 290 HReg dst = newVRegI(env); 291 ARM64RI6* n48 = ARM64RI6_I6(48); 292 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL)); 293 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR)); 294 return dst; 295} 296 297/* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in 298 a new register, and return the new register. */ 299static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src ) 300{ 301 HReg dst = newVRegI(env); 302 ARM64RI6* n48 = ARM64RI6_I6(48); 303 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL)); 304 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SHR)); 305 return dst; 306} 307 308/* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in 309 a new register, and return the new register. */ 310static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src ) 311{ 312 HReg dst = newVRegI(env); 313 ARM64RI6* n32 = ARM64RI6_I6(32); 314 addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL)); 315 addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR)); 316 return dst; 317} 318 319/* Generate code to sign extend a 8 bit value in 'src' to 64 bits, in 320 a new register, and return the new register. 
*/ 321static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src ) 322{ 323 HReg dst = newVRegI(env); 324 ARM64RI6* n56 = ARM64RI6_I6(56); 325 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL)); 326 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR)); 327 return dst; 328} 329 330static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src ) 331{ 332 HReg dst = newVRegI(env); 333 ARM64RI6* n56 = ARM64RI6_I6(56); 334 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL)); 335 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SHR)); 336 return dst; 337} 338 339/* Is this IRExpr_Const(IRConst_U64(0)) ? */ 340static Bool isZeroU64 ( IRExpr* e ) { 341 if (e->tag != Iex_Const) return False; 342 IRConst* con = e->Iex.Const.con; 343 vassert(con->tag == Ico_U64); 344 return con->Ico.U64 == 0; 345} 346 347 348/*---------------------------------------------------------*/ 349/*--- ISEL: FP rounding mode helpers ---*/ 350/*---------------------------------------------------------*/ 351 352/* Set the FP rounding mode: 'mode' is an I32-typed expression 353 denoting a value in the range 0 .. 3, indicating a round mode 354 encoded as per type IRRoundingMode -- the first four values only 355 (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the ARM64 356 FSCR to have the same rounding. 357 358 For speed & simplicity, we're setting the *entire* FPCR here. 359 360 Setting the rounding mode is expensive. So this function tries to 361 avoid repeatedly setting the rounding mode to the same thing by 362 first comparing 'mode' to the 'mode' tree supplied in the previous 363 call to this function, if any. (The previous value is stored in 364 env->previous_rm.) If 'mode' is a single IR temporary 't' and 365 env->previous_rm is also just 't', then the setting is skipped. 366 367 This is safe because of the SSA property of IR: an IR temporary can 368 only be defined once and so will have the same value regardless of 369 where it appears in the block. Cool stuff, SSA. 
370 371 A safety condition: all attempts to set the RM must be aware of 372 this mechanism - by being routed through the functions here. 373 374 Of course this only helps if blocks where the RM is set more than 375 once and it is set to the same value each time, *and* that value is 376 held in the same IR temporary each time. In order to assure the 377 latter as much as possible, the IR optimiser takes care to do CSE 378 on any block with any sign of floating point activity. 379*/ 380static 381void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode ) 382{ 383 vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32); 384 385 /* Do we need to do anything? */ 386 if (env->previous_rm 387 && env->previous_rm->tag == Iex_RdTmp 388 && mode->tag == Iex_RdTmp 389 && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) { 390 /* no - setting it to what it was before. */ 391 vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32); 392 return; 393 } 394 395 /* No luck - we better set it, and remember what we set it to. */ 396 env->previous_rm = mode; 397 398 /* Only supporting the rounding-mode bits - the rest of FPCR is set 399 to zero - so we can set the whole register at once (faster). */ 400 401 /* This isn't simple, because 'mode' carries an IR rounding 402 encoding, and we need to translate that to an ARM64 FP one: 403 The IR encoding: 404 00 to nearest (the default) 405 10 to +infinity 406 01 to -infinity 407 11 to zero 408 The ARM64 FP encoding: 409 00 to nearest 410 01 to +infinity 411 10 to -infinity 412 11 to zero 413 Easy enough to do; just swap the two bits. 
414 */ 415 HReg irrm = iselIntExpr_R(env, mode); 416 HReg tL = newVRegI(env); 417 HReg tR = newVRegI(env); 418 HReg t3 = newVRegI(env); 419 /* tL = irrm << 1; 420 tR = irrm >> 1; if we're lucky, these will issue together 421 tL &= 2; 422 tR &= 1; ditto 423 t3 = tL | tR; 424 t3 <<= 22; 425 fmxr fpscr, t3 426 */ 427 ARM64RIL* ril_one = mb_mkARM64RIL_I(1); 428 ARM64RIL* ril_two = mb_mkARM64RIL_I(2); 429 vassert(ril_one && ril_two); 430 addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL)); 431 addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR)); 432 addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND)); 433 addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND)); 434 addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR)); 435 addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL)); 436 addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3)); 437} 438 439 440/*---------------------------------------------------------*/ 441/*--- ISEL: Function call helpers ---*/ 442/*---------------------------------------------------------*/ 443 444/* Used only in doHelperCall. See big comment in doHelperCall re 445 handling of register-parameter args. This function figures out 446 whether evaluation of an expression might require use of a fixed 447 register. If in doubt return True (safe but suboptimal). 448*/ 449static 450Bool mightRequireFixedRegs ( IRExpr* e ) 451{ 452 if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) { 453 // These are always "safe" -- either a copy of SP in some 454 // arbitrary vreg, or a copy of x21, respectively. 455 return False; 456 } 457 /* Else it's a "normal" expression. */ 458 switch (e->tag) { 459 case Iex_RdTmp: case Iex_Const: case Iex_Get: 460 return False; 461 default: 462 return True; 463 } 464} 465 466 467/* Do a complete function call. |guard| is a Ity_Bit expression 468 indicating whether or not the call happens. If guard==NULL, the 469 call is unconditional. 
|retloc| is set to indicate where the 470 return value is after the call. The caller (of this fn) must 471 generate code to add |stackAdjustAfterCall| to the stack pointer 472 after the call is done. Returns True iff it managed to handle this 473 combination of arg/return types, else returns False. */ 474 475static 476Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall, 477 /*OUT*/RetLoc* retloc, 478 ISelEnv* env, 479 IRExpr* guard, 480 IRCallee* cee, IRType retTy, IRExpr** args ) 481{ 482 ARM64CondCode cc; 483 HReg argregs[ARM64_N_ARGREGS]; 484 HReg tmpregs[ARM64_N_ARGREGS]; 485 Bool go_fast; 486 Int n_args, i, nextArgReg; 487 Addr64 target; 488 489 vassert(ARM64_N_ARGREGS == 8); 490 491 /* Set default returns. We'll update them later if needed. */ 492 *stackAdjustAfterCall = 0; 493 *retloc = mk_RetLoc_INVALID(); 494 495 /* These are used for cross-checking that IR-level constraints on 496 the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */ 497 UInt nVECRETs = 0; 498 UInt nBBPTRs = 0; 499 500 /* Marshal args for a call and do the call. 501 502 This function only deals with a tiny set of possibilities, which 503 cover all helpers in practice. The restrictions are that only 504 arguments in registers are supported, hence only 505 ARM64_N_REGPARMS x 64 integer bits in total can be passed. In 506 fact the only supported arg type is I64. 507 508 The return type can be I{64,32} or V128. In the V128 case, it 509 is expected that |args| will contain the special node 510 IRExpr_VECRET(), in which case this routine generates code to 511 allocate space on the stack for the vector return value. Since 512 we are not passing any scalars on the stack, it is enough to 513 preallocate the return space before marshalling any arguments, 514 in this case. 515 516 |args| may also contain IRExpr_BBPTR(), in which case the 517 value in x21 is passed as the corresponding argument. 
518 519 Generating code which is both efficient and correct when 520 parameters are to be passed in registers is difficult, for the 521 reasons elaborated in detail in comments attached to 522 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant 523 of the method described in those comments. 524 525 The problem is split into two cases: the fast scheme and the 526 slow scheme. In the fast scheme, arguments are computed 527 directly into the target (real) registers. This is only safe 528 when we can be sure that computation of each argument will not 529 trash any real registers set by computation of any other 530 argument. 531 532 In the slow scheme, all args are first computed into vregs, and 533 once they are all done, they are moved to the relevant real 534 regs. This always gives correct code, but it also gives a bunch 535 of vreg-to-rreg moves which are usually redundant but are hard 536 for the register allocator to get rid of. 537 538 To decide which scheme to use, all argument expressions are 539 first examined. If they are all so simple that it is clear they 540 will be evaluated without use of any fixed registers, use the 541 fast scheme, else use the slow scheme. Note also that only 542 unconditional calls may use the fast scheme, since having to 543 compute a condition expression could itself trash real 544 registers. 545 546 Note this requires being able to examine an expression and 547 determine whether or not evaluation of it might use a fixed 548 register. That requires knowledge of how the rest of this insn 549 selector works. Currently just the following 3 are regarded as 550 safe -- hopefully they cover the majority of arguments in 551 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get. 552 */ 553 554 /* Note that the cee->regparms field is meaningless on ARM64 hosts 555 (since there is only one calling convention) and so we always 556 ignore it. 
*/ 557 558 n_args = 0; 559 for (i = 0; args[i]; i++) { 560 IRExpr* arg = args[i]; 561 if (UNLIKELY(arg->tag == Iex_VECRET)) { 562 nVECRETs++; 563 } else if (UNLIKELY(arg->tag == Iex_BBPTR)) { 564 nBBPTRs++; 565 } 566 n_args++; 567 } 568 569 /* If this fails, the IR is ill-formed */ 570 vassert(nBBPTRs == 0 || nBBPTRs == 1); 571 572 /* If we have a VECRET, allocate space on the stack for the return 573 value, and record the stack pointer after that. */ 574 HReg r_vecRetAddr = INVALID_HREG; 575 if (nVECRETs == 1) { 576 vassert(retTy == Ity_V128 || retTy == Ity_V256); 577 vassert(retTy != Ity_V256); // we don't handle that yet (if ever) 578 r_vecRetAddr = newVRegI(env); 579 addInstr(env, ARM64Instr_AddToSP(-16)); 580 addInstr(env, ARM64Instr_FromSP(r_vecRetAddr)); 581 } else { 582 // If either of these fail, the IR is ill-formed 583 vassert(retTy != Ity_V128 && retTy != Ity_V256); 584 vassert(nVECRETs == 0); 585 } 586 587 argregs[0] = hregARM64_X0(); 588 argregs[1] = hregARM64_X1(); 589 argregs[2] = hregARM64_X2(); 590 argregs[3] = hregARM64_X3(); 591 argregs[4] = hregARM64_X4(); 592 argregs[5] = hregARM64_X5(); 593 argregs[6] = hregARM64_X6(); 594 argregs[7] = hregARM64_X7(); 595 596 tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG; 597 tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG; 598 599 /* First decide which scheme (slow or fast) is to be used. First 600 assume the fast scheme, and select slow if any contraindications 601 (wow) appear. */ 602 603 go_fast = True; 604 605 if (guard) { 606 if (guard->tag == Iex_Const 607 && guard->Iex.Const.con->tag == Ico_U1 608 && guard->Iex.Const.con->Ico.U1 == True) { 609 /* unconditional */ 610 } else { 611 /* Not manifestly unconditional -- be conservative. 
*/ 612 go_fast = False; 613 } 614 } 615 616 if (go_fast) { 617 for (i = 0; i < n_args; i++) { 618 if (mightRequireFixedRegs(args[i])) { 619 go_fast = False; 620 break; 621 } 622 } 623 } 624 625 if (go_fast) { 626 if (retTy == Ity_V128 || retTy == Ity_V256) 627 go_fast = False; 628 } 629 630 /* At this point the scheme to use has been established. Generate 631 code to get the arg values into the argument rregs. If we run 632 out of arg regs, give up. */ 633 634 if (go_fast) { 635 636 /* FAST SCHEME */ 637 nextArgReg = 0; 638 639 for (i = 0; i < n_args; i++) { 640 IRExpr* arg = args[i]; 641 642 IRType aTy = Ity_INVALID; 643 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 644 aTy = typeOfIRExpr(env->type_env, args[i]); 645 646 if (nextArgReg >= ARM64_N_ARGREGS) 647 return False; /* out of argregs */ 648 649 if (aTy == Ity_I64) { 650 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg], 651 iselIntExpr_R(env, args[i]) )); 652 nextArgReg++; 653 } 654 else if (arg->tag == Iex_BBPTR) { 655 vassert(0); //ATC 656 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg], 657 hregARM64_X21() )); 658 nextArgReg++; 659 } 660 else if (arg->tag == Iex_VECRET) { 661 // because of the go_fast logic above, we can't get here, 662 // since vector return values makes us use the slow path 663 // instead. 664 vassert(0); 665 } 666 else 667 return False; /* unhandled arg type */ 668 } 669 670 /* Fast scheme only applies for unconditional calls. 
Hence: */ 671 cc = ARM64cc_AL; 672 673 } else { 674 675 /* SLOW SCHEME; move via temporaries */ 676 nextArgReg = 0; 677 678 for (i = 0; i < n_args; i++) { 679 IRExpr* arg = args[i]; 680 681 IRType aTy = Ity_INVALID; 682 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 683 aTy = typeOfIRExpr(env->type_env, args[i]); 684 685 if (nextArgReg >= ARM64_N_ARGREGS) 686 return False; /* out of argregs */ 687 688 if (aTy == Ity_I64) { 689 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]); 690 nextArgReg++; 691 } 692 else if (arg->tag == Iex_BBPTR) { 693 vassert(0); //ATC 694 tmpregs[nextArgReg] = hregARM64_X21(); 695 nextArgReg++; 696 } 697 else if (arg->tag == Iex_VECRET) { 698 vassert(!hregIsInvalid(r_vecRetAddr)); 699 tmpregs[nextArgReg] = r_vecRetAddr; 700 nextArgReg++; 701 } 702 else 703 return False; /* unhandled arg type */ 704 } 705 706 /* Now we can compute the condition. We can't do it earlier 707 because the argument computations could trash the condition 708 codes. Be a bit clever to handle the common case where the 709 guard is 1:Bit. */ 710 cc = ARM64cc_AL; 711 if (guard) { 712 if (guard->tag == Iex_Const 713 && guard->Iex.Const.con->tag == Ico_U1 714 && guard->Iex.Const.con->Ico.U1 == True) { 715 /* unconditional -- do nothing */ 716 } else { 717 cc = iselCondCode( env, guard ); 718 } 719 } 720 721 /* Move the args to their final destinations. */ 722 for (i = 0; i < nextArgReg; i++) { 723 vassert(!(hregIsInvalid(tmpregs[i]))); 724 /* None of these insns, including any spill code that might 725 be generated, may alter the condition codes. */ 726 addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) ); 727 } 728 729 } 730 731 /* Should be assured by checks above */ 732 vassert(nextArgReg <= ARM64_N_ARGREGS); 733 734 /* Do final checks, set the return values, and generate the call 735 instruction proper. */ 736 vassert(nBBPTRs == 0 || nBBPTRs == 1); 737 vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 
1 : 0); 738 vassert(*stackAdjustAfterCall == 0); 739 vassert(is_RetLoc_INVALID(*retloc)); 740 switch (retTy) { 741 case Ity_INVALID: 742 /* Function doesn't return a value. */ 743 *retloc = mk_RetLoc_simple(RLPri_None); 744 break; 745 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: 746 *retloc = mk_RetLoc_simple(RLPri_Int); 747 break; 748 case Ity_V128: 749 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0); 750 *stackAdjustAfterCall = 16; 751 break; 752 case Ity_V256: 753 vassert(0); // ATC 754 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0); 755 *stackAdjustAfterCall = 32; 756 break; 757 default: 758 /* IR can denote other possible return types, but we don't 759 handle those here. */ 760 vassert(0); 761 } 762 763 /* Finally, generate the call itself. This needs the *retloc value 764 set in the switch above, which is why it's at the end. */ 765 766 /* nextArgReg doles out argument registers. Since these are 767 assigned in the order x0 .. x7, its numeric value at this point, 768 which must be between 0 and 8 inclusive, is going to be equal to 769 the number of arg regs in use for the call. Hence bake that 770 number into the call (we'll need to know it when doing register 771 allocation, to know what regs the call reads.) */ 772 773 target = (Addr)cee->addr; 774 addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc )); 775 776 return True; /* success */ 777} 778 779 780/*---------------------------------------------------------*/ 781/*--- ISEL: Integer expressions (64/32 bit) ---*/ 782/*---------------------------------------------------------*/ 783 784/* Select insns for an integer-typed expression, and add them to the 785 code list. Return a reg holding the result. This reg will be a 786 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you 787 want to modify it, ask for a new vreg, copy it in there, and modify 788 the copy. 
The register allocator will do its best to map both 789 vregs to the same real register, so the copies will often disappear 790 later in the game. 791 792 This should handle expressions of 64- and 32-bit type. All results 793 are returned in a 64-bit register. For 32-bit expressions, the 794 upper 32 bits are arbitrary, so you should mask or sign extend 795 partial values if necessary. 796*/ 797 798/* --------------------- AMode --------------------- */ 799 800/* Return an AMode which computes the value of the specified 801 expression, possibly also adding insns to the code list as a 802 result. The expression may only be a 64-bit one. 803*/ 804 805static Bool isValidScale ( UChar scale ) 806{ 807 switch (scale) { 808 case 1: case 2: case 4: case 8: /* case 16: ??*/ return True; 809 default: return False; 810 } 811} 812 813static Bool sane_AMode ( ARM64AMode* am ) 814{ 815 switch (am->tag) { 816 case ARM64am_RI9: 817 return 818 toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64 819 && (hregIsVirtual(am->ARM64am.RI9.reg) 820 /* || sameHReg(am->ARM64am.RI9.reg, 821 hregARM64_X21()) */ ) 822 && am->ARM64am.RI9.simm9 >= -256 823 && am->ARM64am.RI9.simm9 <= 255 ); 824 case ARM64am_RI12: 825 return 826 toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64 827 && (hregIsVirtual(am->ARM64am.RI12.reg) 828 /* || sameHReg(am->ARM64am.RI12.reg, 829 hregARM64_X21()) */ ) 830 && am->ARM64am.RI12.uimm12 < 4096 831 && isValidScale(am->ARM64am.RI12.szB) ); 832 case ARM64am_RR: 833 return 834 toBool( hregClass(am->ARM64am.RR.base) == HRcInt64 835 && hregIsVirtual(am->ARM64am.RR.base) 836 && hregClass(am->ARM64am.RR.index) == HRcInt64 837 && hregIsVirtual(am->ARM64am.RR.index) ); 838 default: 839 vpanic("sane_AMode: unknown ARM64 AMode1 tag"); 840 } 841} 842 843static 844ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty ) 845{ 846 ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty); 847 vassert(sane_AMode(am)); 848 return am; 849} 850 851static 852ARM64AMode* 
iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty ) 853{ 854 IRType ty = typeOfIRExpr(env->type_env,e); 855 vassert(ty == Ity_I64); 856 857 ULong szBbits = 0; 858 switch (dty) { 859 case Ity_I64: szBbits = 3; break; 860 case Ity_I32: szBbits = 2; break; 861 case Ity_I16: szBbits = 1; break; 862 case Ity_I8: szBbits = 0; break; 863 default: vassert(0); 864 } 865 866 /* {Add64,Sub64}(expr,simm9). We don't care about |dty| here since 867 we're going to create an amode suitable for LDU* or STU* 868 instructions, which use unscaled immediate offsets. */ 869 if (e->tag == Iex_Binop 870 && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64) 871 && e->Iex.Binop.arg2->tag == Iex_Const 872 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) { 873 Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; 874 if (simm >= -255 && simm <= 255) { 875 /* Although the gating condition might seem to be 876 simm >= -256 && simm <= 255 877 we will need to negate simm in the case where the op is Sub64. 878 Hence limit the lower value to -255 in order that its negation 879 is representable. 
*/ 880 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1); 881 if (e->Iex.Binop.op == Iop_Sub64) simm = -simm; 882 return ARM64AMode_RI9(reg, (Int)simm); 883 } 884 } 885 886 /* Add64(expr, uimm12 * transfer-size) */ 887 if (e->tag == Iex_Binop 888 && e->Iex.Binop.op == Iop_Add64 889 && e->Iex.Binop.arg2->tag == Iex_Const 890 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) { 891 ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; 892 ULong szB = 1 << szBbits; 893 if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */ 894 && (uimm >> szBbits) < 4096) { 895 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1); 896 return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB); 897 } 898 } 899 900 /* Add64(expr1, expr2) */ 901 if (e->tag == Iex_Binop 902 && e->Iex.Binop.op == Iop_Add64) { 903 HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 904 HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2); 905 return ARM64AMode_RR(reg1, reg2); 906 } 907 908 /* Doesn't match anything in particular. Generate it into 909 a register and use that. */ 910 HReg reg = iselIntExpr_R(env, e); 911 return ARM64AMode_RI9(reg, 0); 912} 913 914 915/* --------------------- RIA --------------------- */ 916 917/* Select instructions to generate 'e' into a RIA. */ 918 919static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e ) 920{ 921 ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e); 922 /* sanity checks ... */ 923 switch (ri->tag) { 924 case ARM64riA_I12: 925 vassert(ri->ARM64riA.I12.imm12 < 4096); 926 vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12); 927 return ri; 928 case ARM64riA_R: 929 vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64); 930 vassert(hregIsVirtual(ri->ARM64riA.R.reg)); 931 return ri; 932 default: 933 vpanic("iselIntExpr_RIA: unknown arm RIA tag"); 934 } 935} 936 937/* DO NOT CALL THIS DIRECTLY ! 
*/ 938static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e ) 939{ 940 IRType ty = typeOfIRExpr(env->type_env,e); 941 vassert(ty == Ity_I64 || ty == Ity_I32); 942 943 /* special case: immediate */ 944 if (e->tag == Iex_Const) { 945 ULong u = 0xF000000ULL; /* invalid */ 946 switch (e->Iex.Const.con->tag) { 947 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break; 948 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; 949 default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)"); 950 } 951 if (0 == (u & ~(0xFFFULL << 0))) 952 return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0); 953 if (0 == (u & ~(0xFFFULL << 12))) 954 return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12); 955 /* else fail, fall through to default case */ 956 } 957 958 /* default case: calculate into a register and return that */ 959 { 960 HReg r = iselIntExpr_R ( env, e ); 961 return ARM64RIA_R(r); 962 } 963} 964 965 966/* --------------------- RIL --------------------- */ 967 968/* Select instructions to generate 'e' into a RIL. At this point we 969 have to deal with the strange bitfield-immediate encoding for logic 970 instructions. */ 971 972 973// The following four functions 974// CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical 975// are copied, with modifications, from 976// https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc 977// which has the following copyright notice: 978/* 979 Copyright 2013, ARM Limited 980 All rights reserved. 981 982 Redistribution and use in source and binary forms, with or without 983 modification, are permitted provided that the following conditions are met: 984 985 * Redistributions of source code must retain the above copyright notice, 986 this list of conditions and the following disclaimer. 987 * Redistributions in binary form must reproduce the above copyright notice, 988 this list of conditions and the following disclaimer in the documentation 989 and/or other materials provided with the distribution. 
990 * Neither the name of ARM Limited nor the names of its contributors may be 991 used to endorse or promote products derived from this software without 992 specific prior written permission. 993 994 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 995 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 996 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 997 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 998 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 999 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 1000 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 1001 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 1002 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1003 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1004*/ 1005 1006static Int CountLeadingZeros(ULong value, Int width) 1007{ 1008 vassert(width == 32 || width == 64); 1009 Int count = 0; 1010 ULong bit_test = 1ULL << (width - 1); 1011 while ((count < width) && ((bit_test & value) == 0)) { 1012 count++; 1013 bit_test >>= 1; 1014 } 1015 return count; 1016} 1017 1018static Int CountTrailingZeros(ULong value, Int width) 1019{ 1020 vassert(width == 32 || width == 64); 1021 Int count = 0; 1022 while ((count < width) && (((value >> count) & 1) == 0)) { 1023 count++; 1024 } 1025 return count; 1026} 1027 1028static Int CountSetBits(ULong value, Int width) 1029{ 1030 // TODO: Other widths could be added here, as the implementation already 1031 // supports them. 1032 vassert(width == 32 || width == 64); 1033 1034 // Mask out unused bits to ensure that they are not counted. 1035 value &= (0xffffffffffffffffULL >> (64-width)); 1036 1037 // Add up the set bits. 
1038 // The algorithm works by adding pairs of bit fields together iteratively, 1039 // where the size of each bit field doubles each time. 1040 // An example for an 8-bit value: 1041 // Bits: h g f e d c b a 1042 // \ | \ | \ | \ | 1043 // value = h+g f+e d+c b+a 1044 // \ | \ | 1045 // value = h+g+f+e d+c+b+a 1046 // \ | 1047 // value = h+g+f+e+d+c+b+a 1048 value = ((value >> 1) & 0x5555555555555555ULL) 1049 + (value & 0x5555555555555555ULL); 1050 value = ((value >> 2) & 0x3333333333333333ULL) 1051 + (value & 0x3333333333333333ULL); 1052 value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL) 1053 + (value & 0x0f0f0f0f0f0f0f0fULL); 1054 value = ((value >> 8) & 0x00ff00ff00ff00ffULL) 1055 + (value & 0x00ff00ff00ff00ffULL); 1056 value = ((value >> 16) & 0x0000ffff0000ffffULL) 1057 + (value & 0x0000ffff0000ffffULL); 1058 value = ((value >> 32) & 0x00000000ffffffffULL) 1059 + (value & 0x00000000ffffffffULL); 1060 1061 return value; 1062} 1063 1064static Bool isImmLogical ( /*OUT*/UInt* n, 1065 /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r, 1066 ULong value, UInt width ) 1067{ 1068 // Test if a given value can be encoded in the immediate field of a 1069 // logical instruction. 1070 1071 // If it can be encoded, the function returns true, and values 1072 // pointed to by n, imm_s and imm_r are updated with immediates 1073 // encoded in the format required by the corresponding fields in the 1074 // logical instruction. If it can not be encoded, the function 1075 // returns false, and the values pointed to by n, imm_s and imm_r 1076 // are undefined. 
1077 vassert(n != NULL && imm_s != NULL && imm_r != NULL); 1078 vassert(width == 32 || width == 64); 1079 1080 // Logical immediates are encoded using parameters n, imm_s and imm_r using 1081 // the following table: 1082 // 1083 // N imms immr size S R 1084 // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) 1085 // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) 1086 // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) 1087 // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) 1088 // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) 1089 // 0 11110s xxxxxr 2 UInt(s) UInt(r) 1090 // (s bits must not be all set) 1091 // 1092 // A pattern is constructed of size bits, where the least significant S+1 1093 // bits are set. The pattern is rotated right by R, and repeated across a 1094 // 32 or 64-bit value, depending on destination register width. 1095 // 1096 // To test if an arbitrary immediate can be encoded using this scheme, an 1097 // iterative algorithm is used. 1098 // 1099 // TODO: This code does not consider using X/W register overlap to support 1100 // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits 1101 // are an encodable logical immediate. 1102 1103 // 1. If the value has all set or all clear bits, it can't be encoded. 1104 if ((value == 0) || (value == 0xffffffffffffffffULL) || 1105 ((width == 32) && (value == 0xffffffff))) { 1106 return False; 1107 } 1108 1109 UInt lead_zero = CountLeadingZeros(value, width); 1110 UInt lead_one = CountLeadingZeros(~value, width); 1111 UInt trail_zero = CountTrailingZeros(value, width); 1112 UInt trail_one = CountTrailingZeros(~value, width); 1113 UInt set_bits = CountSetBits(value, width); 1114 1115 // The fixed bits in the immediate s field. 1116 // If width == 64 (X reg), start at 0xFFFFFF80. 1117 // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit 1118 // widths won't be executed. 1119 Int imm_s_fixed = (width == 64) ? -128 : -64; 1120 Int imm_s_mask = 0x3F; 1121 1122 for (;;) { 1123 // 2. 
If the value is two bits wide, it can be encoded. 1124 if (width == 2) { 1125 *n = 0; 1126 *imm_s = 0x3C; 1127 *imm_r = (value & 3) - 1; 1128 return True; 1129 } 1130 1131 *n = (width == 64) ? 1 : 0; 1132 *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask); 1133 if ((lead_zero + set_bits) == width) { 1134 *imm_r = 0; 1135 } else { 1136 *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one; 1137 } 1138 1139 // 3. If the sum of leading zeros, trailing zeros and set bits is equal to 1140 // the bit width of the value, it can be encoded. 1141 if (lead_zero + trail_zero + set_bits == width) { 1142 return True; 1143 } 1144 1145 // 4. If the sum of leading ones, trailing ones and unset bits in the 1146 // value is equal to the bit width of the value, it can be encoded. 1147 if (lead_one + trail_one + (width - set_bits) == width) { 1148 return True; 1149 } 1150 1151 // 5. If the most-significant half of the bitwise value is equal to the 1152 // least-significant half, return to step 2 using the least-significant 1153 // half of the value. 1154 ULong mask = (1ULL << (width >> 1)) - 1; 1155 if ((value & mask) == ((value >> (width >> 1)) & mask)) { 1156 width >>= 1; 1157 set_bits >>= 1; 1158 imm_s_fixed >>= 1; 1159 continue; 1160 } 1161 1162 // 6. Otherwise, the value can't be encoded. 1163 return False; 1164 } 1165} 1166 1167 1168/* Create a RIL for the given immediate, if it is representable, or 1169 return NULL if not. */ 1170 1171static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 ) 1172{ 1173 UInt n = 0, imm_s = 0, imm_r = 0; 1174 Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64); 1175 if (!ok) return NULL; 1176 vassert(n < 2 && imm_s < 64 && imm_r < 64); 1177 return ARM64RIL_I13(n, imm_r, imm_s); 1178} 1179 1180/* So, finally .. */ 1181 1182static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e ) 1183{ 1184 ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e); 1185 /* sanity checks ... 
*/ 1186 switch (ri->tag) { 1187 case ARM64riL_I13: 1188 vassert(ri->ARM64riL.I13.bitN < 2); 1189 vassert(ri->ARM64riL.I13.immR < 64); 1190 vassert(ri->ARM64riL.I13.immS < 64); 1191 return ri; 1192 case ARM64riL_R: 1193 vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64); 1194 vassert(hregIsVirtual(ri->ARM64riL.R.reg)); 1195 return ri; 1196 default: 1197 vpanic("iselIntExpr_RIL: unknown arm RIL tag"); 1198 } 1199} 1200 1201/* DO NOT CALL THIS DIRECTLY ! */ 1202static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e ) 1203{ 1204 IRType ty = typeOfIRExpr(env->type_env,e); 1205 vassert(ty == Ity_I64 || ty == Ity_I32); 1206 1207 /* special case: immediate */ 1208 if (e->tag == Iex_Const) { 1209 ARM64RIL* maybe = NULL; 1210 if (ty == Ity_I64) { 1211 vassert(e->Iex.Const.con->tag == Ico_U64); 1212 maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64); 1213 } else { 1214 vassert(ty == Ity_I32); 1215 vassert(e->Iex.Const.con->tag == Ico_U32); 1216 UInt u32 = e->Iex.Const.con->Ico.U32; 1217 ULong u64 = (ULong)u32; 1218 /* First try with 32 leading zeroes. */ 1219 maybe = mb_mkARM64RIL_I(u64); 1220 /* If that doesn't work, try with 2 copies, since it doesn't 1221 matter what winds up in the upper 32 bits. */ 1222 if (!maybe) { 1223 maybe = mb_mkARM64RIL_I((u64 << 32) | u64); 1224 } 1225 } 1226 if (maybe) return maybe; 1227 /* else fail, fall through to default case */ 1228 } 1229 1230 /* default case: calculate into a register and return that */ 1231 { 1232 HReg r = iselIntExpr_R ( env, e ); 1233 return ARM64RIL_R(r); 1234 } 1235} 1236 1237 1238/* --------------------- RI6 --------------------- */ 1239 1240/* Select instructions to generate 'e' into a RI6. */ 1241 1242static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e ) 1243{ 1244 ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e); 1245 /* sanity checks ... 
*/ 1246 switch (ri->tag) { 1247 case ARM64ri6_I6: 1248 vassert(ri->ARM64ri6.I6.imm6 < 64); 1249 vassert(ri->ARM64ri6.I6.imm6 > 0); 1250 return ri; 1251 case ARM64ri6_R: 1252 vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64); 1253 vassert(hregIsVirtual(ri->ARM64ri6.R.reg)); 1254 return ri; 1255 default: 1256 vpanic("iselIntExpr_RI6: unknown arm RI6 tag"); 1257 } 1258} 1259 1260/* DO NOT CALL THIS DIRECTLY ! */ 1261static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e ) 1262{ 1263 IRType ty = typeOfIRExpr(env->type_env,e); 1264 vassert(ty == Ity_I64 || ty == Ity_I8); 1265 1266 /* special case: immediate */ 1267 if (e->tag == Iex_Const) { 1268 switch (e->Iex.Const.con->tag) { 1269 case Ico_U8: { 1270 UInt u = e->Iex.Const.con->Ico.U8; 1271 if (u > 0 && u < 64) 1272 return ARM64RI6_I6(u); 1273 break; 1274 default: 1275 break; 1276 } 1277 } 1278 /* else fail, fall through to default case */ 1279 } 1280 1281 /* default case: calculate into a register and return that */ 1282 { 1283 HReg r = iselIntExpr_R ( env, e ); 1284 return ARM64RI6_R(r); 1285 } 1286} 1287 1288 1289/* ------------------- CondCode ------------------- */ 1290 1291/* Generate code to evaluated a bit-typed expression, returning the 1292 condition code which would correspond when the expression would 1293 notionally have returned 1. */ 1294 1295static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e ) 1296{ 1297 ARM64CondCode cc = iselCondCode_wrk(env,e); 1298 vassert(cc != ARM64cc_NV); 1299 return cc; 1300} 1301 1302static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) 1303{ 1304 vassert(e); 1305 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1); 1306 1307 /* var */ 1308 if (e->tag == Iex_RdTmp) { 1309 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp); 1310 /* Cmp doesn't modify rTmp; so this is OK. 
*/ 1311 ARM64RIL* one = mb_mkARM64RIL_I(1); 1312 vassert(one); 1313 addInstr(env, ARM64Instr_Test(rTmp, one)); 1314 return ARM64cc_NE; 1315 } 1316 1317 /* Not1(e) */ 1318 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) { 1319 /* Generate code for the arg, and negate the test condition */ 1320 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg); 1321 if (cc == ARM64cc_AL || cc == ARM64cc_NV) { 1322 return ARM64cc_AL; 1323 } else { 1324 return 1 ^ cc; 1325 } 1326 } 1327 1328 /* --- patterns rooted at: 64to1 --- */ 1329 1330 if (e->tag == Iex_Unop 1331 && e->Iex.Unop.op == Iop_64to1) { 1332 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg); 1333 ARM64RIL* one = mb_mkARM64RIL_I(1); 1334 vassert(one); /* '1' must be representable */ 1335 addInstr(env, ARM64Instr_Test(rTmp, one)); 1336 return ARM64cc_NE; 1337 } 1338 1339 /* --- patterns rooted at: CmpNEZ8 --- */ 1340 1341 if (e->tag == Iex_Unop 1342 && e->Iex.Unop.op == Iop_CmpNEZ8) { 1343 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 1344 ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF); 1345 addInstr(env, ARM64Instr_Test(r1, xFF)); 1346 return ARM64cc_NE; 1347 } 1348 1349 /* --- patterns rooted at: CmpNEZ16 --- */ 1350 1351 if (e->tag == Iex_Unop 1352 && e->Iex.Unop.op == Iop_CmpNEZ16) { 1353 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 1354 ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF); 1355 addInstr(env, ARM64Instr_Test(r1, xFFFF)); 1356 return ARM64cc_NE; 1357 } 1358 1359 /* --- patterns rooted at: CmpNEZ64 --- */ 1360 1361 if (e->tag == Iex_Unop 1362 && e->Iex.Unop.op == Iop_CmpNEZ64) { 1363 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 1364 ARM64RIA* zero = ARM64RIA_I12(0,0); 1365 addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/)); 1366 return ARM64cc_NE; 1367 } 1368 1369 /* --- patterns rooted at: CmpNEZ32 --- */ 1370 1371 if (e->tag == Iex_Unop 1372 && e->Iex.Unop.op == Iop_CmpNEZ32) { 1373 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 1374 ARM64RIA* zero = ARM64RIA_I12(0,0); 1375 addInstr(env, ARM64Instr_Cmp(r1, 
zero, False/*!is64*/)); 1376 return ARM64cc_NE; 1377 } 1378 1379 /* --- Cmp*64*(x,y) --- */ 1380 if (e->tag == Iex_Binop 1381 && (e->Iex.Binop.op == Iop_CmpEQ64 1382 || e->Iex.Binop.op == Iop_CmpNE64 1383 || e->Iex.Binop.op == Iop_CmpLT64S 1384 || e->Iex.Binop.op == Iop_CmpLT64U 1385 || e->Iex.Binop.op == Iop_CmpLE64S 1386 || e->Iex.Binop.op == Iop_CmpLE64U)) { 1387 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1388 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2); 1389 addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/)); 1390 switch (e->Iex.Binop.op) { 1391 case Iop_CmpEQ64: return ARM64cc_EQ; 1392 case Iop_CmpNE64: return ARM64cc_NE; 1393 case Iop_CmpLT64S: return ARM64cc_LT; 1394 case Iop_CmpLT64U: return ARM64cc_CC; 1395 case Iop_CmpLE64S: return ARM64cc_LE; 1396 case Iop_CmpLE64U: return ARM64cc_LS; 1397 default: vpanic("iselCondCode(arm64): CmpXX64"); 1398 } 1399 } 1400 1401 /* --- Cmp*32*(x,y) --- */ 1402 if (e->tag == Iex_Binop 1403 && (e->Iex.Binop.op == Iop_CmpEQ32 1404 || e->Iex.Binop.op == Iop_CmpNE32 1405 || e->Iex.Binop.op == Iop_CmpLT32S 1406 || e->Iex.Binop.op == Iop_CmpLT32U 1407 || e->Iex.Binop.op == Iop_CmpLE32S 1408 || e->Iex.Binop.op == Iop_CmpLE32U)) { 1409 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1410 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2); 1411 addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/)); 1412 switch (e->Iex.Binop.op) { 1413 case Iop_CmpEQ32: return ARM64cc_EQ; 1414 case Iop_CmpNE32: return ARM64cc_NE; 1415 case Iop_CmpLT32S: return ARM64cc_LT; 1416 case Iop_CmpLT32U: return ARM64cc_CC; 1417 case Iop_CmpLE32S: return ARM64cc_LE; 1418 case Iop_CmpLE32U: return ARM64cc_LS; 1419 default: vpanic("iselCondCode(arm64): CmpXX32"); 1420 } 1421 } 1422 1423 ppIRExpr(e); 1424 vpanic("iselCondCode"); 1425} 1426 1427 1428/* --------------------- Reg --------------------- */ 1429 1430static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ) 1431{ 1432 HReg r = iselIntExpr_R_wrk(env, e); 1433 /* sanity 
checks ... */ 1434# if 0 1435 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 1436# endif 1437 vassert(hregClass(r) == HRcInt64); 1438 vassert(hregIsVirtual(r)); 1439 return r; 1440} 1441 1442/* DO NOT CALL THIS DIRECTLY ! */ 1443static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) 1444{ 1445 IRType ty = typeOfIRExpr(env->type_env,e); 1446 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 1447 1448 switch (e->tag) { 1449 1450 /* --------- TEMP --------- */ 1451 case Iex_RdTmp: { 1452 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 1453 } 1454 1455 /* --------- LOAD --------- */ 1456 case Iex_Load: { 1457 HReg dst = newVRegI(env); 1458 1459 if (e->Iex.Load.end != Iend_LE) 1460 goto irreducible; 1461 1462 if (ty == Ity_I64) { 1463 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); 1464 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode)); 1465 return dst; 1466 } 1467 if (ty == Ity_I32) { 1468 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); 1469 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode)); 1470 return dst; 1471 } 1472 if (ty == Ity_I16) { 1473 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); 1474 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode)); 1475 return dst; 1476 } 1477 if (ty == Ity_I8) { 1478 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); 1479 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode)); 1480 return dst; 1481 } 1482 break; 1483 } 1484 1485 /* --------- BINARY OP --------- */ 1486 case Iex_Binop: { 1487 1488 ARM64LogicOp lop = 0; /* invalid */ 1489 ARM64ShiftOp sop = 0; /* invalid */ 1490 1491 /* Special-case 0-x into a Neg instruction. Not because it's 1492 particularly useful but more so as to give value flow using 1493 this instruction, so as to check its assembly correctness for 1494 implementation of Left32/Left64. 
*/ 1495 switch (e->Iex.Binop.op) { 1496 case Iop_Sub64: 1497 if (isZeroU64(e->Iex.Binop.arg1)) { 1498 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1499 HReg dst = newVRegI(env); 1500 addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG)); 1501 return dst; 1502 } 1503 break; 1504 default: 1505 break; 1506 } 1507 1508 /* ADD/SUB */ 1509 switch (e->Iex.Binop.op) { 1510 case Iop_Add64: case Iop_Add32: 1511 case Iop_Sub64: case Iop_Sub32: { 1512 Bool isAdd = e->Iex.Binop.op == Iop_Add64 1513 || e->Iex.Binop.op == Iop_Add32; 1514 HReg dst = newVRegI(env); 1515 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1516 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2); 1517 addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd)); 1518 return dst; 1519 } 1520 default: 1521 break; 1522 } 1523 1524 /* AND/OR/XOR */ 1525 switch (e->Iex.Binop.op) { 1526 case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop; 1527 case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop; 1528 case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop; 1529 log_binop: { 1530 HReg dst = newVRegI(env); 1531 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1532 ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2); 1533 addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop)); 1534 return dst; 1535 } 1536 default: 1537 break; 1538 } 1539 1540 /* SHL/SHR/SAR */ 1541 switch (e->Iex.Binop.op) { 1542 case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop; 1543 case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop; 1544 case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop; 1545 sh_binop: { 1546 HReg dst = newVRegI(env); 1547 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1548 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2); 1549 addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop)); 1550 return dst; 1551 } 1552 case Iop_Shr32: 1553 case Iop_Sar32: { 1554 Bool zx = e->Iex.Binop.op == Iop_Shr32; 1555 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1556 
ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2); 1557 HReg dst = zx ? widen_z_32_to_64(env, argL) 1558 : widen_s_32_to_64(env, argL); 1559 addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR)); 1560 return dst; 1561 } 1562 default: break; 1563 } 1564 1565 /* MUL */ 1566 if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) { 1567 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1568 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1569 HReg dst = newVRegI(env); 1570 addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN)); 1571 return dst; 1572 } 1573 1574 /* MULL */ 1575 if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) { 1576 Bool isS = e->Iex.Binop.op == Iop_MullS32; 1577 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1578 HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL); 1579 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1580 HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR); 1581 HReg dst = newVRegI(env); 1582 addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN)); 1583 return dst; 1584 } 1585 1586 /* Handle misc other ops. 
*/ 1587 1588 if (e->Iex.Binop.op == Iop_Max32U) { 1589 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1590 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1591 HReg dst = newVRegI(env); 1592 addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/)); 1593 addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS)); 1594 return dst; 1595 } 1596 1597 if (e->Iex.Binop.op == Iop_32HLto64) { 1598 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1); 1599 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2); 1600 HReg lo32 = widen_z_32_to_64(env, lo32s); 1601 HReg hi32 = newVRegI(env); 1602 addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32), 1603 ARM64sh_SHL)); 1604 addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32), 1605 ARM64lo_OR)); 1606 return hi32; 1607 } 1608 1609 if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) { 1610 Bool isD = e->Iex.Binop.op == Iop_CmpF64; 1611 HReg dL = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1); 1612 HReg dR = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2); 1613 HReg dst = newVRegI(env); 1614 HReg imm = newVRegI(env); 1615 /* Do the compare (FCMP), which sets NZCV in PSTATE. Then 1616 create in dst, the IRCmpF64Result encoded result. */ 1617 addInstr(env, (isD ? 
ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR)); 1618 addInstr(env, ARM64Instr_Imm64(dst, 0)); 1619 addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ 1620 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ)); 1621 addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT 1622 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI)); 1623 addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT 1624 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT)); 1625 addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN 1626 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS)); 1627 return dst; 1628 } 1629 1630 { /* local scope */ 1631 ARM64CvtOp cvt_op = ARM64cvt_INVALID; 1632 Bool srcIsD = False; 1633 switch (e->Iex.Binop.op) { 1634 case Iop_F64toI64S: 1635 cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break; 1636 case Iop_F64toI64U: 1637 cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break; 1638 case Iop_F64toI32S: 1639 cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break; 1640 case Iop_F64toI32U: 1641 cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break; 1642 case Iop_F32toI32S: 1643 cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break; 1644 case Iop_F32toI32U: 1645 cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break; 1646 case Iop_F32toI64S: 1647 cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break; 1648 case Iop_F32toI64U: 1649 cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break; 1650 default: 1651 break; 1652 } 1653 if (cvt_op != ARM64cvt_INVALID) { 1654 /* This is all a bit dodgy, because we can't handle a 1655 non-constant (not-known-at-JIT-time) rounding mode 1656 indication. That's because there's no instruction 1657 AFAICS that does this conversion but rounds according to 1658 FPCR.RM, so we have to bake the rounding mode into the 1659 instruction right now. But that should be OK because 1660 (1) the front end attaches a literal Irrm_ value to the 1661 conversion binop, and (2) iropt will never float that 1662 off via CSE, into a literal. 
Hence we should always 1663 have an Irrm_ value as the first arg. */ 1664 IRExpr* arg1 = e->Iex.Binop.arg1; 1665 if (arg1->tag != Iex_Const) goto irreducible; 1666 IRConst* arg1con = arg1->Iex.Const.con; 1667 vassert(arg1con->tag == Ico_U32); // else ill-typed IR 1668 UInt irrm = arg1con->Ico.U32; 1669 /* Find the ARM-encoded equivalent for |irrm|. */ 1670 UInt armrm = 4; /* impossible */ 1671 switch (irrm) { 1672 case Irrm_NEAREST: armrm = 0; break; 1673 case Irrm_NegINF: armrm = 2; break; 1674 case Irrm_PosINF: armrm = 1; break; 1675 case Irrm_ZERO: armrm = 3; break; 1676 default: goto irreducible; 1677 } 1678 HReg src = (srcIsD ? iselDblExpr : iselFltExpr) 1679 (env, e->Iex.Binop.arg2); 1680 HReg dst = newVRegI(env); 1681 addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm)); 1682 return dst; 1683 } 1684 } /* local scope */ 1685 1686 /* All cases involving host-side helper calls. */ 1687 void* fn = NULL; 1688 switch (e->Iex.Binop.op) { 1689 case Iop_DivU32: 1690 fn = &h_calc_udiv32_w_arm_semantics; break; 1691 case Iop_DivS32: 1692 fn = &h_calc_sdiv32_w_arm_semantics; break; 1693 case Iop_DivU64: 1694 fn = &h_calc_udiv64_w_arm_semantics; break; 1695 case Iop_DivS64: 1696 fn = &h_calc_sdiv64_w_arm_semantics; break; 1697 default: 1698 break; 1699 } 1700 1701 if (fn) { 1702 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1703 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1704 HReg res = newVRegI(env); 1705 addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL)); 1706 addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR)); 1707 addInstr(env, ARM64Instr_Call( ARM64cc_AL, (Addr)fn, 1708 2, mk_RetLoc_simple(RLPri_Int) )); 1709 addInstr(env, ARM64Instr_MovI(res, hregARM64_X0())); 1710 return res; 1711 } 1712 1713 break; 1714 } 1715 1716 /* --------- UNARY OP --------- */ 1717 case Iex_Unop: { 1718 1719 switch (e->Iex.Unop.op) { 1720 case Iop_16Uto64: { 1721 /* This probably doesn't occur often enough to be worth 1722 rolling the extension into the load. 
*/ 1723 IRExpr* arg = e->Iex.Unop.arg; 1724 HReg src = iselIntExpr_R(env, arg); 1725 HReg dst = widen_z_16_to_64(env, src); 1726 return dst; 1727 } 1728 case Iop_32Uto64: { 1729 IRExpr* arg = e->Iex.Unop.arg; 1730 if (arg->tag == Iex_Load) { 1731 /* This correctly zero extends because _LdSt32 is 1732 defined to do a zero extending load. */ 1733 HReg dst = newVRegI(env); 1734 ARM64AMode* am 1735 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32); 1736 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am)); 1737 return dst; 1738 } 1739 /* else be lame and mask it */ 1740 HReg src = iselIntExpr_R(env, arg); 1741 HReg dst = widen_z_32_to_64(env, src); 1742 return dst; 1743 } 1744 case Iop_8Uto32: /* Just freeload on the 8Uto64 case */ 1745 case Iop_8Uto64: { 1746 IRExpr* arg = e->Iex.Unop.arg; 1747 if (arg->tag == Iex_Load) { 1748 /* This correctly zero extends because _LdSt8 is 1749 defined to do a zero extending load. */ 1750 HReg dst = newVRegI(env); 1751 ARM64AMode* am 1752 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8); 1753 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am)); 1754 return dst; 1755 } 1756 /* else be lame and mask it */ 1757 HReg src = iselIntExpr_R(env, arg); 1758 HReg dst = widen_z_8_to_64(env, src); 1759 return dst; 1760 } 1761 case Iop_128HIto64: { 1762 HReg rHi, rLo; 1763 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1764 return rHi; /* and abandon rLo */ 1765 } 1766 case Iop_8Sto32: case Iop_8Sto64: { 1767 IRExpr* arg = e->Iex.Unop.arg; 1768 HReg src = iselIntExpr_R(env, arg); 1769 HReg dst = widen_s_8_to_64(env, src); 1770 return dst; 1771 } 1772 case Iop_16Sto32: case Iop_16Sto64: { 1773 IRExpr* arg = e->Iex.Unop.arg; 1774 HReg src = iselIntExpr_R(env, arg); 1775 HReg dst = widen_s_16_to_64(env, src); 1776 return dst; 1777 } 1778 case Iop_32Sto64: { 1779 IRExpr* arg = e->Iex.Unop.arg; 1780 HReg src = iselIntExpr_R(env, arg); 1781 HReg dst = widen_s_32_to_64(env, src); 1782 return dst; 1783 } 1784 case Iop_Not32: 1785 
case Iop_Not64: { 1786 HReg dst = newVRegI(env); 1787 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1788 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT)); 1789 return dst; 1790 } 1791 case Iop_Clz64: { 1792 HReg dst = newVRegI(env); 1793 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1794 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ)); 1795 return dst; 1796 } 1797 case Iop_Left32: 1798 case Iop_Left64: { 1799 /* Left64(src) = src | -src. Left32 can use the same 1800 implementation since in that case we don't care what 1801 the upper 32 bits become. */ 1802 HReg dst = newVRegI(env); 1803 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1804 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG)); 1805 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src), 1806 ARM64lo_OR)); 1807 return dst; 1808 } 1809 case Iop_CmpwNEZ64: { 1810 /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1 1811 = Left64(src) >>s 63 */ 1812 HReg dst = newVRegI(env); 1813 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1814 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG)); 1815 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src), 1816 ARM64lo_OR)); 1817 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), 1818 ARM64sh_SAR)); 1819 return dst; 1820 } 1821 case Iop_CmpwNEZ32: { 1822 /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF) 1823 = Left64(src & 0xFFFFFFFF) >>s 63 */ 1824 HReg dst = newVRegI(env); 1825 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg); 1826 HReg src = widen_z_32_to_64(env, pre); 1827 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG)); 1828 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src), 1829 ARM64lo_OR)); 1830 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), 1831 ARM64sh_SAR)); 1832 return dst; 1833 } 1834 case Iop_V128to64: case Iop_V128HIto64: { 1835 HReg dst = newVRegI(env); 1836 HReg src = iselV128Expr(env, e->Iex.Unop.arg); 1837 UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 
1 : 0; 1838 addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo)); 1839 return dst; 1840 } 1841 case Iop_ReinterpF64asI64: { 1842 HReg dst = newVRegI(env); 1843 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 1844 addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/)); 1845 return dst; 1846 } 1847 case Iop_ReinterpF32asI32: { 1848 HReg dst = newVRegI(env); 1849 HReg src = iselFltExpr(env, e->Iex.Unop.arg); 1850 addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/)); 1851 return dst; 1852 } 1853 case Iop_1Sto16: 1854 case Iop_1Sto32: 1855 case Iop_1Sto64: { 1856 /* As with the iselStmt case for 'tmp:I1 = expr', we could 1857 do a lot better here if it ever became necessary. */ 1858 HReg zero = newVRegI(env); 1859 HReg one = newVRegI(env); 1860 HReg dst = newVRegI(env); 1861 addInstr(env, ARM64Instr_Imm64(zero, 0)); 1862 addInstr(env, ARM64Instr_Imm64(one, 1)); 1863 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg); 1864 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); 1865 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), 1866 ARM64sh_SHL)); 1867 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), 1868 ARM64sh_SAR)); 1869 return dst; 1870 } 1871 case Iop_NarrowUn16to8x8: 1872 case Iop_NarrowUn32to16x4: 1873 case Iop_NarrowUn64to32x2: 1874 case Iop_QNarrowUn16Sto8Sx8: 1875 case Iop_QNarrowUn32Sto16Sx4: 1876 case Iop_QNarrowUn64Sto32Sx2: 1877 case Iop_QNarrowUn16Uto8Ux8: 1878 case Iop_QNarrowUn32Uto16Ux4: 1879 case Iop_QNarrowUn64Uto32Ux2: 1880 case Iop_QNarrowUn16Sto8Ux8: 1881 case Iop_QNarrowUn32Sto16Ux4: 1882 case Iop_QNarrowUn64Sto32Ux2: 1883 { 1884 HReg src = iselV128Expr(env, e->Iex.Unop.arg); 1885 HReg tmp = newVRegV(env); 1886 HReg dst = newVRegI(env); 1887 UInt dszBlg2 = 3; /* illegal */ 1888 ARM64VecNarrowOp op = ARM64vecna_INVALID; 1889 switch (e->Iex.Unop.op) { 1890 case Iop_NarrowUn16to8x8: 1891 dszBlg2 = 0; op = ARM64vecna_XTN; break; 1892 case Iop_NarrowUn32to16x4: 1893 dszBlg2 = 1; op = ARM64vecna_XTN; break; 1894 case 
Iop_NarrowUn64to32x2: 1895 dszBlg2 = 2; op = ARM64vecna_XTN; break; 1896 case Iop_QNarrowUn16Sto8Sx8: 1897 dszBlg2 = 0; op = ARM64vecna_SQXTN; break; 1898 case Iop_QNarrowUn32Sto16Sx4: 1899 dszBlg2 = 1; op = ARM64vecna_SQXTN; break; 1900 case Iop_QNarrowUn64Sto32Sx2: 1901 dszBlg2 = 2; op = ARM64vecna_SQXTN; break; 1902 case Iop_QNarrowUn16Uto8Ux8: 1903 dszBlg2 = 0; op = ARM64vecna_UQXTN; break; 1904 case Iop_QNarrowUn32Uto16Ux4: 1905 dszBlg2 = 1; op = ARM64vecna_UQXTN; break; 1906 case Iop_QNarrowUn64Uto32Ux2: 1907 dszBlg2 = 2; op = ARM64vecna_UQXTN; break; 1908 case Iop_QNarrowUn16Sto8Ux8: 1909 dszBlg2 = 0; op = ARM64vecna_SQXTUN; break; 1910 case Iop_QNarrowUn32Sto16Ux4: 1911 dszBlg2 = 1; op = ARM64vecna_SQXTUN; break; 1912 case Iop_QNarrowUn64Sto32Ux2: 1913 dszBlg2 = 2; op = ARM64vecna_SQXTUN; break; 1914 default: 1915 vassert(0); 1916 } 1917 addInstr(env, ARM64Instr_VNarrowV(op, dszBlg2, tmp, src)); 1918 addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/)); 1919 return dst; 1920 } 1921 case Iop_1Uto64: { 1922 /* 1Uto64(tmp). */ 1923 HReg dst = newVRegI(env); 1924 if (e->Iex.Unop.arg->tag == Iex_RdTmp) { 1925 ARM64RIL* one = mb_mkARM64RIL_I(1); 1926 HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp); 1927 vassert(one); 1928 addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND)); 1929 } else { 1930 /* CLONE-01 */ 1931 HReg zero = newVRegI(env); 1932 HReg one = newVRegI(env); 1933 addInstr(env, ARM64Instr_Imm64(zero, 0)); 1934 addInstr(env, ARM64Instr_Imm64(one, 1)); 1935 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg); 1936 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); 1937 } 1938 return dst; 1939 } 1940 case Iop_64to32: 1941 case Iop_64to16: 1942 case Iop_64to8: 1943 /* These are no-ops. 
*/ 1944 return iselIntExpr_R(env, e->Iex.Unop.arg); 1945 1946 default: 1947 break; 1948 } 1949 1950 break; 1951 } 1952 1953 /* --------- GET --------- */ 1954 case Iex_Get: { 1955 if (ty == Ity_I64 1956 && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) { 1957 HReg dst = newVRegI(env); 1958 ARM64AMode* am 1959 = mk_baseblock_64bit_access_amode(e->Iex.Get.offset); 1960 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am)); 1961 return dst; 1962 } 1963 if (ty == Ity_I32 1964 && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) { 1965 HReg dst = newVRegI(env); 1966 ARM64AMode* am 1967 = mk_baseblock_32bit_access_amode(e->Iex.Get.offset); 1968 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am)); 1969 return dst; 1970 } 1971 if (ty == Ity_I16 1972 && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) { 1973 HReg dst = newVRegI(env); 1974 ARM64AMode* am 1975 = mk_baseblock_16bit_access_amode(e->Iex.Get.offset); 1976 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am)); 1977 return dst; 1978 } 1979 if (ty == Ity_I8 1980 /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) { 1981 HReg dst = newVRegI(env); 1982 ARM64AMode* am 1983 = mk_baseblock_8bit_access_amode(e->Iex.Get.offset); 1984 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am)); 1985 return dst; 1986 } 1987 break; 1988 } 1989 1990 /* --------- CCALL --------- */ 1991 case Iex_CCall: { 1992 HReg dst = newVRegI(env); 1993 vassert(ty == e->Iex.CCall.retty); 1994 1995 /* be very restrictive for now. Only 64-bit ints allowed for 1996 args, and 64 bits for return type. Don't forget to change 1997 the RetLoc if more types are allowed in future. */ 1998 if (e->Iex.CCall.retty != Ity_I64) 1999 goto irreducible; 2000 2001 /* Marshal args, do the call, clear stack. 
*/ 2002 UInt addToSp = 0; 2003 RetLoc rloc = mk_RetLoc_INVALID(); 2004 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, 2005 e->Iex.CCall.cee, e->Iex.CCall.retty, 2006 e->Iex.CCall.args ); 2007 /* */ 2008 if (ok) { 2009 vassert(is_sane_RetLoc(rloc)); 2010 vassert(rloc.pri == RLPri_Int); 2011 vassert(addToSp == 0); 2012 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0())); 2013 return dst; 2014 } 2015 /* else fall through; will hit the irreducible: label */ 2016 } 2017 2018 /* --------- LITERAL --------- */ 2019 /* 64-bit literals */ 2020 case Iex_Const: { 2021 ULong u = 0; 2022 HReg dst = newVRegI(env); 2023 switch (e->Iex.Const.con->tag) { 2024 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break; 2025 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; 2026 case Ico_U16: u = e->Iex.Const.con->Ico.U16; break; 2027 case Ico_U8: u = e->Iex.Const.con->Ico.U8; break; 2028 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)"); 2029 } 2030 addInstr(env, ARM64Instr_Imm64(dst, u)); 2031 return dst; 2032 } 2033 2034 /* --------- MULTIPLEX --------- */ 2035 case Iex_ITE: { 2036 /* ITE(ccexpr, iftrue, iffalse) */ 2037 if (ty == Ity_I64 || ty == Ity_I32) { 2038 ARM64CondCode cc; 2039 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue); 2040 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse); 2041 HReg dst = newVRegI(env); 2042 cc = iselCondCode(env, e->Iex.ITE.cond); 2043 addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc)); 2044 return dst; 2045 } 2046 break; 2047 } 2048 2049 default: 2050 break; 2051 } /* switch (e->tag) */ 2052 2053 /* We get here if no pattern matched. */ 2054 irreducible: 2055 ppIRExpr(e); 2056 vpanic("iselIntExpr_R: cannot reduce tree"); 2057} 2058 2059 2060/*---------------------------------------------------------*/ 2061/*--- ISEL: Integer expressions (128 bit) ---*/ 2062/*---------------------------------------------------------*/ 2063 2064/* Compute a 128-bit value into a register pair, which is returned as 2065 the first two parameters. 
As with iselIntExpr_R, these may be 2066 either real or virtual regs; in any case they must not be changed 2067 by subsequent code emitted by the caller. */ 2068 2069static void iselInt128Expr ( HReg* rHi, HReg* rLo, 2070 ISelEnv* env, IRExpr* e ) 2071{ 2072 iselInt128Expr_wrk(rHi, rLo, env, e); 2073# if 0 2074 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2075# endif 2076 vassert(hregClass(*rHi) == HRcInt64); 2077 vassert(hregIsVirtual(*rHi)); 2078 vassert(hregClass(*rLo) == HRcInt64); 2079 vassert(hregIsVirtual(*rLo)); 2080} 2081 2082/* DO NOT CALL THIS DIRECTLY ! */ 2083static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, 2084 ISelEnv* env, IRExpr* e ) 2085{ 2086 vassert(e); 2087 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128); 2088 2089 /* --------- BINARY ops --------- */ 2090 if (e->tag == Iex_Binop) { 2091 switch (e->Iex.Binop.op) { 2092 /* 64 x 64 -> 128 multiply */ 2093 case Iop_MullU64: 2094 case Iop_MullS64: { 2095 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64); 2096 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 2097 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 2098 HReg dstLo = newVRegI(env); 2099 HReg dstHi = newVRegI(env); 2100 addInstr(env, ARM64Instr_Mul(dstLo, argL, argR, 2101 ARM64mul_PLAIN)); 2102 addInstr(env, ARM64Instr_Mul(dstHi, argL, argR, 2103 syned ? 
ARM64mul_SX : ARM64mul_ZX)); 2104 *rHi = dstHi; 2105 *rLo = dstLo; 2106 return; 2107 } 2108 /* 64HLto128(e1,e2) */ 2109 case Iop_64HLto128: 2110 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2111 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2112 return; 2113 default: 2114 break; 2115 } 2116 } /* if (e->tag == Iex_Binop) */ 2117 2118 ppIRExpr(e); 2119 vpanic("iselInt128Expr(arm64)"); 2120} 2121 2122 2123/*---------------------------------------------------------*/ 2124/*--- ISEL: Vector expressions (128 bit) ---*/ 2125/*---------------------------------------------------------*/ 2126 2127static HReg iselV128Expr ( ISelEnv* env, IRExpr* e ) 2128{ 2129 HReg r = iselV128Expr_wrk( env, e ); 2130 vassert(hregClass(r) == HRcVec128); 2131 vassert(hregIsVirtual(r)); 2132 return r; 2133} 2134 2135/* DO NOT CALL THIS DIRECTLY */ 2136static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ) 2137{ 2138 IRType ty = typeOfIRExpr(env->type_env, e); 2139 vassert(e); 2140 vassert(ty == Ity_V128); 2141 2142 if (e->tag == Iex_RdTmp) { 2143 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2144 } 2145 2146 if (e->tag == Iex_Const) { 2147 /* Only a very limited range of constants is handled. 
*/ 2148 vassert(e->Iex.Const.con->tag == Ico_V128); 2149 UShort con = e->Iex.Const.con->Ico.V128; 2150 HReg res = newVRegV(env); 2151 switch (con) { 2152 case 0x0000: case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF: 2153 addInstr(env, ARM64Instr_VImmQ(res, con)); 2154 return res; 2155 case 0x00F0: 2156 addInstr(env, ARM64Instr_VImmQ(res, 0x000F)); 2157 addInstr(env, ARM64Instr_VExtV(res, res, res, 12)); 2158 return res; 2159 case 0x0F00: 2160 addInstr(env, ARM64Instr_VImmQ(res, 0x000F)); 2161 addInstr(env, ARM64Instr_VExtV(res, res, res, 8)); 2162 return res; 2163 case 0x0FF0: 2164 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF)); 2165 addInstr(env, ARM64Instr_VExtV(res, res, res, 12)); 2166 return res; 2167 case 0x0FFF: 2168 addInstr(env, ARM64Instr_VImmQ(res, 0x000F)); 2169 addInstr(env, ARM64Instr_VExtV(res, res, res, 4)); 2170 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res)); 2171 return res; 2172 case 0xF000: 2173 addInstr(env, ARM64Instr_VImmQ(res, 0x000F)); 2174 addInstr(env, ARM64Instr_VExtV(res, res, res, 4)); 2175 return res; 2176 case 0xFF00: 2177 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF)); 2178 addInstr(env, ARM64Instr_VExtV(res, res, res, 8)); 2179 return res; 2180 default: 2181 break; 2182 } 2183 /* Unhandled */ 2184 goto v128_expr_bad; 2185 } 2186 2187 if (e->tag == Iex_Load) { 2188 HReg res = newVRegV(env); 2189 HReg rN = iselIntExpr_R(env, e->Iex.Load.addr); 2190 vassert(ty == Ity_V128); 2191 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN)); 2192 return res; 2193 } 2194 2195 if (e->tag == Iex_Get) { 2196 UInt offs = (UInt)e->Iex.Get.offset; 2197 if (offs < (1<<12)) { 2198 HReg addr = mk_baseblock_128bit_access_addr(env, offs); 2199 HReg res = newVRegV(env); 2200 vassert(ty == Ity_V128); 2201 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr)); 2202 return res; 2203 } 2204 goto v128_expr_bad; 2205 } 2206 2207 if (e->tag == Iex_Unop) { 2208 2209 /* Iop_ZeroHIXXofV128 cases */ 2210 UShort imm16 = 0; 2211 switch 
(e->Iex.Unop.op) { 2212 case Iop_ZeroHI64ofV128: imm16 = 0x00FF; break; 2213 case Iop_ZeroHI96ofV128: imm16 = 0x000F; break; 2214 case Iop_ZeroHI112ofV128: imm16 = 0x0003; break; 2215 case Iop_ZeroHI120ofV128: imm16 = 0x0001; break; 2216 default: break; 2217 } 2218 if (imm16 != 0) { 2219 HReg src = iselV128Expr(env, e->Iex.Unop.arg); 2220 HReg imm = newVRegV(env); 2221 HReg res = newVRegV(env); 2222 addInstr(env, ARM64Instr_VImmQ(imm, imm16)); 2223 addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm)); 2224 return res; 2225 } 2226 2227 /* Other cases */ 2228 switch (e->Iex.Unop.op) { 2229 case Iop_NotV128: 2230 case Iop_Abs64Fx2: case Iop_Abs32Fx4: 2231 case Iop_Neg64Fx2: case Iop_Neg32Fx4: 2232 case Iop_Abs64x2: case Iop_Abs32x4: 2233 case Iop_Abs16x8: case Iop_Abs8x16: 2234 case Iop_Cls32x4: case Iop_Cls16x8: case Iop_Cls8x16: 2235 case Iop_Clz32x4: case Iop_Clz16x8: case Iop_Clz8x16: 2236 case Iop_Cnt8x16: 2237 case Iop_Reverse1sIn8_x16: 2238 case Iop_Reverse8sIn16_x8: 2239 case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4: 2240 case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2: 2241 case Iop_Reverse32sIn64_x2: 2242 case Iop_RecipEst32Ux4: 2243 case Iop_RSqrtEst32Ux4: 2244 case Iop_RecipEst64Fx2: case Iop_RecipEst32Fx4: 2245 case Iop_RSqrtEst64Fx2: case Iop_RSqrtEst32Fx4: 2246 { 2247 HReg res = newVRegV(env); 2248 HReg arg = iselV128Expr(env, e->Iex.Unop.arg); 2249 Bool setRM = False; 2250 ARM64VecUnaryOp op = ARM64vecu_INVALID; 2251 switch (e->Iex.Unop.op) { 2252 case Iop_NotV128: op = ARM64vecu_NOT; break; 2253 case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break; 2254 case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break; 2255 case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break; 2256 case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break; 2257 case Iop_Abs64x2: op = ARM64vecu_ABS64x2; break; 2258 case Iop_Abs32x4: op = ARM64vecu_ABS32x4; break; 2259 case Iop_Abs16x8: op = ARM64vecu_ABS16x8; break; 2260 case Iop_Abs8x16: op = ARM64vecu_ABS8x16; break; 2261 
case Iop_Cls32x4: op = ARM64vecu_CLS32x4; break; 2262 case Iop_Cls16x8: op = ARM64vecu_CLS16x8; break; 2263 case Iop_Cls8x16: op = ARM64vecu_CLS8x16; break; 2264 case Iop_Clz32x4: op = ARM64vecu_CLZ32x4; break; 2265 case Iop_Clz16x8: op = ARM64vecu_CLZ16x8; break; 2266 case Iop_Clz8x16: op = ARM64vecu_CLZ8x16; break; 2267 case Iop_Cnt8x16: op = ARM64vecu_CNT8x16; break; 2268 case Iop_Reverse1sIn8_x16: op = ARM64vecu_RBIT; break; 2269 case Iop_Reverse8sIn16_x8: op = ARM64vecu_REV1616B; break; 2270 case Iop_Reverse8sIn32_x4: op = ARM64vecu_REV3216B; break; 2271 case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H; break; 2272 case Iop_Reverse8sIn64_x2: op = ARM64vecu_REV6416B; break; 2273 case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H; break; 2274 case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S; break; 2275 case Iop_RecipEst32Ux4: op = ARM64vecu_URECPE32x4; break; 2276 case Iop_RSqrtEst32Ux4: op = ARM64vecu_URSQRTE32x4; break; 2277 case Iop_RecipEst64Fx2: setRM = True; 2278 op = ARM64vecu_FRECPE64x2; break; 2279 case Iop_RecipEst32Fx4: setRM = True; 2280 op = ARM64vecu_FRECPE32x4; break; 2281 case Iop_RSqrtEst64Fx2: setRM = True; 2282 op = ARM64vecu_FRSQRTE64x2; break; 2283 case Iop_RSqrtEst32Fx4: setRM = True; 2284 op = ARM64vecu_FRSQRTE32x4; break; 2285 default: vassert(0); 2286 } 2287 if (setRM) { 2288 // This is a bit of a kludge. We should do rm properly for 2289 // these recip-est insns, but that would require changing the 2290 // primop's type to take an rmode. 
2291 set_FPCR_rounding_mode(env, IRExpr_Const( 2292 IRConst_U32(Irrm_NEAREST))); 2293 } 2294 addInstr(env, ARM64Instr_VUnaryV(op, res, arg)); 2295 return res; 2296 } 2297 case Iop_CmpNEZ8x16: 2298 case Iop_CmpNEZ16x8: 2299 case Iop_CmpNEZ32x4: 2300 case Iop_CmpNEZ64x2: { 2301 HReg arg = iselV128Expr(env, e->Iex.Unop.arg); 2302 HReg zero = newVRegV(env); 2303 HReg res = newVRegV(env); 2304 ARM64VecBinOp cmp = ARM64vecb_INVALID; 2305 switch (e->Iex.Unop.op) { 2306 case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break; 2307 case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break; 2308 case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break; 2309 case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break; 2310 default: vassert(0); 2311 } 2312 // This is pretty feeble. Better: use CMP against zero 2313 // and avoid the extra instruction and extra register. 2314 addInstr(env, ARM64Instr_VImmQ(zero, 0x0000)); 2315 addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero)); 2316 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res)); 2317 return res; 2318 } 2319 case Iop_V256toV128_0: 2320 case Iop_V256toV128_1: { 2321 HReg vHi, vLo; 2322 iselV256Expr(&vHi, &vLo, env, e->Iex.Unop.arg); 2323 return (e->Iex.Unop.op == Iop_V256toV128_1) ? 
vHi : vLo; 2324 } 2325 case Iop_64UtoV128: { 2326 HReg res = newVRegV(env); 2327 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); 2328 addInstr(env, ARM64Instr_VQfromX(res, arg)); 2329 return res; 2330 } 2331 case Iop_Widen8Sto16x8: { 2332 HReg res = newVRegV(env); 2333 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); 2334 addInstr(env, ARM64Instr_VQfromX(res, arg)); 2335 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP18x16, res, res, res)); 2336 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR16x8, 2337 res, res, 8)); 2338 return res; 2339 } 2340 case Iop_Widen16Sto32x4: { 2341 HReg res = newVRegV(env); 2342 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); 2343 addInstr(env, ARM64Instr_VQfromX(res, arg)); 2344 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP116x8, res, res, res)); 2345 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR32x4, 2346 res, res, 16)); 2347 return res; 2348 } 2349 case Iop_Widen32Sto64x2: { 2350 HReg res = newVRegV(env); 2351 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); 2352 addInstr(env, ARM64Instr_VQfromX(res, arg)); 2353 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP132x4, res, res, res)); 2354 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR64x2, 2355 res, res, 32)); 2356 return res; 2357 } 2358 /* ... */ 2359 default: 2360 break; 2361 } /* switch on the unop */ 2362 } /* if (e->tag == Iex_Unop) */ 2363 2364 if (e->tag == Iex_Binop) { 2365 switch (e->Iex.Binop.op) { 2366 case Iop_Sqrt32Fx4: 2367 case Iop_Sqrt64Fx2: { 2368 HReg arg = iselV128Expr(env, e->Iex.Binop.arg2); 2369 HReg res = newVRegV(env); 2370 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); 2371 ARM64VecUnaryOp op 2372 = e->Iex.Binop.op == Iop_Sqrt32Fx4 2373 ? 
ARM64vecu_FSQRT32x4 : ARM64vecu_FSQRT64x2; 2374 addInstr(env, ARM64Instr_VUnaryV(op, res, arg)); 2375 return res; 2376 } 2377 case Iop_64HLtoV128: { 2378 HReg res = newVRegV(env); 2379 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 2380 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 2381 addInstr(env, ARM64Instr_VQfromXX(res, argL, argR)); 2382 return res; 2383 } 2384 /* -- Cases where we can generate a simple three-reg instruction. -- */ 2385 case Iop_AndV128: 2386 case Iop_OrV128: 2387 case Iop_XorV128: 2388 case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16: 2389 case Iop_Min32Ux4: case Iop_Min16Ux8: case Iop_Min8Ux16: 2390 case Iop_Max32Sx4: case Iop_Max16Sx8: case Iop_Max8Sx16: 2391 case Iop_Min32Sx4: case Iop_Min16Sx8: case Iop_Min8Sx16: 2392 case Iop_Add64x2: case Iop_Add32x4: 2393 case Iop_Add16x8: case Iop_Add8x16: 2394 case Iop_Sub64x2: case Iop_Sub32x4: 2395 case Iop_Sub16x8: case Iop_Sub8x16: 2396 case Iop_Mul32x4: case Iop_Mul16x8: case Iop_Mul8x16: 2397 case Iop_CmpEQ64x2: case Iop_CmpEQ32x4: 2398 case Iop_CmpEQ16x8: case Iop_CmpEQ8x16: 2399 case Iop_CmpGT64Ux2: case Iop_CmpGT32Ux4: 2400 case Iop_CmpGT16Ux8: case Iop_CmpGT8Ux16: 2401 case Iop_CmpGT64Sx2: case Iop_CmpGT32Sx4: 2402 case Iop_CmpGT16Sx8: case Iop_CmpGT8Sx16: 2403 case Iop_CmpEQ64Fx2: case Iop_CmpEQ32Fx4: 2404 case Iop_CmpLE64Fx2: case Iop_CmpLE32Fx4: 2405 case Iop_CmpLT64Fx2: case Iop_CmpLT32Fx4: 2406 case Iop_Perm8x16: 2407 case Iop_InterleaveLO64x2: case Iop_CatEvenLanes32x4: 2408 case Iop_CatEvenLanes16x8: case Iop_CatEvenLanes8x16: 2409 case Iop_InterleaveHI64x2: case Iop_CatOddLanes32x4: 2410 case Iop_CatOddLanes16x8: case Iop_CatOddLanes8x16: 2411 case Iop_InterleaveHI32x4: 2412 case Iop_InterleaveHI16x8: case Iop_InterleaveHI8x16: 2413 case Iop_InterleaveLO32x4: 2414 case Iop_InterleaveLO16x8: case Iop_InterleaveLO8x16: 2415 case Iop_PolynomialMul8x16: 2416 case Iop_QAdd64Sx2: case Iop_QAdd32Sx4: 2417 case Iop_QAdd16Sx8: case Iop_QAdd8Sx16: 2418 case Iop_QAdd64Ux2: 
case Iop_QAdd32Ux4: 2419 case Iop_QAdd16Ux8: case Iop_QAdd8Ux16: 2420 case Iop_QSub64Sx2: case Iop_QSub32Sx4: 2421 case Iop_QSub16Sx8: case Iop_QSub8Sx16: 2422 case Iop_QSub64Ux2: case Iop_QSub32Ux4: 2423 case Iop_QSub16Ux8: case Iop_QSub8Ux16: 2424 case Iop_QDMulHi32Sx4: case Iop_QDMulHi16Sx8: 2425 case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8: 2426 case Iop_Sh8Sx16: case Iop_Sh16Sx8: 2427 case Iop_Sh32Sx4: case Iop_Sh64Sx2: 2428 case Iop_Sh8Ux16: case Iop_Sh16Ux8: 2429 case Iop_Sh32Ux4: case Iop_Sh64Ux2: 2430 case Iop_Rsh8Sx16: case Iop_Rsh16Sx8: 2431 case Iop_Rsh32Sx4: case Iop_Rsh64Sx2: 2432 case Iop_Rsh8Ux16: case Iop_Rsh16Ux8: 2433 case Iop_Rsh32Ux4: case Iop_Rsh64Ux2: 2434 case Iop_Max64Fx2: case Iop_Max32Fx4: 2435 case Iop_Min64Fx2: case Iop_Min32Fx4: 2436 case Iop_RecipStep64Fx2: case Iop_RecipStep32Fx4: 2437 case Iop_RSqrtStep64Fx2: case Iop_RSqrtStep32Fx4: 2438 { 2439 HReg res = newVRegV(env); 2440 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1); 2441 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2); 2442 Bool sw = False; 2443 Bool setRM = False; 2444 ARM64VecBinOp op = ARM64vecb_INVALID; 2445 switch (e->Iex.Binop.op) { 2446 case Iop_AndV128: op = ARM64vecb_AND; break; 2447 case Iop_OrV128: op = ARM64vecb_ORR; break; 2448 case Iop_XorV128: op = ARM64vecb_XOR; break; 2449 case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break; 2450 case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break; 2451 case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break; 2452 case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break; 2453 case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break; 2454 case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break; 2455 case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break; 2456 case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break; 2457 case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break; 2458 case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break; 2459 case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break; 2460 case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break; 2461 case Iop_Add64x2: op = 
ARM64vecb_ADD64x2; break; 2462 case Iop_Add32x4: op = ARM64vecb_ADD32x4; break; 2463 case Iop_Add16x8: op = ARM64vecb_ADD16x8; break; 2464 case Iop_Add8x16: op = ARM64vecb_ADD8x16; break; 2465 case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break; 2466 case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break; 2467 case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break; 2468 case Iop_Sub8x16: op = ARM64vecb_SUB8x16; break; 2469 case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break; 2470 case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break; 2471 case Iop_Mul8x16: op = ARM64vecb_MUL8x16; break; 2472 case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break; 2473 case Iop_CmpEQ32x4: op = ARM64vecb_CMEQ32x4; break; 2474 case Iop_CmpEQ16x8: op = ARM64vecb_CMEQ16x8; break; 2475 case Iop_CmpEQ8x16: op = ARM64vecb_CMEQ8x16; break; 2476 case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break; 2477 case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break; 2478 case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break; 2479 case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break; 2480 case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break; 2481 case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break; 2482 case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break; 2483 case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break; 2484 case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break; 2485 case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break; 2486 case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break; 2487 case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break; 2488 case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break; 2489 case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break; 2490 case Iop_Perm8x16: op = ARM64vecb_TBL1; break; 2491 case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True; 2492 break; 2493 case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True; 2494 break; 2495 case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True; 2496 break; 2497 case Iop_CatEvenLanes8x16: op = 
ARM64vecb_UZP18x16; sw = True; 2498 break; 2499 case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True; 2500 break; 2501 case Iop_CatOddLanes32x4: op = ARM64vecb_UZP232x4; sw = True; 2502 break; 2503 case Iop_CatOddLanes16x8: op = ARM64vecb_UZP216x8; sw = True; 2504 break; 2505 case Iop_CatOddLanes8x16: op = ARM64vecb_UZP28x16; sw = True; 2506 break; 2507 case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True; 2508 break; 2509 case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True; 2510 break; 2511 case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True; 2512 break; 2513 case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True; 2514 break; 2515 case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True; 2516 break; 2517 case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True; 2518 break; 2519 case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break; 2520 case Iop_QAdd64Sx2: op = ARM64vecb_SQADD64x2; break; 2521 case Iop_QAdd32Sx4: op = ARM64vecb_SQADD32x4; break; 2522 case Iop_QAdd16Sx8: op = ARM64vecb_SQADD16x8; break; 2523 case Iop_QAdd8Sx16: op = ARM64vecb_SQADD8x16; break; 2524 case Iop_QAdd64Ux2: op = ARM64vecb_UQADD64x2; break; 2525 case Iop_QAdd32Ux4: op = ARM64vecb_UQADD32x4; break; 2526 case Iop_QAdd16Ux8: op = ARM64vecb_UQADD16x8; break; 2527 case Iop_QAdd8Ux16: op = ARM64vecb_UQADD8x16; break; 2528 case Iop_QSub64Sx2: op = ARM64vecb_SQSUB64x2; break; 2529 case Iop_QSub32Sx4: op = ARM64vecb_SQSUB32x4; break; 2530 case Iop_QSub16Sx8: op = ARM64vecb_SQSUB16x8; break; 2531 case Iop_QSub8Sx16: op = ARM64vecb_SQSUB8x16; break; 2532 case Iop_QSub64Ux2: op = ARM64vecb_UQSUB64x2; break; 2533 case Iop_QSub32Ux4: op = ARM64vecb_UQSUB32x4; break; 2534 case Iop_QSub16Ux8: op = ARM64vecb_UQSUB16x8; break; 2535 case Iop_QSub8Ux16: op = ARM64vecb_UQSUB8x16; break; 2536 case Iop_QDMulHi32Sx4: op = ARM64vecb_SQDMULH32x4; break; 2537 case Iop_QDMulHi16Sx8: op = ARM64vecb_SQDMULH16x8; break; 2538 case Iop_QRDMulHi32Sx4: op = 
ARM64vecb_SQRDMULH32x4; break; 2539 case Iop_QRDMulHi16Sx8: op = ARM64vecb_SQRDMULH16x8; break; 2540 case Iop_Sh8Sx16: op = ARM64vecb_SSHL8x16; break; 2541 case Iop_Sh16Sx8: op = ARM64vecb_SSHL16x8; break; 2542 case Iop_Sh32Sx4: op = ARM64vecb_SSHL32x4; break; 2543 case Iop_Sh64Sx2: op = ARM64vecb_SSHL64x2; break; 2544 case Iop_Sh8Ux16: op = ARM64vecb_USHL8x16; break; 2545 case Iop_Sh16Ux8: op = ARM64vecb_USHL16x8; break; 2546 case Iop_Sh32Ux4: op = ARM64vecb_USHL32x4; break; 2547 case Iop_Sh64Ux2: op = ARM64vecb_USHL64x2; break; 2548 case Iop_Rsh8Sx16: op = ARM64vecb_SRSHL8x16; break; 2549 case Iop_Rsh16Sx8: op = ARM64vecb_SRSHL16x8; break; 2550 case Iop_Rsh32Sx4: op = ARM64vecb_SRSHL32x4; break; 2551 case Iop_Rsh64Sx2: op = ARM64vecb_SRSHL64x2; break; 2552 case Iop_Rsh8Ux16: op = ARM64vecb_URSHL8x16; break; 2553 case Iop_Rsh16Ux8: op = ARM64vecb_URSHL16x8; break; 2554 case Iop_Rsh32Ux4: op = ARM64vecb_URSHL32x4; break; 2555 case Iop_Rsh64Ux2: op = ARM64vecb_URSHL64x2; break; 2556 case Iop_Max64Fx2: op = ARM64vecb_FMAX64x2; break; 2557 case Iop_Max32Fx4: op = ARM64vecb_FMAX32x4; break; 2558 case Iop_Min64Fx2: op = ARM64vecb_FMIN64x2; break; 2559 case Iop_Min32Fx4: op = ARM64vecb_FMIN32x4; break; 2560 case Iop_RecipStep64Fx2: setRM = True; 2561 op = ARM64vecb_FRECPS64x2; break; 2562 case Iop_RecipStep32Fx4: setRM = True; 2563 op = ARM64vecb_FRECPS32x4; break; 2564 case Iop_RSqrtStep64Fx2: setRM = True; 2565 op = ARM64vecb_FRSQRTS64x2; break; 2566 case Iop_RSqrtStep32Fx4: setRM = True; 2567 op = ARM64vecb_FRSQRTS32x4; break; 2568 default: vassert(0); 2569 } 2570 if (setRM) { 2571 // This is a bit of a kludge. We should do rm properly for 2572 // these recip-step insns, but that would require changing the 2573 // primop's type to take an rmode. 
2574 set_FPCR_rounding_mode(env, IRExpr_Const( 2575 IRConst_U32(Irrm_NEAREST))); 2576 } 2577 if (sw) { 2578 addInstr(env, ARM64Instr_VBinV(op, res, argR, argL)); 2579 } else { 2580 addInstr(env, ARM64Instr_VBinV(op, res, argL, argR)); 2581 } 2582 return res; 2583 } 2584 /* -- These only have 2 operand instructions, so we have to first move 2585 the first argument into a new register, for modification. -- */ 2586 case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8: 2587 case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2: 2588 case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8: 2589 case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2: 2590 { 2591 HReg res = newVRegV(env); 2592 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1); 2593 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2); 2594 ARM64VecModifyOp op = ARM64vecmo_INVALID; 2595 switch (e->Iex.Binop.op) { 2596 /* In the following 8 cases, the US - SU switching is intended. 2597 See comments on the libvex_ir.h for details. Also in the 2598 ARM64 front end, where used these primops are generated. */ 2599 case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break; 2600 case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break; 2601 case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break; 2602 case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break; 2603 case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break; 2604 case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break; 2605 case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break; 2606 case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break; 2607 default: vassert(0); 2608 } 2609 /* The order of the operands is important. Although this is 2610 basically addition, the two operands are extended differently, 2611 making it important to get them into the correct registers in 2612 the instruction. 
*/ 2613 addInstr(env, ARM64Instr_VMov(16, res, argR)); 2614 addInstr(env, ARM64Instr_VModifyV(op, res, argL)); 2615 return res; 2616 } 2617 /* -- Shifts by an immediate. -- */ 2618 case Iop_ShrN64x2: case Iop_ShrN32x4: 2619 case Iop_ShrN16x8: case Iop_ShrN8x16: 2620 case Iop_SarN64x2: case Iop_SarN32x4: 2621 case Iop_SarN16x8: case Iop_SarN8x16: 2622 case Iop_ShlN64x2: case Iop_ShlN32x4: 2623 case Iop_ShlN16x8: case Iop_ShlN8x16: 2624 case Iop_QShlNsatUU64x2: case Iop_QShlNsatUU32x4: 2625 case Iop_QShlNsatUU16x8: case Iop_QShlNsatUU8x16: 2626 case Iop_QShlNsatSS64x2: case Iop_QShlNsatSS32x4: 2627 case Iop_QShlNsatSS16x8: case Iop_QShlNsatSS8x16: 2628 case Iop_QShlNsatSU64x2: case Iop_QShlNsatSU32x4: 2629 case Iop_QShlNsatSU16x8: case Iop_QShlNsatSU8x16: 2630 { 2631 IRExpr* argL = e->Iex.Binop.arg1; 2632 IRExpr* argR = e->Iex.Binop.arg2; 2633 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) { 2634 UInt amt = argR->Iex.Const.con->Ico.U8; 2635 UInt limLo = 0; 2636 UInt limHi = 0; 2637 ARM64VecShiftImmOp op = ARM64vecshi_INVALID; 2638 /* Establish the instruction to use. 
*/ 2639 switch (e->Iex.Binop.op) { 2640 case Iop_ShrN64x2: op = ARM64vecshi_USHR64x2; break; 2641 case Iop_ShrN32x4: op = ARM64vecshi_USHR32x4; break; 2642 case Iop_ShrN16x8: op = ARM64vecshi_USHR16x8; break; 2643 case Iop_ShrN8x16: op = ARM64vecshi_USHR8x16; break; 2644 case Iop_SarN64x2: op = ARM64vecshi_SSHR64x2; break; 2645 case Iop_SarN32x4: op = ARM64vecshi_SSHR32x4; break; 2646 case Iop_SarN16x8: op = ARM64vecshi_SSHR16x8; break; 2647 case Iop_SarN8x16: op = ARM64vecshi_SSHR8x16; break; 2648 case Iop_ShlN64x2: op = ARM64vecshi_SHL64x2; break; 2649 case Iop_ShlN32x4: op = ARM64vecshi_SHL32x4; break; 2650 case Iop_ShlN16x8: op = ARM64vecshi_SHL16x8; break; 2651 case Iop_ShlN8x16: op = ARM64vecshi_SHL8x16; break; 2652 case Iop_QShlNsatUU64x2: op = ARM64vecshi_UQSHL64x2; break; 2653 case Iop_QShlNsatUU32x4: op = ARM64vecshi_UQSHL32x4; break; 2654 case Iop_QShlNsatUU16x8: op = ARM64vecshi_UQSHL16x8; break; 2655 case Iop_QShlNsatUU8x16: op = ARM64vecshi_UQSHL8x16; break; 2656 case Iop_QShlNsatSS64x2: op = ARM64vecshi_SQSHL64x2; break; 2657 case Iop_QShlNsatSS32x4: op = ARM64vecshi_SQSHL32x4; break; 2658 case Iop_QShlNsatSS16x8: op = ARM64vecshi_SQSHL16x8; break; 2659 case Iop_QShlNsatSS8x16: op = ARM64vecshi_SQSHL8x16; break; 2660 case Iop_QShlNsatSU64x2: op = ARM64vecshi_SQSHLU64x2; break; 2661 case Iop_QShlNsatSU32x4: op = ARM64vecshi_SQSHLU32x4; break; 2662 case Iop_QShlNsatSU16x8: op = ARM64vecshi_SQSHLU16x8; break; 2663 case Iop_QShlNsatSU8x16: op = ARM64vecshi_SQSHLU8x16; break; 2664 default: vassert(0); 2665 } 2666 /* Establish the shift limits, for sanity check purposes only. 
*/ 2667 switch (e->Iex.Binop.op) { 2668 case Iop_ShrN64x2: limLo = 1; limHi = 64; break; 2669 case Iop_ShrN32x4: limLo = 1; limHi = 32; break; 2670 case Iop_ShrN16x8: limLo = 1; limHi = 16; break; 2671 case Iop_ShrN8x16: limLo = 1; limHi = 8; break; 2672 case Iop_SarN64x2: limLo = 1; limHi = 64; break; 2673 case Iop_SarN32x4: limLo = 1; limHi = 32; break; 2674 case Iop_SarN16x8: limLo = 1; limHi = 16; break; 2675 case Iop_SarN8x16: limLo = 1; limHi = 8; break; 2676 case Iop_ShlN64x2: limLo = 0; limHi = 63; break; 2677 case Iop_ShlN32x4: limLo = 0; limHi = 31; break; 2678 case Iop_ShlN16x8: limLo = 0; limHi = 15; break; 2679 case Iop_ShlN8x16: limLo = 0; limHi = 7; break; 2680 case Iop_QShlNsatUU64x2: limLo = 0; limHi = 63; break; 2681 case Iop_QShlNsatUU32x4: limLo = 0; limHi = 31; break; 2682 case Iop_QShlNsatUU16x8: limLo = 0; limHi = 15; break; 2683 case Iop_QShlNsatUU8x16: limLo = 0; limHi = 7; break; 2684 case Iop_QShlNsatSS64x2: limLo = 0; limHi = 63; break; 2685 case Iop_QShlNsatSS32x4: limLo = 0; limHi = 31; break; 2686 case Iop_QShlNsatSS16x8: limLo = 0; limHi = 15; break; 2687 case Iop_QShlNsatSS8x16: limLo = 0; limHi = 7; break; 2688 case Iop_QShlNsatSU64x2: limLo = 0; limHi = 63; break; 2689 case Iop_QShlNsatSU32x4: limLo = 0; limHi = 31; break; 2690 case Iop_QShlNsatSU16x8: limLo = 0; limHi = 15; break; 2691 case Iop_QShlNsatSU8x16: limLo = 0; limHi = 7; break; 2692 default: vassert(0); 2693 } 2694 /* For left shifts, the allowable amt values are 2695 0 .. lane_bits-1. For right shifts the allowable 2696 values are 1 .. lane_bits. */ 2697 if (op != ARM64vecshi_INVALID && amt >= limLo && amt <= limHi) { 2698 HReg src = iselV128Expr(env, argL); 2699 HReg dst = newVRegV(env); 2700 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt)); 2701 return dst; 2702 } 2703 /* Special case some no-op shifts that the arm64 front end 2704 throws at us. We can't generate any instructions for these, 2705 but we don't need to either. 
*/ 2706 switch (e->Iex.Binop.op) { 2707 case Iop_ShrN64x2: case Iop_ShrN32x4: 2708 case Iop_ShrN16x8: case Iop_ShrN8x16: 2709 if (amt == 0) { 2710 return iselV128Expr(env, argL); 2711 } 2712 break; 2713 default: 2714 break; 2715 } 2716 /* otherwise unhandled */ 2717 } 2718 /* else fall out; this is unhandled */ 2719 break; 2720 } 2721 /* -- Saturating narrowing by an immediate -- */ 2722 /* uu */ 2723 case Iop_QandQShrNnarrow16Uto8Ux8: 2724 case Iop_QandQShrNnarrow32Uto16Ux4: 2725 case Iop_QandQShrNnarrow64Uto32Ux2: 2726 /* ss */ 2727 case Iop_QandQSarNnarrow16Sto8Sx8: 2728 case Iop_QandQSarNnarrow32Sto16Sx4: 2729 case Iop_QandQSarNnarrow64Sto32Sx2: 2730 /* su */ 2731 case Iop_QandQSarNnarrow16Sto8Ux8: 2732 case Iop_QandQSarNnarrow32Sto16Ux4: 2733 case Iop_QandQSarNnarrow64Sto32Ux2: 2734 /* ruu */ 2735 case Iop_QandQRShrNnarrow16Uto8Ux8: 2736 case Iop_QandQRShrNnarrow32Uto16Ux4: 2737 case Iop_QandQRShrNnarrow64Uto32Ux2: 2738 /* rss */ 2739 case Iop_QandQRSarNnarrow16Sto8Sx8: 2740 case Iop_QandQRSarNnarrow32Sto16Sx4: 2741 case Iop_QandQRSarNnarrow64Sto32Sx2: 2742 /* rsu */ 2743 case Iop_QandQRSarNnarrow16Sto8Ux8: 2744 case Iop_QandQRSarNnarrow32Sto16Ux4: 2745 case Iop_QandQRSarNnarrow64Sto32Ux2: 2746 { 2747 IRExpr* argL = e->Iex.Binop.arg1; 2748 IRExpr* argR = e->Iex.Binop.arg2; 2749 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) { 2750 UInt amt = argR->Iex.Const.con->Ico.U8; 2751 UInt limit = 0; 2752 ARM64VecShiftImmOp op = ARM64vecshi_INVALID; 2753 switch (e->Iex.Binop.op) { 2754 /* uu */ 2755 case Iop_QandQShrNnarrow64Uto32Ux2: 2756 op = ARM64vecshi_UQSHRN2SD; limit = 64; break; 2757 case Iop_QandQShrNnarrow32Uto16Ux4: 2758 op = ARM64vecshi_UQSHRN4HS; limit = 32; break; 2759 case Iop_QandQShrNnarrow16Uto8Ux8: 2760 op = ARM64vecshi_UQSHRN8BH; limit = 16; break; 2761 /* ss */ 2762 case Iop_QandQSarNnarrow64Sto32Sx2: 2763 op = ARM64vecshi_SQSHRN2SD; limit = 64; break; 2764 case Iop_QandQSarNnarrow32Sto16Sx4: 2765 op = ARM64vecshi_SQSHRN4HS; limit 
= 32; break; 2766 case Iop_QandQSarNnarrow16Sto8Sx8: 2767 op = ARM64vecshi_SQSHRN8BH; limit = 16; break; 2768 /* su */ 2769 case Iop_QandQSarNnarrow64Sto32Ux2: 2770 op = ARM64vecshi_SQSHRUN2SD; limit = 64; break; 2771 case Iop_QandQSarNnarrow32Sto16Ux4: 2772 op = ARM64vecshi_SQSHRUN4HS; limit = 32; break; 2773 case Iop_QandQSarNnarrow16Sto8Ux8: 2774 op = ARM64vecshi_SQSHRUN8BH; limit = 16; break; 2775 /* ruu */ 2776 case Iop_QandQRShrNnarrow64Uto32Ux2: 2777 op = ARM64vecshi_UQRSHRN2SD; limit = 64; break; 2778 case Iop_QandQRShrNnarrow32Uto16Ux4: 2779 op = ARM64vecshi_UQRSHRN4HS; limit = 32; break; 2780 case Iop_QandQRShrNnarrow16Uto8Ux8: 2781 op = ARM64vecshi_UQRSHRN8BH; limit = 16; break; 2782 /* rss */ 2783 case Iop_QandQRSarNnarrow64Sto32Sx2: 2784 op = ARM64vecshi_SQRSHRN2SD; limit = 64; break; 2785 case Iop_QandQRSarNnarrow32Sto16Sx4: 2786 op = ARM64vecshi_SQRSHRN4HS; limit = 32; break; 2787 case Iop_QandQRSarNnarrow16Sto8Sx8: 2788 op = ARM64vecshi_SQRSHRN8BH; limit = 16; break; 2789 /* rsu */ 2790 case Iop_QandQRSarNnarrow64Sto32Ux2: 2791 op = ARM64vecshi_SQRSHRUN2SD; limit = 64; break; 2792 case Iop_QandQRSarNnarrow32Sto16Ux4: 2793 op = ARM64vecshi_SQRSHRUN4HS; limit = 32; break; 2794 case Iop_QandQRSarNnarrow16Sto8Ux8: 2795 op = ARM64vecshi_SQRSHRUN8BH; limit = 16; break; 2796 /**/ 2797 default: 2798 vassert(0); 2799 } 2800 if (op != ARM64vecshi_INVALID && amt >= 1 && amt <= limit) { 2801 HReg src = iselV128Expr(env, argL); 2802 HReg dst = newVRegV(env); 2803 HReg fpsr = newVRegI(env); 2804 /* Clear FPSR.Q, do the operation, and return both its 2805 result and the new value of FPSR.Q. We can simply 2806 zero out FPSR since all the other bits have no relevance 2807 in VEX generated code. 
*/ 2808 addInstr(env, ARM64Instr_Imm64(fpsr, 0)); 2809 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr)); 2810 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt)); 2811 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr)); 2812 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27), 2813 ARM64sh_SHR)); 2814 ARM64RIL* ril_one = mb_mkARM64RIL_I(1); 2815 vassert(ril_one); 2816 addInstr(env, ARM64Instr_Logic(fpsr, 2817 fpsr, ril_one, ARM64lo_AND)); 2818 /* Now we have: the main (shift) result in the bottom half 2819 of |dst|, and the Q bit at the bottom of |fpsr|. 2820 Combining them with a "InterleaveLO64x2" style operation 2821 produces a 128 bit value, dst[63:0]:fpsr[63:0], 2822 which is what we want. */ 2823 HReg scratch = newVRegV(env); 2824 addInstr(env, ARM64Instr_VQfromX(scratch, fpsr)); 2825 addInstr(env, ARM64Instr_VBinV(ARM64vecb_UZP164x2, 2826 dst, dst, scratch)); 2827 return dst; 2828 } 2829 } 2830 /* else fall out; this is unhandled */ 2831 break; 2832 } 2833 2834 // Use Iop_SliceV128 in preference to Iop_ShlV128 and Iop_ShrV128, 2835 // as it is in some ways more general and often leads to better 2836 // code overall. 2837 case Iop_ShlV128: 2838 case Iop_ShrV128: { 2839 Bool isSHR = e->Iex.Binop.op == Iop_ShrV128; 2840 /* This is tricky. Generate an EXT instruction with zeroes in 2841 the high operand (shift right) or low operand (shift left). 2842 Note that we can only slice in the EXT instruction at a byte 2843 level of granularity, so the shift amount needs careful 2844 checking. 
*/ 2845 IRExpr* argL = e->Iex.Binop.arg1; 2846 IRExpr* argR = e->Iex.Binop.arg2; 2847 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) { 2848 UInt amt = argR->Iex.Const.con->Ico.U8; 2849 Bool amtOK = False; 2850 switch (amt) { 2851 case 0x08: case 0x10: case 0x18: case 0x20: case 0x28: 2852 case 0x30: case 0x38: case 0x40: case 0x48: case 0x50: 2853 case 0x58: case 0x60: case 0x68: case 0x70: case 0x78: 2854 amtOK = True; break; 2855 } 2856 /* We could also deal with amt==0 by copying the source to 2857 the destination, but there's no need for that so far. */ 2858 if (amtOK) { 2859 HReg src = iselV128Expr(env, argL); 2860 HReg srcZ = newVRegV(env); 2861 addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000)); 2862 UInt immB = amt / 8; 2863 vassert(immB >= 1 && immB <= 15); 2864 HReg dst = newVRegV(env); 2865 if (isSHR) { 2866 addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/, 2867 immB)); 2868 } else { 2869 addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/, 2870 16 - immB)); 2871 } 2872 return dst; 2873 } 2874 } 2875 /* else fall out; this is unhandled */ 2876 break; 2877 } 2878 2879 case Iop_PolynomialMull8x8: 2880 case Iop_Mull32Ux2: 2881 case Iop_Mull16Ux4: 2882 case Iop_Mull8Ux8: 2883 case Iop_Mull32Sx2: 2884 case Iop_Mull16Sx4: 2885 case Iop_Mull8Sx8: 2886 case Iop_QDMull32Sx2: 2887 case Iop_QDMull16Sx4: 2888 { 2889 HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1); 2890 HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2); 2891 HReg vSrcL = newVRegV(env); 2892 HReg vSrcR = newVRegV(env); 2893 HReg dst = newVRegV(env); 2894 ARM64VecBinOp op = ARM64vecb_INVALID; 2895 switch (e->Iex.Binop.op) { 2896 case Iop_PolynomialMull8x8: op = ARM64vecb_PMULL8x8; break; 2897 case Iop_Mull32Ux2: op = ARM64vecb_UMULL2DSS; break; 2898 case Iop_Mull16Ux4: op = ARM64vecb_UMULL4SHH; break; 2899 case Iop_Mull8Ux8: op = ARM64vecb_UMULL8HBB; break; 2900 case Iop_Mull32Sx2: op = ARM64vecb_SMULL2DSS; break; 2901 case Iop_Mull16Sx4: op = ARM64vecb_SMULL4SHH; 
break; 2902 case Iop_Mull8Sx8: op = ARM64vecb_SMULL8HBB; break; 2903 case Iop_QDMull32Sx2: op = ARM64vecb_SQDMULL2DSS; break; 2904 case Iop_QDMull16Sx4: op = ARM64vecb_SQDMULL4SHH; break; 2905 default: vassert(0); 2906 } 2907 addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL)); 2908 addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR)); 2909 addInstr(env, ARM64Instr_VBinV(op, dst, vSrcL, vSrcR)); 2910 return dst; 2911 } 2912 2913 /* ... */ 2914 default: 2915 break; 2916 } /* switch on the binop */ 2917 } /* if (e->tag == Iex_Binop) */ 2918 2919 if (e->tag == Iex_Triop) { 2920 IRTriop* triop = e->Iex.Triop.details; 2921 ARM64VecBinOp vecbop = ARM64vecb_INVALID; 2922 switch (triop->op) { 2923 case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break; 2924 case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break; 2925 case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break; 2926 case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break; 2927 case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break; 2928 case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break; 2929 case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break; 2930 case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break; 2931 default: break; 2932 } 2933 if (vecbop != ARM64vecb_INVALID) { 2934 HReg argL = iselV128Expr(env, triop->arg2); 2935 HReg argR = iselV128Expr(env, triop->arg3); 2936 HReg dst = newVRegV(env); 2937 set_FPCR_rounding_mode(env, triop->arg1); 2938 addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR)); 2939 return dst; 2940 } 2941 2942 if (triop->op == Iop_SliceV128) { 2943 /* Note that, compared to ShlV128/ShrV128 just above, the shift 2944 amount here is in bytes, not bits. 
*/ 2945 IRExpr* argHi = triop->arg1; 2946 IRExpr* argLo = triop->arg2; 2947 IRExpr* argAmt = triop->arg3; 2948 if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) { 2949 UInt amt = argAmt->Iex.Const.con->Ico.U8; 2950 Bool amtOK = amt >= 1 && amt <= 15; 2951 /* We could also deal with amt==0 by copying argLO to 2952 the destination, but there's no need for that so far. */ 2953 if (amtOK) { 2954 HReg srcHi = iselV128Expr(env, argHi); 2955 HReg srcLo = iselV128Expr(env, argLo); 2956 HReg dst = newVRegV(env); 2957 addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt)); 2958 return dst; 2959 } 2960 } 2961 /* else fall out; this is unhandled */ 2962 } 2963 2964 } /* if (e->tag == Iex_Triop) */ 2965 2966 v128_expr_bad: 2967 ppIRExpr(e); 2968 vpanic("iselV128Expr_wrk"); 2969} 2970 2971 2972/*---------------------------------------------------------*/ 2973/*--- ISEL: Floating point expressions (64 bit) ---*/ 2974/*---------------------------------------------------------*/ 2975 2976/* Compute a 64-bit floating point value into a register, the identity 2977 of which is returned. As with iselIntExpr_R, the reg may be either 2978 real or virtual; in any case it must not be changed by subsequent 2979 code emitted by the caller. 
*/ 2980 2981static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ) 2982{ 2983 HReg r = iselDblExpr_wrk( env, e ); 2984# if 0 2985 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2986# endif 2987 vassert(hregClass(r) == HRcFlt64); 2988 vassert(hregIsVirtual(r)); 2989 return r; 2990} 2991 2992/* DO NOT CALL THIS DIRECTLY */ 2993static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) 2994{ 2995 IRType ty = typeOfIRExpr(env->type_env,e); 2996 vassert(e); 2997 vassert(ty == Ity_F64); 2998 2999 if (e->tag == Iex_RdTmp) { 3000 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 3001 } 3002 3003 if (e->tag == Iex_Const) { 3004 IRConst* con = e->Iex.Const.con; 3005 if (con->tag == Ico_F64i) { 3006 HReg src = newVRegI(env); 3007 HReg dst = newVRegD(env); 3008 addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i)); 3009 addInstr(env, ARM64Instr_VDfromX(dst, src)); 3010 return dst; 3011 } 3012 if (con->tag == Ico_F64) { 3013 HReg src = newVRegI(env); 3014 HReg dst = newVRegD(env); 3015 union { Double d64; ULong u64; } u; 3016 vassert(sizeof(u) == 8); 3017 u.d64 = con->Ico.F64; 3018 addInstr(env, ARM64Instr_Imm64(src, u.u64)); 3019 addInstr(env, ARM64Instr_VDfromX(dst, src)); 3020 return dst; 3021 } 3022 } 3023 3024 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 3025 vassert(e->Iex.Load.ty == Ity_F64); 3026 HReg addr = iselIntExpr_R(env, e->Iex.Load.addr); 3027 HReg res = newVRegD(env); 3028 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0)); 3029 return res; 3030 } 3031 3032 if (e->tag == Iex_Get) { 3033 Int offs = e->Iex.Get.offset; 3034 if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) { 3035 HReg rD = newVRegD(env); 3036 HReg rN = get_baseblock_register(); 3037 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs)); 3038 return rD; 3039 } 3040 } 3041 3042 if (e->tag == Iex_Unop) { 3043 switch (e->Iex.Unop.op) { 3044 case Iop_NegF64: { 3045 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 3046 HReg dst = newVRegD(env); 3047 addInstr(env, 
ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src)); 3048 return dst; 3049 } 3050 case Iop_AbsF64: { 3051 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 3052 HReg dst = newVRegD(env); 3053 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src)); 3054 return dst; 3055 } 3056 case Iop_F32toF64: { 3057 HReg src = iselFltExpr(env, e->Iex.Unop.arg); 3058 HReg dst = newVRegD(env); 3059 addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src)); 3060 return dst; 3061 } 3062 case Iop_F16toF64: { 3063 HReg src = iselF16Expr(env, e->Iex.Unop.arg); 3064 HReg dst = newVRegD(env); 3065 addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src)); 3066 return dst; 3067 } 3068 case Iop_I32UtoF64: 3069 case Iop_I32StoF64: { 3070 /* Rounding mode is not involved here, since the 3071 conversion can always be done without loss of 3072 precision. */ 3073 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 3074 HReg dst = newVRegD(env); 3075 Bool syned = e->Iex.Unop.op == Iop_I32StoF64; 3076 ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U; 3077 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src)); 3078 return dst; 3079 } 3080 default: 3081 break; 3082 } 3083 } 3084 3085 if (e->tag == Iex_Binop) { 3086 switch (e->Iex.Binop.op) { 3087 case Iop_RoundF64toInt: 3088 case Iop_SqrtF64: 3089 case Iop_RecpExpF64: { 3090 HReg src = iselDblExpr(env, e->Iex.Binop.arg2); 3091 HReg dst = newVRegD(env); 3092 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); 3093 ARM64FpUnaryOp op = ARM64fpu_INVALID; 3094 switch (e->Iex.Binop.op) { 3095 case Iop_RoundF64toInt: op = ARM64fpu_RINT; break; 3096 case Iop_SqrtF64: op = ARM64fpu_SQRT; break; 3097 case Iop_RecpExpF64: op = ARM64fpu_RECPX; break; 3098 default: vassert(0); 3099 } 3100 addInstr(env, ARM64Instr_VUnaryD(op, dst, src)); 3101 return dst; 3102 } 3103 case Iop_I64StoF64: 3104 case Iop_I64UtoF64: { 3105 ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64 3106 ? 
ARM64cvt_F64_I64S : ARM64cvt_F64_I64U; 3107 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); 3108 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); 3109 HReg dstS = newVRegD(env); 3110 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI)); 3111 return dstS; 3112 } 3113 default: 3114 break; 3115 } 3116 } 3117 3118 if (e->tag == Iex_Triop) { 3119 IRTriop* triop = e->Iex.Triop.details; 3120 ARM64FpBinOp dblop = ARM64fpb_INVALID; 3121 switch (triop->op) { 3122 case Iop_DivF64: dblop = ARM64fpb_DIV; break; 3123 case Iop_MulF64: dblop = ARM64fpb_MUL; break; 3124 case Iop_SubF64: dblop = ARM64fpb_SUB; break; 3125 case Iop_AddF64: dblop = ARM64fpb_ADD; break; 3126 default: break; 3127 } 3128 if (dblop != ARM64fpb_INVALID) { 3129 HReg argL = iselDblExpr(env, triop->arg2); 3130 HReg argR = iselDblExpr(env, triop->arg3); 3131 HReg dst = newVRegD(env); 3132 set_FPCR_rounding_mode(env, triop->arg1); 3133 addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR)); 3134 return dst; 3135 } 3136 } 3137 3138 if (e->tag == Iex_ITE) { 3139 /* ITE(ccexpr, iftrue, iffalse) */ 3140 ARM64CondCode cc; 3141 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue); 3142 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse); 3143 HReg dst = newVRegD(env); 3144 cc = iselCondCode(env, e->Iex.ITE.cond); 3145 addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, True/*64-bit*/)); 3146 return dst; 3147 } 3148 3149 ppIRExpr(e); 3150 vpanic("iselDblExpr_wrk"); 3151} 3152 3153 3154/*---------------------------------------------------------*/ 3155/*--- ISEL: Floating point expressions (32 bit) ---*/ 3156/*---------------------------------------------------------*/ 3157 3158/* Compute a 32-bit floating point value into a register, the identity 3159 of which is returned. As with iselIntExpr_R, the reg may be either 3160 real or virtual; in any case it must not be changed by subsequent 3161 code emitted by the caller. Values are generated into HRcFlt64 3162 registers despite the values themselves being Ity_F32s. 
*/ 3163 3164static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ) 3165{ 3166 HReg r = iselFltExpr_wrk( env, e ); 3167# if 0 3168 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3169# endif 3170 vassert(hregClass(r) == HRcFlt64); 3171 vassert(hregIsVirtual(r)); 3172 return r; 3173} 3174 3175/* DO NOT CALL THIS DIRECTLY */ 3176static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ) 3177{ 3178 IRType ty = typeOfIRExpr(env->type_env,e); 3179 vassert(e); 3180 vassert(ty == Ity_F32); 3181 3182 if (e->tag == Iex_RdTmp) { 3183 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 3184 } 3185 3186 if (e->tag == Iex_Const) { 3187 /* This is something of a kludge. Since a 32 bit floating point 3188 zero is just .. all zeroes, just create a 64 bit zero word 3189 and transfer it. This avoids having to create a SfromW 3190 instruction for this specific case. */ 3191 IRConst* con = e->Iex.Const.con; 3192 if (con->tag == Ico_F32i && con->Ico.F32i == 0) { 3193 HReg src = newVRegI(env); 3194 HReg dst = newVRegD(env); 3195 addInstr(env, ARM64Instr_Imm64(src, 0)); 3196 addInstr(env, ARM64Instr_VDfromX(dst, src)); 3197 return dst; 3198 } 3199 if (con->tag == Ico_F32) { 3200 HReg src = newVRegI(env); 3201 HReg dst = newVRegD(env); 3202 union { Float f32; UInt u32; } u; 3203 vassert(sizeof(u) == 4); 3204 u.f32 = con->Ico.F32; 3205 addInstr(env, ARM64Instr_Imm64(src, (ULong)u.u32)); 3206 addInstr(env, ARM64Instr_VDfromX(dst, src)); 3207 return dst; 3208 } 3209 } 3210 3211 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 3212 vassert(e->Iex.Load.ty == Ity_F32); 3213 HReg addr = iselIntExpr_R(env, e->Iex.Load.addr); 3214 HReg res = newVRegD(env); 3215 addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, res, addr, 0)); 3216 return res; 3217 } 3218 3219 if (e->tag == Iex_Get) { 3220 Int offs = e->Iex.Get.offset; 3221 if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) { 3222 HReg rD = newVRegD(env); 3223 HReg rN = get_baseblock_register(); 3224 addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, 
offs)); 3225 return rD; 3226 } 3227 } 3228 3229 if (e->tag == Iex_Unop) { 3230 switch (e->Iex.Unop.op) { 3231 case Iop_NegF32: { 3232 HReg src = iselFltExpr(env, e->Iex.Unop.arg); 3233 HReg dst = newVRegD(env); 3234 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src)); 3235 return dst; 3236 } 3237 case Iop_AbsF32: { 3238 HReg src = iselFltExpr(env, e->Iex.Unop.arg); 3239 HReg dst = newVRegD(env); 3240 addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src)); 3241 return dst; 3242 } 3243 case Iop_F16toF32: { 3244 HReg src = iselF16Expr(env, e->Iex.Unop.arg); 3245 HReg dst = newVRegD(env); 3246 addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src)); 3247 return dst; 3248 } 3249 default: 3250 break; 3251 } 3252 } 3253 3254 if (e->tag == Iex_Binop) { 3255 switch (e->Iex.Binop.op) { 3256 case Iop_RoundF32toInt: 3257 case Iop_SqrtF32: 3258 case Iop_RecpExpF32: { 3259 HReg src = iselFltExpr(env, e->Iex.Binop.arg2); 3260 HReg dst = newVRegD(env); 3261 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); 3262 ARM64FpUnaryOp op = ARM64fpu_INVALID; 3263 switch (e->Iex.Binop.op) { 3264 case Iop_RoundF32toInt: op = ARM64fpu_RINT; break; 3265 case Iop_SqrtF32: op = ARM64fpu_SQRT; break; 3266 case Iop_RecpExpF32: op = ARM64fpu_RECPX; break; 3267 default: vassert(0); 3268 } 3269 addInstr(env, ARM64Instr_VUnaryS(op, dst, src)); 3270 return dst; 3271 } 3272 case Iop_F64toF32: { 3273 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2); 3274 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); 3275 HReg dstS = newVRegD(env); 3276 addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD)); 3277 return dstS; 3278 } 3279 case Iop_I32UtoF32: 3280 case Iop_I32StoF32: 3281 case Iop_I64UtoF32: 3282 case Iop_I64StoF32: { 3283 ARM64CvtOp cvt_op = ARM64cvt_INVALID; 3284 switch (e->Iex.Binop.op) { 3285 case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break; 3286 case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break; 3287 case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break; 3288 case Iop_I64StoF32: 
cvt_op = ARM64cvt_F32_I64S; break; 3289 default: vassert(0); 3290 } 3291 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); 3292 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); 3293 HReg dstS = newVRegD(env); 3294 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI)); 3295 return dstS; 3296 } 3297 default: 3298 break; 3299 } 3300 } 3301 3302 if (e->tag == Iex_Triop) { 3303 IRTriop* triop = e->Iex.Triop.details; 3304 ARM64FpBinOp sglop = ARM64fpb_INVALID; 3305 switch (triop->op) { 3306 case Iop_DivF32: sglop = ARM64fpb_DIV; break; 3307 case Iop_MulF32: sglop = ARM64fpb_MUL; break; 3308 case Iop_SubF32: sglop = ARM64fpb_SUB; break; 3309 case Iop_AddF32: sglop = ARM64fpb_ADD; break; 3310 default: break; 3311 } 3312 if (sglop != ARM64fpb_INVALID) { 3313 HReg argL = iselFltExpr(env, triop->arg2); 3314 HReg argR = iselFltExpr(env, triop->arg3); 3315 HReg dst = newVRegD(env); 3316 set_FPCR_rounding_mode(env, triop->arg1); 3317 addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR)); 3318 return dst; 3319 } 3320 } 3321 3322 if (e->tag == Iex_ITE) { 3323 /* ITE(ccexpr, iftrue, iffalse) */ 3324 ARM64CondCode cc; 3325 HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue); 3326 HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse); 3327 HReg dst = newVRegD(env); 3328 cc = iselCondCode(env, e->Iex.ITE.cond); 3329 addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, False/*!64-bit*/)); 3330 return dst; 3331 } 3332 3333 ppIRExpr(e); 3334 vpanic("iselFltExpr_wrk"); 3335} 3336 3337 3338/*---------------------------------------------------------*/ 3339/*--- ISEL: Floating point expressions (16 bit) ---*/ 3340/*---------------------------------------------------------*/ 3341 3342/* Compute a 16-bit floating point value into a register, the identity 3343 of which is returned. As with iselIntExpr_R, the reg may be either 3344 real or virtual; in any case it must not be changed by subsequent 3345 code emitted by the caller. 
Values are generated into HRcFlt64 3346 registers despite the values themselves being Ity_F16s. */ 3347 3348static HReg iselF16Expr ( ISelEnv* env, IRExpr* e ) 3349{ 3350 HReg r = iselF16Expr_wrk( env, e ); 3351# if 0 3352 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3353# endif 3354 vassert(hregClass(r) == HRcFlt64); 3355 vassert(hregIsVirtual(r)); 3356 return r; 3357} 3358 3359/* DO NOT CALL THIS DIRECTLY */ 3360static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e ) 3361{ 3362 IRType ty = typeOfIRExpr(env->type_env,e); 3363 vassert(e); 3364 vassert(ty == Ity_F16); 3365 3366 if (e->tag == Iex_Get) { 3367 Int offs = e->Iex.Get.offset; 3368 if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) { 3369 HReg rD = newVRegD(env); 3370 HReg rN = get_baseblock_register(); 3371 addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs)); 3372 return rD; 3373 } 3374 } 3375 3376 if (e->tag == Iex_Binop) { 3377 switch (e->Iex.Binop.op) { 3378 case Iop_F32toF16: { 3379 HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2); 3380 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); 3381 HReg dstH = newVRegD(env); 3382 addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS)); 3383 return dstH; 3384 } 3385 case Iop_F64toF16: { 3386 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2); 3387 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); 3388 HReg dstH = newVRegD(env); 3389 addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD)); 3390 return dstH; 3391 } 3392 default: 3393 break; 3394 } 3395 } 3396 3397 ppIRExpr(e); 3398 vpanic("iselF16Expr_wrk"); 3399} 3400 3401 3402/*---------------------------------------------------------*/ 3403/*--- ISEL: Vector expressions (256 bit) ---*/ 3404/*---------------------------------------------------------*/ 3405 3406static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo, 3407 ISelEnv* env, IRExpr* e ) 3408{ 3409 iselV256Expr_wrk( rHi, rLo, env, e ); 3410 vassert(hregClass(*rHi) == HRcVec128); 3411 vassert(hregClass(*rLo) == HRcVec128); 3412 
vassert(hregIsVirtual(*rHi)); 3413 vassert(hregIsVirtual(*rLo)); 3414} 3415 3416/* DO NOT CALL THIS DIRECTLY */ 3417static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo, 3418 ISelEnv* env, IRExpr* e ) 3419{ 3420 vassert(e); 3421 IRType ty = typeOfIRExpr(env->type_env,e); 3422 vassert(ty == Ity_V256); 3423 3424 /* read 256-bit IRTemp */ 3425 if (e->tag == Iex_RdTmp) { 3426 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp); 3427 return; 3428 } 3429 3430 if (e->tag == Iex_Binop) { 3431 switch (e->Iex.Binop.op) { 3432 case Iop_V128HLtoV256: { 3433 *rHi = iselV128Expr(env, e->Iex.Binop.arg1); 3434 *rLo = iselV128Expr(env, e->Iex.Binop.arg2); 3435 return; 3436 } 3437 case Iop_QandSQsh64x2: 3438 case Iop_QandSQsh32x4: 3439 case Iop_QandSQsh16x8: 3440 case Iop_QandSQsh8x16: 3441 case Iop_QandUQsh64x2: 3442 case Iop_QandUQsh32x4: 3443 case Iop_QandUQsh16x8: 3444 case Iop_QandUQsh8x16: 3445 case Iop_QandSQRsh64x2: 3446 case Iop_QandSQRsh32x4: 3447 case Iop_QandSQRsh16x8: 3448 case Iop_QandSQRsh8x16: 3449 case Iop_QandUQRsh64x2: 3450 case Iop_QandUQRsh32x4: 3451 case Iop_QandUQRsh16x8: 3452 case Iop_QandUQRsh8x16: 3453 { 3454 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1); 3455 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2); 3456 HReg fpsr = newVRegI(env); 3457 HReg resHi = newVRegV(env); 3458 HReg resLo = newVRegV(env); 3459 ARM64VecBinOp op = ARM64vecb_INVALID; 3460 switch (e->Iex.Binop.op) { 3461 case Iop_QandSQsh64x2: op = ARM64vecb_SQSHL64x2; break; 3462 case Iop_QandSQsh32x4: op = ARM64vecb_SQSHL32x4; break; 3463 case Iop_QandSQsh16x8: op = ARM64vecb_SQSHL16x8; break; 3464 case Iop_QandSQsh8x16: op = ARM64vecb_SQSHL8x16; break; 3465 case Iop_QandUQsh64x2: op = ARM64vecb_UQSHL64x2; break; 3466 case Iop_QandUQsh32x4: op = ARM64vecb_UQSHL32x4; break; 3467 case Iop_QandUQsh16x8: op = ARM64vecb_UQSHL16x8; break; 3468 case Iop_QandUQsh8x16: op = ARM64vecb_UQSHL8x16; break; 3469 case Iop_QandSQRsh64x2: op = ARM64vecb_SQRSHL64x2; break; 3470 case 
Iop_QandSQRsh32x4: op = ARM64vecb_SQRSHL32x4; break; 3471 case Iop_QandSQRsh16x8: op = ARM64vecb_SQRSHL16x8; break; 3472 case Iop_QandSQRsh8x16: op = ARM64vecb_SQRSHL8x16; break; 3473 case Iop_QandUQRsh64x2: op = ARM64vecb_UQRSHL64x2; break; 3474 case Iop_QandUQRsh32x4: op = ARM64vecb_UQRSHL32x4; break; 3475 case Iop_QandUQRsh16x8: op = ARM64vecb_UQRSHL16x8; break; 3476 case Iop_QandUQRsh8x16: op = ARM64vecb_UQRSHL8x16; break; 3477 default: vassert(0); 3478 } 3479 /* Clear FPSR.Q, do the operation, and return both its result 3480 and the new value of FPSR.Q. We can simply zero out FPSR 3481 since all the other bits have no relevance in VEX generated 3482 code. */ 3483 addInstr(env, ARM64Instr_Imm64(fpsr, 0)); 3484 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr)); 3485 addInstr(env, ARM64Instr_VBinV(op, resLo, argL, argR)); 3486 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr)); 3487 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27), 3488 ARM64sh_SHR)); 3489 ARM64RIL* ril_one = mb_mkARM64RIL_I(1); 3490 vassert(ril_one); 3491 addInstr(env, ARM64Instr_Logic(fpsr, fpsr, ril_one, ARM64lo_AND)); 3492 /* Now we have: the main (shift) result in |resLo|, and the 3493 Q bit at the bottom of |fpsr|. */ 3494 addInstr(env, ARM64Instr_VQfromX(resHi, fpsr)); 3495 *rHi = resHi; 3496 *rLo = resLo; 3497 return; 3498 } 3499 3500 /* ... 
*/ 3501 default: 3502 break; 3503 } /* switch on the binop */ 3504 } /* if (e->tag == Iex_Binop) */ 3505 3506 ppIRExpr(e); 3507 vpanic("iselV256Expr_wrk"); 3508} 3509 3510 3511/*---------------------------------------------------------*/ 3512/*--- ISEL: Statements ---*/ 3513/*---------------------------------------------------------*/ 3514 3515static void iselStmt ( ISelEnv* env, IRStmt* stmt ) 3516{ 3517 if (vex_traceflags & VEX_TRACE_VCODE) { 3518 vex_printf("\n-- "); 3519 ppIRStmt(stmt); 3520 vex_printf("\n"); 3521 } 3522 switch (stmt->tag) { 3523 3524 /* --------- STORE --------- */ 3525 /* little-endian write to memory */ 3526 case Ist_Store: { 3527 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr); 3528 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); 3529 IREndness end = stmt->Ist.Store.end; 3530 3531 if (tya != Ity_I64 || end != Iend_LE) 3532 goto stmt_fail; 3533 3534 if (tyd == Ity_I64) { 3535 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 3536 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); 3537 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am)); 3538 return; 3539 } 3540 if (tyd == Ity_I32) { 3541 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 3542 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); 3543 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am)); 3544 return; 3545 } 3546 if (tyd == Ity_I16) { 3547 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 3548 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); 3549 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am)); 3550 return; 3551 } 3552 if (tyd == Ity_I8) { 3553 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 3554 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); 3555 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am)); 3556 return; 3557 } 3558 if (tyd == Ity_V128) { 3559 HReg qD = iselV128Expr(env, stmt->Ist.Store.data); 3560 HReg addr = iselIntExpr_R(env, 
stmt->Ist.Store.addr); 3561 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr)); 3562 return; 3563 } 3564 if (tyd == Ity_F64) { 3565 HReg dD = iselDblExpr(env, stmt->Ist.Store.data); 3566 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr); 3567 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0)); 3568 return; 3569 } 3570 if (tyd == Ity_F32) { 3571 HReg sD = iselFltExpr(env, stmt->Ist.Store.data); 3572 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr); 3573 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0)); 3574 return; 3575 } 3576 break; 3577 } 3578 3579 /* --------- PUT --------- */ 3580 /* write guest state, fixed offset */ 3581 case Ist_Put: { 3582 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); 3583 UInt offs = (UInt)stmt->Ist.Put.offset; 3584 if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) { 3585 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); 3586 ARM64AMode* am = mk_baseblock_64bit_access_amode(offs); 3587 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am)); 3588 return; 3589 } 3590 if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) { 3591 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); 3592 ARM64AMode* am = mk_baseblock_32bit_access_amode(offs); 3593 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am)); 3594 return; 3595 } 3596 if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) { 3597 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); 3598 ARM64AMode* am = mk_baseblock_16bit_access_amode(offs); 3599 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am)); 3600 return; 3601 } 3602 if (tyd == Ity_I8 && offs < (1<<12)) { 3603 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); 3604 ARM64AMode* am = mk_baseblock_8bit_access_amode(offs); 3605 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am)); 3606 return; 3607 } 3608 if (tyd == Ity_V128 && offs < (1<<12)) { 3609 HReg qD = iselV128Expr(env, stmt->Ist.Put.data); 3610 HReg addr = mk_baseblock_128bit_access_addr(env, 
offs); 3611 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr)); 3612 return; 3613 } 3614 if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) { 3615 HReg dD = iselDblExpr(env, stmt->Ist.Put.data); 3616 HReg bbp = get_baseblock_register(); 3617 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs)); 3618 return; 3619 } 3620 if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) { 3621 HReg sD = iselFltExpr(env, stmt->Ist.Put.data); 3622 HReg bbp = get_baseblock_register(); 3623 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs)); 3624 return; 3625 } 3626 if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) { 3627 HReg hD = iselF16Expr(env, stmt->Ist.Put.data); 3628 HReg bbp = get_baseblock_register(); 3629 addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs)); 3630 return; 3631 } 3632 3633 break; 3634 } 3635 3636 /* --------- TMP --------- */ 3637 /* assign value to temporary */ 3638 case Ist_WrTmp: { 3639 IRTemp tmp = stmt->Ist.WrTmp.tmp; 3640 IRType ty = typeOfIRTemp(env->type_env, tmp); 3641 3642 if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) { 3643 /* We could do a lot better here. But for the time being: */ 3644 HReg dst = lookupIRTemp(env, tmp); 3645 HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data); 3646 addInstr(env, ARM64Instr_MovI(dst, rD)); 3647 return; 3648 } 3649 if (ty == Ity_I1) { 3650 /* Here, we are generating a I1 value into a 64 bit register. 3651 Make sure the value in the register is only zero or one, 3652 but no other. This allows optimisation of the 3653 1Uto64(tmp:I1) case, by making it simply a copy of the 3654 register holding 'tmp'. The point being that the value in 3655 the register holding 'tmp' can only have been created 3656 here. LATER: that seems dangerous; safer to do 'tmp & 1' 3657 in that case. Also, could do this just with a single CINC 3658 insn. 
*/ 3659 /* CLONE-01 */ 3660 HReg zero = newVRegI(env); 3661 HReg one = newVRegI(env); 3662 HReg dst = lookupIRTemp(env, tmp); 3663 addInstr(env, ARM64Instr_Imm64(zero, 0)); 3664 addInstr(env, ARM64Instr_Imm64(one, 1)); 3665 ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data); 3666 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); 3667 return; 3668 } 3669 if (ty == Ity_F64) { 3670 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data); 3671 HReg dst = lookupIRTemp(env, tmp); 3672 addInstr(env, ARM64Instr_VMov(8, dst, src)); 3673 return; 3674 } 3675 if (ty == Ity_F32) { 3676 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); 3677 HReg dst = lookupIRTemp(env, tmp); 3678 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src)); 3679 return; 3680 } 3681 if (ty == Ity_V128) { 3682 HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data); 3683 HReg dst = lookupIRTemp(env, tmp); 3684 addInstr(env, ARM64Instr_VMov(16, dst, src)); 3685 return; 3686 } 3687 if (ty == Ity_V256) { 3688 HReg srcHi, srcLo, dstHi, dstLo; 3689 iselV256Expr(&srcHi,&srcLo, env, stmt->Ist.WrTmp.data); 3690 lookupIRTempPair( &dstHi, &dstLo, env, tmp); 3691 addInstr(env, ARM64Instr_VMov(16, dstHi, srcHi)); 3692 addInstr(env, ARM64Instr_VMov(16, dstLo, srcLo)); 3693 return; 3694 } 3695 break; 3696 } 3697 3698 /* --------- Call to DIRTY helper --------- */ 3699 /* call complex ("dirty") helper function */ 3700 case Ist_Dirty: { 3701 IRDirty* d = stmt->Ist.Dirty.details; 3702 3703 /* Figure out the return type, if any. 
*/ 3704 IRType retty = Ity_INVALID; 3705 if (d->tmp != IRTemp_INVALID) 3706 retty = typeOfIRTemp(env->type_env, d->tmp); 3707 3708 Bool retty_ok = False; 3709 switch (retty) { 3710 case Ity_INVALID: /* function doesn't return anything */ 3711 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: 3712 case Ity_V128: 3713 retty_ok = True; break; 3714 default: 3715 break; 3716 } 3717 if (!retty_ok) 3718 break; /* will go to stmt_fail: */ 3719 3720 /* Marshal args, do the call, and set the return value to 0x555..555 3721 if this is a conditional call that returns a value and the 3722 call is skipped. */ 3723 UInt addToSp = 0; 3724 RetLoc rloc = mk_RetLoc_INVALID(); 3725 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args ); 3726 vassert(is_sane_RetLoc(rloc)); 3727 3728 /* Now figure out what to do with the returned value, if any. */ 3729 switch (retty) { 3730 case Ity_INVALID: { 3731 /* No return value. Nothing to do. */ 3732 vassert(d->tmp == IRTemp_INVALID); 3733 vassert(rloc.pri == RLPri_None); 3734 vassert(addToSp == 0); 3735 return; 3736 } 3737 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: { 3738 vassert(rloc.pri == RLPri_Int); 3739 vassert(addToSp == 0); 3740 /* The returned value is in x0. Park it in the register 3741 associated with tmp. */ 3742 HReg dst = lookupIRTemp(env, d->tmp); 3743 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) ); 3744 return; 3745 } 3746 case Ity_V128: { 3747 /* The returned value is on the stack, and *retloc tells 3748 us where. Fish it off the stack and then move the 3749 stack pointer upwards to clear it, as directed by 3750 doHelperCall. 
*/ 3751 vassert(rloc.pri == RLPri_V128SpRel); 3752 vassert(rloc.spOff < 256); // stay sane 3753 vassert(addToSp >= 16); // ditto 3754 vassert(addToSp < 256); // ditto 3755 HReg dst = lookupIRTemp(env, d->tmp); 3756 HReg tmp = newVRegI(env); // the address of the returned value 3757 addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP 3758 addInstr(env, ARM64Instr_Arith(tmp, tmp, 3759 ARM64RIA_I12((UShort)rloc.spOff, 0), 3760 True/*isAdd*/ )); 3761 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp)); 3762 addInstr(env, ARM64Instr_AddToSP(addToSp)); 3763 return; 3764 } 3765 default: 3766 /*NOTREACHED*/ 3767 vassert(0); 3768 } 3769 break; 3770 } 3771 3772 /* --------- Load Linked and Store Conditional --------- */ 3773 case Ist_LLSC: { 3774 if (stmt->Ist.LLSC.storedata == NULL) { 3775 /* LL */ 3776 IRTemp res = stmt->Ist.LLSC.result; 3777 IRType ty = typeOfIRTemp(env->type_env, res); 3778 if (ty == Ity_I64 || ty == Ity_I32 3779 || ty == Ity_I16 || ty == Ity_I8) { 3780 Int szB = 0; 3781 HReg r_dst = lookupIRTemp(env, res); 3782 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr); 3783 switch (ty) { 3784 case Ity_I8: szB = 1; break; 3785 case Ity_I16: szB = 2; break; 3786 case Ity_I32: szB = 4; break; 3787 case Ity_I64: szB = 8; break; 3788 default: vassert(0); 3789 } 3790 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr)); 3791 addInstr(env, ARM64Instr_LdrEX(szB)); 3792 addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2())); 3793 return; 3794 } 3795 goto stmt_fail; 3796 } else { 3797 /* SC */ 3798 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata); 3799 if (tyd == Ity_I64 || tyd == Ity_I32 3800 || tyd == Ity_I16 || tyd == Ity_I8) { 3801 Int szB = 0; 3802 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata); 3803 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr); 3804 switch (tyd) { 3805 case Ity_I8: szB = 1; break; 3806 case Ity_I16: szB = 2; break; 3807 case Ity_I32: szB = 4; break; 3808 case Ity_I64: szB = 8; break; 3809 default: vassert(0); 
3810 } 3811 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD)); 3812 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA)); 3813 addInstr(env, ARM64Instr_StrEX(szB)); 3814 } else { 3815 goto stmt_fail; 3816 } 3817 /* now r0 is 1 if failed, 0 if success. Change to IR 3818 conventions (0 is fail, 1 is success). Also transfer 3819 result to r_res. */ 3820 IRTemp res = stmt->Ist.LLSC.result; 3821 IRType ty = typeOfIRTemp(env->type_env, res); 3822 HReg r_res = lookupIRTemp(env, res); 3823 ARM64RIL* one = mb_mkARM64RIL_I(1); 3824 vassert(ty == Ity_I1); 3825 vassert(one); 3826 addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one, 3827 ARM64lo_XOR)); 3828 /* And be conservative -- mask off all but the lowest bit. */ 3829 addInstr(env, ARM64Instr_Logic(r_res, r_res, one, 3830 ARM64lo_AND)); 3831 return; 3832 } 3833 break; 3834 } 3835 3836 /* --------- MEM FENCE --------- */ 3837 case Ist_MBE: 3838 switch (stmt->Ist.MBE.event) { 3839 case Imbe_Fence: 3840 addInstr(env, ARM64Instr_MFence()); 3841 return; 3842 default: 3843 break; 3844 } 3845 break; 3846 3847 /* --------- INSTR MARK --------- */ 3848 /* Doesn't generate any executable code ... */ 3849 case Ist_IMark: 3850 return; 3851 3852 /* --------- ABI HINT --------- */ 3853 /* These have no meaning (denotation in the IR) and so we ignore 3854 them ... if any actually made it this far. 
*/ 3855 case Ist_AbiHint: 3856 return; 3857 3858 /* --------- NO-OP --------- */ 3859 case Ist_NoOp: 3860 return; 3861 3862 /* --------- EXIT --------- */ 3863 case Ist_Exit: { 3864 if (stmt->Ist.Exit.dst->tag != Ico_U64) 3865 vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value"); 3866 3867 ARM64CondCode cc 3868 = iselCondCode(env, stmt->Ist.Exit.guard); 3869 ARM64AMode* amPC 3870 = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP); 3871 3872 /* Case: boring transfer to known address */ 3873 if (stmt->Ist.Exit.jk == Ijk_Boring 3874 /*ATC || stmt->Ist.Exit.jk == Ijk_Call */ 3875 /*ATC || stmt->Ist.Exit.jk == Ijk_Ret */ ) { 3876 if (env->chainingAllowed) { 3877 /* .. almost always true .. */ 3878 /* Skip the event check at the dst if this is a forwards 3879 edge. */ 3880 Bool toFastEP 3881 = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga; 3882 if (0) vex_printf("%s", toFastEP ? "Y" : ","); 3883 addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64, 3884 amPC, cc, toFastEP)); 3885 } else { 3886 /* .. very occasionally .. */ 3887 /* We can't use chaining, so ask for an assisted transfer, 3888 as that's the only alternative that is allowable. */ 3889 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 3890 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring)); 3891 } 3892 return; 3893 } 3894 3895 /* Do we ever expect to see any other kind? 
*/ 3896 goto stmt_fail; 3897 } 3898 3899 default: break; 3900 } 3901 stmt_fail: 3902 ppIRStmt(stmt); 3903 vpanic("iselStmt"); 3904} 3905 3906 3907/*---------------------------------------------------------*/ 3908/*--- ISEL: Basic block terminators (Nexts) ---*/ 3909/*---------------------------------------------------------*/ 3910 3911static void iselNext ( ISelEnv* env, 3912 IRExpr* next, IRJumpKind jk, Int offsIP ) 3913{ 3914 if (vex_traceflags & VEX_TRACE_VCODE) { 3915 vex_printf( "\n-- PUT(%d) = ", offsIP); 3916 ppIRExpr( next ); 3917 vex_printf( "; exit-"); 3918 ppIRJumpKind(jk); 3919 vex_printf( "\n"); 3920 } 3921 3922 /* Case: boring transfer to known address */ 3923 if (next->tag == Iex_Const) { 3924 IRConst* cdst = next->Iex.Const.con; 3925 vassert(cdst->tag == Ico_U64); 3926 if (jk == Ijk_Boring || jk == Ijk_Call) { 3927 /* Boring transfer to known address */ 3928 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP); 3929 if (env->chainingAllowed) { 3930 /* .. almost always true .. */ 3931 /* Skip the event check at the dst if this is a forwards 3932 edge. */ 3933 Bool toFastEP 3934 = ((Addr64)cdst->Ico.U64) > env->max_ga; 3935 if (0) vex_printf("%s", toFastEP ? "X" : "."); 3936 addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64, 3937 amPC, ARM64cc_AL, 3938 toFastEP)); 3939 } else { 3940 /* .. very occasionally .. */ 3941 /* We can't use chaining, so ask for an assisted transfer, 3942 as that's the only alternative that is allowable. 
*/ 3943 HReg r = iselIntExpr_R(env, next); 3944 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, 3945 Ijk_Boring)); 3946 } 3947 return; 3948 } 3949 } 3950 3951 /* Case: call/return (==boring) transfer to any address */ 3952 switch (jk) { 3953 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: { 3954 HReg r = iselIntExpr_R(env, next); 3955 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP); 3956 if (env->chainingAllowed) { 3957 addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL)); 3958 } else { 3959 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, 3960 Ijk_Boring)); 3961 } 3962 return; 3963 } 3964 default: 3965 break; 3966 } 3967 3968 /* Case: assisted transfer to arbitrary address */ 3969 switch (jk) { 3970 /* Keep this list in sync with that for Ist_Exit above */ 3971 case Ijk_ClientReq: 3972 case Ijk_NoDecode: 3973 case Ijk_NoRedir: 3974 case Ijk_Sys_syscall: 3975 case Ijk_InvalICache: 3976 case Ijk_FlushDCache: 3977 case Ijk_SigTRAP: 3978 { 3979 HReg r = iselIntExpr_R(env, next); 3980 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP); 3981 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk)); 3982 return; 3983 } 3984 default: 3985 break; 3986 } 3987 3988 vex_printf( "\n-- PUT(%d) = ", offsIP); 3989 ppIRExpr( next ); 3990 vex_printf( "; exit-"); 3991 ppIRJumpKind(jk); 3992 vex_printf( "\n"); 3993 vassert(0); // are we expecting any other kind? 3994} 3995 3996 3997/*---------------------------------------------------------*/ 3998/*--- Insn selector top-level ---*/ 3999/*---------------------------------------------------------*/ 4000 4001/* Translate an entire SB to arm64 code. 
*/

/* Top-level entry point of the arm64 instruction selector.

   bb                      the IR superblock to translate
   arch_host               must be VexArchARM64 (asserted)
   archinfo_host           host description; must be little-endian
                           (asserted); its hwcaps word is copied
                           into the environment
   vbi                     not used here
   offs_Host_EvC_Counter,
   offs_Host_EvC_FailAddr  offsets, relative to X21, of the two
                           fields used by the leading event check
   chainingAllowed         copied into the environment; decides
                           whether chained transfers may be used
   addProfInc              if True, a profile-counter increment is
                           emitted straight after the event check
   max_ga                  copied into the environment; used to
                           recognise forward edges

   Returns the array of selected instructions, with n_vregs set to
   the number of virtual registers handed out. */
HInstrArray* iselSB_ARM64 ( const IRSB* bb,
                            VexArch arch_host,
                            const VexArchInfo* archinfo_host,
                            const VexAbiInfo* vbi/*UNUSED*/,
                            Int offs_Host_EvC_Counter,
                            Int offs_Host_EvC_FailAddr,
                            Bool chainingAllowed,
                            Bool addProfInc,
                            Addr max_ga )
{
   Int         tmpIx, stmtIx, nVregs;
   ISelEnv*    env;
   ARM64AMode* amEvcCounter;
   ARM64AMode* amEvcFailAddr;
   UInt        hostCaps = archinfo_host->hwcaps;

   /* Only an arm64, little-endian host is supported. */
   vassert(arch_host == VexArchARM64);
   vassert(archinfo_host->endness == VexEndnessLE);

   /* Catch accidental growth of the instruction structure. */
   vassert(sizeof(ARM64Instr) <= 32);

   /* Build the initial environment: empty code array, the block's
      own type environment, and one vregmap/vregmapHI slot per
      IRTemp. */
   env            = LibVEX_Alloc_inline(sizeof(*env));
   env->vreg_ctr  = 0;
   env->code      = newHInstrArray();
   env->type_env  = bb->tyenv;
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));

   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hostCaps;
   env->previous_rm     = NULL;
   env->max_ga          = max_ga;

   /* Give every IRTemp a virtual register of a suitable class.
      I128 and V256 temps get a second register, recorded in
      vregmapHI; for all other types that slot is INVALID_HREG. */
   nVregs = 0;
   for (tmpIx = 0; tmpIx < env->n_vregmap; tmpIx++) {
      HRegClass rc       = HRcInt64;
      Bool      needPair = False;
      HReg      lo, hi;

      switch (bb->tyenv->types[tmpIx]) {
         case Ity_I1:
         case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
            rc = HRcInt64;
            break;
         case Ity_I128:
            rc = HRcInt64;
            needPair = True;
            break;
         case Ity_F16: // HRcFlt64 regs are used for F16 too
         case Ity_F32: // ... and for F32
         case Ity_F64:
            rc = HRcFlt64;
            break;
         case Ity_V128:
            rc = HRcVec128;
            break;
         case Ity_V256:
            rc = HRcVec128;
            needPair = True;
            break;
         default:
            ppIRType(bb->tyenv->types[tmpIx]);
            vpanic("iselBB(arm64): IRTemp type");
      }

      /* The primary register takes the lower vreg number, the
         second one (if any) the next. */
      lo = mkHReg(True, rc, 0, nVregs++);
      hi = INVALID_HREG;
      if (needPair)
         hi = mkHReg(True, rc, 0, nVregs++);

      env->vregmap[tmpIx]   = lo;
      env->vregmapHI[tmpIx] = hi;
   }
   env->vreg_ctr = nVregs;

   /* The very first instruction must be an event check. */
   amEvcCounter  = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
   amEvcFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
   addInstr(env, ARM64Instr_EvCheck(amEvcCounter, amEvcFailAddr));

   /* Optional block counter increment (for profiling).  The
      counter's address is not known yet, so the insn is emitted as
      if it were zero; it must be patched, before this translation
      is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, ARM64Instr_ProfInc());
   }

   /* Select code for each statement in turn, then for the block
      end. */
   for (stmtIx = 0; stmtIx < bb->stmts_used; stmtIx++)
      iselStmt(env, bb->stmts[stmtIx]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* Record how many vregs were used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                     host_arm64_isel.c ---*/
/*---------------------------------------------------------------*/