1 2/*---------------------------------------------------------------*/ 3/*--- begin host_arm64_isel.c ---*/ 4/*---------------------------------------------------------------*/ 5 6/* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2013-2013 OpenWorks 11 info@open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29*/ 30 31#include "libvex_basictypes.h" 32#include "libvex_ir.h" 33#include "libvex.h" 34#include "ir_match.h" 35 36#include "main_util.h" 37#include "main_globals.h" 38#include "host_generic_regs.h" 39#include "host_generic_simd64.h" // for 32-bit SIMD helpers 40#include "host_arm64_defs.h" 41 42 43//ZZ /*---------------------------------------------------------*/ 44//ZZ /*--- ARMvfp control word stuff ---*/ 45//ZZ /*---------------------------------------------------------*/ 46//ZZ 47//ZZ /* Vex-generated code expects to run with the FPU set as follows: all 48//ZZ exceptions masked, round-to-nearest, non-vector mode, with the NZCV 49//ZZ flags cleared, and FZ (flush to zero) disabled. Curiously enough, 50//ZZ this corresponds to a FPSCR value of zero. 51//ZZ 52//ZZ fpscr should therefore be zero on entry to Vex-generated code, and 53//ZZ should be unchanged at exit. 
(Or at least the bottom 28 bits 54//ZZ should be zero). 55//ZZ */ 56//ZZ 57//ZZ #define DEFAULT_FPSCR 0 58 59 60/*---------------------------------------------------------*/ 61/*--- ISelEnv ---*/ 62/*---------------------------------------------------------*/ 63 64/* This carries around: 65 66 - A mapping from IRTemp to IRType, giving the type of any IRTemp we 67 might encounter. This is computed before insn selection starts, 68 and does not change. 69 70 - A mapping from IRTemp to HReg. This tells the insn selector 71 which virtual register is associated with each IRTemp temporary. 72 This is computed before insn selection starts, and does not 73 change. We expect this mapping to map precisely the same set of 74 IRTemps as the type mapping does. 75 76 |vregmap| holds the primary register for the IRTemp. 77 |vregmapHI| is only used for 128-bit integer-typed 78 IRTemps. It holds the identity of a second 79 64-bit virtual HReg, which holds the high half 80 of the value. 81 82 - The code array, that is, the insns selected so far. 83 84 - A counter, for generating new virtual registers. 85 86 - The host hardware capabilities word. This is set at the start 87 and does not change. 88 89 - A Bool for indicating whether we may generate chain-me 90 instructions for control flow transfers, or whether we must use 91 XAssisted. 92 93 - The maximum guest address of any guest insn in this block. 94 Actually, the address of the highest-addressed byte from any insn 95 in this block. Is set at the start and does not change. This is 96 used for detecting jumps which are definitely forward-edges from 97 this block, and therefore can be made (chained) to the fast entry 98 point of the destination, thereby avoiding the destination's 99 event check. 100 101 - An IRExpr*, which may be NULL, holding the IR expression (an 102 IRRoundingMode-encoded value) to which the FPU's rounding mode 103 was most recently set. Setting to NULL is always safe. 
      Used to
      avoid redundant settings of the FPU's rounding mode, as
      described in set_FPCR_rounding_mode below.

   Note, this is all (well, mostly) host-independent.
*/

typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv*   type_env;

      /* Per-IRTemp virtual register map.  |vregmapHI| is only used
         for 128-bit integer IRTemps, holding the high-half register. */
      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr64       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;

      /* Expression the FP rounding mode was most recently set from;
         NULL if unknown.  See set_FPCR_rounding_mode. */
      IRExpr*      previous_rm;
   }
   ISelEnv;

/* Return the virtual register bound to IRTemp |tmp|. */
static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

/* Append |instr| to the code being generated, printing it too if
   instruction-selection tracing is enabled. */
static void addInstr ( ISelEnv* env, ARM64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppARM64Instr(instr);
      vex_printf("\n");
   }
}

/* Allocate a new 64-bit integer-class virtual register. */
static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

/* Allocate a new 64-bit float-class virtual register. */
static HReg newVRegD ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

//ZZ static HReg newVRegF ( ISelEnv* env )
//ZZ {
//ZZ    HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
//ZZ    env->vreg_ctr++;
//ZZ    return reg;
//ZZ }

/* Allocate a new 128-bit vector-class virtual register. */
static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

//ZZ /* These are duplicated in guest_arm_toIR.c */
//ZZ static IRExpr* unop ( IROp op, IRExpr* a )
//ZZ {
//ZZ    return IRExpr_Unop(op, a);
//ZZ }
//ZZ 
//ZZ static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
//ZZ {
//ZZ    return IRExpr_Binop(op, a1, a2);
//ZZ }
//ZZ 
//ZZ static IRExpr* bind ( Int binder )
//ZZ {
//ZZ    return IRExpr_Binder(binder);
//ZZ }


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.

   Because some forms of ARM64 memory amodes are implicitly scaled by
   the access size, iselIntExpr_AMode takes an IRType which tells it
   the type of the access for which the amode is to be used.  This
   type needs to be correct, else you'll get incorrect code.
*/
static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
                                           IRExpr* e, IRType dty );
static ARM64AMode* iselIntExpr_AMode     ( ISelEnv* env,
                                           IRExpr* e, IRType dty );

static ARM64RIA*   iselIntExpr_RIA_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RIA*   iselIntExpr_RIA       ( ISelEnv* env, IRExpr* e );

static ARM64RIL*   iselIntExpr_RIL_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RIL*   iselIntExpr_RIL       ( ISelEnv* env, IRExpr* e );

static ARM64RI6*   iselIntExpr_RI6_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RI6*   iselIntExpr_RI6       ( ISelEnv* env, IRExpr* e );

static ARM64CondCode iselCondCode_wrk    ( ISelEnv* env, IRExpr* e );
static ARM64CondCode iselCondCode        ( ISelEnv* env, IRExpr* e );

static HReg        iselIntExpr_R_wrk     ( ISelEnv* env, IRExpr* e );
static HReg        iselIntExpr_R         ( ISelEnv* env, IRExpr* e );

/* 128-bit integer results are returned as a (high, low) register
   pair via the OUT parameters. */
static void        iselInt128Expr_wrk    ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );
static void        iselInt128Expr        ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );


//ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
//ZZ                                 ISelEnv* env, IRExpr* e );
//ZZ static void iselInt64Expr     ( HReg* rHi, HReg* rLo,
//ZZ                                 ISelEnv*
//ZZ                                 env, IRExpr* e );

static HReg        iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg        iselDblExpr     ( ISelEnv* env, IRExpr* e );

static HReg        iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg        iselFltExpr     ( ISelEnv* env, IRExpr* e );

//ZZ static HReg        iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
//ZZ static HReg        iselNeon64Expr     ( ISelEnv* env, IRExpr* e );

static HReg        iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
static HReg        iselV128Expr     ( ISelEnv* env, IRExpr* e );

/* Maybe-make a logic-immediate operand encoding |imm64|; returns
   NULL if the value has no such encoding. */
static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

/* Generate an amode suitable for a 64-bit sized access relative to
   the baseblock register (X21).  This generates an RI12 amode, which
   means its scaled by the access size, which is why the access size
   -- 64 bit -- is stated explicitly here.  Consequently |off| needs
   to be divisible by 8. */
static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
{
   vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
   vassert((off & 7) == 0);  /* ditto */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
}

/* Ditto, for 32 bit accesses. */
static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
{
   vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
   vassert((off & 3) == 0);  /* ditto */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
}

/* Ditto, for 16 bit accesses. */
static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
{
   vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
   vassert((off & 1) == 0);  /* ditto */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
}

/* Ditto, for 8 bit accesses.
*/ 287static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off ) 288{ 289 vassert(off < (1 << 12)); /* otherwise it's unrepresentable */ 290 return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/); 291} 292 293static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off ) 294{ 295 vassert(off < (1<<12)); 296 HReg r = newVRegI(env); 297 addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(), 298 ARM64RIA_I12(off,0), True/*isAdd*/)); 299 return r; 300} 301 302static HReg get_baseblock_register ( void ) 303{ 304 return hregARM64_X21(); 305} 306 307/* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in 308 a new register, and return the new register. */ 309static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src ) 310{ 311 HReg dst = newVRegI(env); 312 ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */ 313 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND)); 314 return dst; 315} 316 317/* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in 318 a new register, and return the new register. */ 319static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src ) 320{ 321 HReg dst = newVRegI(env); 322 ARM64RI6* n48 = ARM64RI6_I6(48); 323 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL)); 324 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR)); 325 return dst; 326} 327 328/* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in 329 a new register, and return the new register. */ 330static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src ) 331{ 332 HReg dst = newVRegI(env); 333 ARM64RI6* n48 = ARM64RI6_I6(48); 334 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL)); 335 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SHR)); 336 return dst; 337} 338 339/* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in 340 a new register, and return the new register. 
*/ 341static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src ) 342{ 343 HReg dst = newVRegI(env); 344 ARM64RI6* n32 = ARM64RI6_I6(32); 345 addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL)); 346 addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR)); 347 return dst; 348} 349 350/* Generate code to sign extend a 8 bit value in 'src' to 64 bits, in 351 a new register, and return the new register. */ 352static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src ) 353{ 354 HReg dst = newVRegI(env); 355 ARM64RI6* n56 = ARM64RI6_I6(56); 356 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL)); 357 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR)); 358 return dst; 359} 360 361static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src ) 362{ 363 HReg dst = newVRegI(env); 364 ARM64RI6* n56 = ARM64RI6_I6(56); 365 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL)); 366 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SHR)); 367 return dst; 368} 369 370/* Is this IRExpr_Const(IRConst_U64(0)) ? */ 371static Bool isZeroU64 ( IRExpr* e ) { 372 if (e->tag != Iex_Const) return False; 373 IRConst* con = e->Iex.Const.con; 374 vassert(con->tag == Ico_U64); 375 return con->Ico.U64 == 0; 376} 377 378 379/*---------------------------------------------------------*/ 380/*--- ISEL: FP rounding mode helpers ---*/ 381/*---------------------------------------------------------*/ 382 383/* Set the FP rounding mode: 'mode' is an I32-typed expression 384 denoting a value in the range 0 .. 3, indicating a round mode 385 encoded as per type IRRoundingMode -- the first four values only 386 (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the PPC 387 FSCR to have the same rounding. 388 389 For speed & simplicity, we're setting the *entire* FPCR here. 390 391 Setting the rounding mode is expensive. 
   So this function tries to
   avoid repeatedly setting the rounding mode to the same thing by
   first comparing 'mode' to the 'mode' tree supplied in the previous
   call to this function, if any.  (The previous value is stored in
   env->previous_rm.)  If 'mode' is a single IR temporary 't' and
   env->previous_rm is also just 't', then the setting is skipped.

   This is safe because of the SSA property of IR: an IR temporary can
   only be defined once and so will have the same value regardless of
   where it appears in the block.  Cool stuff, SSA.

   A safety condition: all attempts to set the RM must be aware of
   this mechanism - by being routed through the functions here.

   Of course this only helps if blocks where the RM is set more than
   once and it is set to the same value each time, *and* that value is
   held in the same IR temporary each time.  In order to assure the
   latter as much as possible, the IR optimiser takes care to do CSE
   on any block with any sign of floating point activity.
*/
static
void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);

   /* Do we need to do anything? */
   if (env->previous_rm
       && env->previous_rm->tag == Iex_RdTmp
       && mode->tag == Iex_RdTmp
       && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
      /* no - setting it to what it was before. */
      vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
      return;
   }

   /* No luck - we better set it, and remember what we set it to. */
   env->previous_rm = mode;

   /* Only supporting the rounding-mode bits - the rest of FPCR is set
      to zero - so we can set the whole register at once (faster). */

   /* This isn't simple, because 'mode' carries an IR rounding
      encoding, and we need to translate that to an ARM64 FP one:
      The IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      The ARM64 FP encoding:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      Easy enough to do; just swap the two bits.
   */
   HReg irrm = iselIntExpr_R(env, mode);
   HReg tL   = newVRegI(env);
   HReg tR   = newVRegI(env);
   HReg t3   = newVRegI(env);
   /* tL = irrm << 1;
      tR = irrm >> 1;  if we're lucky, these will issue together
      tL &= 2;
      tR &= 1;         ditto
      t3 = tL | tR;
      t3 <<= 22;       the RMode field is FPCR[23:22]
      msr fpcr, t3     ("fmxr fpscr" is the ARM32 equivalent)
   */
   ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
   ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
   vassert(ril_one && ril_two);
   addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
   addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
   addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
   addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
}


/*---------------------------------------------------------*/
/*--- ISEL: Function call helpers                       ---*/
/*---------------------------------------------------------*/

/* Used only in doHelperCall.  See big comment in doHelperCall re
   handling of register-parameter args.  This function figures out
   whether evaluation of an expression might require use of a fixed
   register.  If in doubt return True (safe but suboptimal).
*/
static
Bool mightRequireFixedRegs ( IRExpr* e )
{
   if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
      // These are always "safe" -- either a copy of SP in some
      // arbitrary vreg, or a copy of x21, respectively.
486 return False; 487 } 488 /* Else it's a "normal" expression. */ 489 switch (e->tag) { 490 case Iex_RdTmp: case Iex_Const: case Iex_Get: 491 return False; 492 default: 493 return True; 494 } 495} 496 497 498/* Do a complete function call. |guard| is a Ity_Bit expression 499 indicating whether or not the call happens. If guard==NULL, the 500 call is unconditional. |retloc| is set to indicate where the 501 return value is after the call. The caller (of this fn) must 502 generate code to add |stackAdjustAfterCall| to the stack pointer 503 after the call is done. Returns True iff it managed to handle this 504 combination of arg/return types, else returns False. */ 505 506static 507Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall, 508 /*OUT*/RetLoc* retloc, 509 ISelEnv* env, 510 IRExpr* guard, 511 IRCallee* cee, IRType retTy, IRExpr** args ) 512{ 513 ARM64CondCode cc; 514 HReg argregs[ARM64_N_ARGREGS]; 515 HReg tmpregs[ARM64_N_ARGREGS]; 516 Bool go_fast; 517 Int n_args, i, nextArgReg; 518 ULong target; 519 520 vassert(ARM64_N_ARGREGS == 8); 521 522 /* Set default returns. We'll update them later if needed. */ 523 *stackAdjustAfterCall = 0; 524 *retloc = mk_RetLoc_INVALID(); 525 526 /* These are used for cross-checking that IR-level constraints on 527 the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */ 528 UInt nVECRETs = 0; 529 UInt nBBPTRs = 0; 530 531 /* Marshal args for a call and do the call. 532 533 This function only deals with a tiny set of possibilities, which 534 cover all helpers in practice. The restrictions are that only 535 arguments in registers are supported, hence only 536 ARM64_N_REGPARMS x 64 integer bits in total can be passed. In 537 fact the only supported arg type is I64. 538 539 The return type can be I{64,32} or V128. In the V128 case, it 540 is expected that |args| will contain the special node 541 IRExpr_VECRET(), in which case this routine generates code to 542 allocate space on the stack for the vector return value. 
Since 543 we are not passing any scalars on the stack, it is enough to 544 preallocate the return space before marshalling any arguments, 545 in this case. 546 547 |args| may also contain IRExpr_BBPTR(), in which case the 548 value in x21 is passed as the corresponding argument. 549 550 Generating code which is both efficient and correct when 551 parameters are to be passed in registers is difficult, for the 552 reasons elaborated in detail in comments attached to 553 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant 554 of the method described in those comments. 555 556 The problem is split into two cases: the fast scheme and the 557 slow scheme. In the fast scheme, arguments are computed 558 directly into the target (real) registers. This is only safe 559 when we can be sure that computation of each argument will not 560 trash any real registers set by computation of any other 561 argument. 562 563 In the slow scheme, all args are first computed into vregs, and 564 once they are all done, they are moved to the relevant real 565 regs. This always gives correct code, but it also gives a bunch 566 of vreg-to-rreg moves which are usually redundant but are hard 567 for the register allocator to get rid of. 568 569 To decide which scheme to use, all argument expressions are 570 first examined. If they are all so simple that it is clear they 571 will be evaluated without use of any fixed registers, use the 572 fast scheme, else use the slow scheme. Note also that only 573 unconditional calls may use the fast scheme, since having to 574 compute a condition expression could itself trash real 575 registers. 576 577 Note this requires being able to examine an expression and 578 determine whether or not evaluation of it might use a fixed 579 register. That requires knowledge of how the rest of this insn 580 selector works. 
Currently just the following 3 are regarded as 581 safe -- hopefully they cover the majority of arguments in 582 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get. 583 */ 584 585 /* Note that the cee->regparms field is meaningless on ARM64 hosts 586 (since there is only one calling convention) and so we always 587 ignore it. */ 588 589 n_args = 0; 590 for (i = 0; args[i]; i++) { 591 IRExpr* arg = args[i]; 592 if (UNLIKELY(arg->tag == Iex_VECRET)) { 593 nVECRETs++; 594 } else if (UNLIKELY(arg->tag == Iex_BBPTR)) { 595 nBBPTRs++; 596 } 597 n_args++; 598 } 599 600 /* If this fails, the IR is ill-formed */ 601 vassert(nBBPTRs == 0 || nBBPTRs == 1); 602 603 /* If we have a VECRET, allocate space on the stack for the return 604 value, and record the stack pointer after that. */ 605 HReg r_vecRetAddr = INVALID_HREG; 606 if (nVECRETs == 1) { 607 vassert(retTy == Ity_V128 || retTy == Ity_V256); 608 vassert(retTy != Ity_V256); // we don't handle that yet (if ever) 609 r_vecRetAddr = newVRegI(env); 610 addInstr(env, ARM64Instr_AddToSP(-16)); 611 addInstr(env, ARM64Instr_FromSP(r_vecRetAddr)); 612 } else { 613 // If either of these fail, the IR is ill-formed 614 vassert(retTy != Ity_V128 && retTy != Ity_V256); 615 vassert(nVECRETs == 0); 616 } 617 618 argregs[0] = hregARM64_X0(); 619 argregs[1] = hregARM64_X1(); 620 argregs[2] = hregARM64_X2(); 621 argregs[3] = hregARM64_X3(); 622 argregs[4] = hregARM64_X4(); 623 argregs[5] = hregARM64_X5(); 624 argregs[6] = hregARM64_X6(); 625 argregs[7] = hregARM64_X7(); 626 627 tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG; 628 tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG; 629 630 /* First decide which scheme (slow or fast) is to be used. First 631 assume the fast scheme, and select slow if any contraindications 632 (wow) appear. 
*/ 633 634 go_fast = True; 635 636 if (guard) { 637 if (guard->tag == Iex_Const 638 && guard->Iex.Const.con->tag == Ico_U1 639 && guard->Iex.Const.con->Ico.U1 == True) { 640 /* unconditional */ 641 } else { 642 /* Not manifestly unconditional -- be conservative. */ 643 go_fast = False; 644 } 645 } 646 647 if (go_fast) { 648 for (i = 0; i < n_args; i++) { 649 if (mightRequireFixedRegs(args[i])) { 650 go_fast = False; 651 break; 652 } 653 } 654 } 655 656 if (go_fast) { 657 if (retTy == Ity_V128 || retTy == Ity_V256) 658 go_fast = False; 659 } 660 661 /* At this point the scheme to use has been established. Generate 662 code to get the arg values into the argument rregs. If we run 663 out of arg regs, give up. */ 664 665 if (go_fast) { 666 667 /* FAST SCHEME */ 668 nextArgReg = 0; 669 670 for (i = 0; i < n_args; i++) { 671 IRExpr* arg = args[i]; 672 673 IRType aTy = Ity_INVALID; 674 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 675 aTy = typeOfIRExpr(env->type_env, args[i]); 676 677 if (nextArgReg >= ARM64_N_ARGREGS) 678 return False; /* out of argregs */ 679 680 if (aTy == Ity_I64) { 681 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg], 682 iselIntExpr_R(env, args[i]) )); 683 nextArgReg++; 684 } 685 else if (arg->tag == Iex_BBPTR) { 686 vassert(0); //ATC 687 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg], 688 hregARM64_X21() )); 689 nextArgReg++; 690 } 691 else if (arg->tag == Iex_VECRET) { 692 // because of the go_fast logic above, we can't get here, 693 // since vector return values makes us use the slow path 694 // instead. 695 vassert(0); 696 } 697 else 698 return False; /* unhandled arg type */ 699 } 700 701 /* Fast scheme only applies for unconditional calls. 
Hence: */ 702 cc = ARM64cc_AL; 703 704 } else { 705 706 /* SLOW SCHEME; move via temporaries */ 707 nextArgReg = 0; 708 709 for (i = 0; i < n_args; i++) { 710 IRExpr* arg = args[i]; 711 712 IRType aTy = Ity_INVALID; 713 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 714 aTy = typeOfIRExpr(env->type_env, args[i]); 715 716 if (nextArgReg >= ARM64_N_ARGREGS) 717 return False; /* out of argregs */ 718 719 if (aTy == Ity_I64) { 720 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]); 721 nextArgReg++; 722 } 723 else if (arg->tag == Iex_BBPTR) { 724 vassert(0); //ATC 725 tmpregs[nextArgReg] = hregARM64_X21(); 726 nextArgReg++; 727 } 728 else if (arg->tag == Iex_VECRET) { 729 vassert(!hregIsInvalid(r_vecRetAddr)); 730 tmpregs[nextArgReg] = r_vecRetAddr; 731 nextArgReg++; 732 } 733 else 734 return False; /* unhandled arg type */ 735 } 736 737 /* Now we can compute the condition. We can't do it earlier 738 because the argument computations could trash the condition 739 codes. Be a bit clever to handle the common case where the 740 guard is 1:Bit. */ 741 cc = ARM64cc_AL; 742 if (guard) { 743 if (guard->tag == Iex_Const 744 && guard->Iex.Const.con->tag == Ico_U1 745 && guard->Iex.Const.con->Ico.U1 == True) { 746 /* unconditional -- do nothing */ 747 } else { 748 cc = iselCondCode( env, guard ); 749 } 750 } 751 752 /* Move the args to their final destinations. */ 753 for (i = 0; i < nextArgReg; i++) { 754 vassert(!(hregIsInvalid(tmpregs[i]))); 755 /* None of these insns, including any spill code that might 756 be generated, may alter the condition codes. */ 757 addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) ); 758 } 759 760 } 761 762 /* Should be assured by checks above */ 763 vassert(nextArgReg <= ARM64_N_ARGREGS); 764 765 /* Do final checks, set the return values, and generate the call 766 instruction proper. */ 767 vassert(nBBPTRs == 0 || nBBPTRs == 1); 768 vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 
1 : 0); 769 vassert(*stackAdjustAfterCall == 0); 770 vassert(is_RetLoc_INVALID(*retloc)); 771 switch (retTy) { 772 case Ity_INVALID: 773 /* Function doesn't return a value. */ 774 *retloc = mk_RetLoc_simple(RLPri_None); 775 break; 776 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: 777 *retloc = mk_RetLoc_simple(RLPri_Int); 778 break; 779 case Ity_V128: 780 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0); 781 *stackAdjustAfterCall = 16; 782 break; 783 case Ity_V256: 784 vassert(0); // ATC 785 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0); 786 *stackAdjustAfterCall = 32; 787 break; 788 default: 789 /* IR can denote other possible return types, but we don't 790 handle those here. */ 791 vassert(0); 792 } 793 794 /* Finally, generate the call itself. This needs the *retloc value 795 set in the switch above, which is why it's at the end. */ 796 797 /* nextArgReg doles out argument registers. Since these are 798 assigned in the order x0 .. x7, its numeric value at this point, 799 which must be between 0 and 8 inclusive, is going to be equal to 800 the number of arg regs in use for the call. Hence bake that 801 number into the call (we'll need to know it when doing register 802 allocation, to know what regs the call reads.) */ 803 804 target = (HWord)Ptr_to_ULong(cee->addr); 805 addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc )); 806 807 return True; /* success */ 808} 809 810 811/*---------------------------------------------------------*/ 812/*--- ISEL: Integer expressions (64/32 bit) ---*/ 813/*---------------------------------------------------------*/ 814 815/* Select insns for an integer-typed expression, and add them to the 816 code list. Return a reg holding the result. This reg will be a 817 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you 818 want to modify it, ask for a new vreg, copy it in there, and modify 819 the copy. 
   The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64- and 32-bit type.  All results
   are returned in a 64-bit register.  For 32-bit expressions, the
   upper 32 bits are arbitrary, so you should mask or sign extend
   partial values if necessary.
*/

/* --------------------- AMode --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 64-bit one.
*/

/* True iff |scale| is a transfer size the RI12 amode can scale by. */
static Bool isValidScale ( UChar scale )
{
   switch (scale) {
      case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
      default: return False;
   }
}

/* Sanity-check an amode: registers must be virtual 64-bit int regs,
   and immediates/scales must be within the ranges the corresponding
   instruction forms allow. */
static Bool sane_AMode ( ARM64AMode* am )
{
   switch (am->tag) {
      case ARM64am_RI9:
         return
            toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
                    && (hregIsVirtual(am->ARM64am.RI9.reg)
                        /* || sameHReg(am->ARM64am.RI9.reg,
                                       hregARM64_X21()) */ )
                    && am->ARM64am.RI9.simm9 >= -256
                    && am->ARM64am.RI9.simm9 <= 255 );
      case ARM64am_RI12:
         return
            toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
                    && (hregIsVirtual(am->ARM64am.RI12.reg)
                        /* || sameHReg(am->ARM64am.RI12.reg,
                                       hregARM64_X21()) */ )
                    && am->ARM64am.RI12.uimm12 < 4096
                    && isValidScale(am->ARM64am.RI12.szB) );
      case ARM64am_RR:
         return
            toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
                    && hregIsVirtual(am->ARM64am.RR.base)
                    && hregClass(am->ARM64am.RR.index) == HRcInt64
                    && hregIsVirtual(am->ARM64am.RR.index) );
      default:
         vpanic("sane_AMode: unknown ARM64 AMode1 tag");
   }
}

/* Public wrapper: does the real work via iselIntExpr_AMode_wrk, then
   sanity-checks the result. */
static
ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
{
   ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
   vassert(sane_AMode(am));
   return am;
}

static
ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64);

   /* log2 of the transfer size for accesses of type |dty|; RI12
      offsets are implicitly scaled by this amount. */
   ULong szBbits = 0;
   switch (dty) {
      case Ity_I64: szBbits = 3; break;
      case Ity_I32: szBbits = 2; break;
      case Ity_I16: szBbits = 1; break;
      case Ity_I8:  szBbits = 0; break;
      default: vassert(0);
   }

   /* {Add64,Sub64}(expr,simm9).  We don't care about |dty| here since
      we're going to create an amode suitable for LDU* or STU*
      instructions, which use unscaled immediate offsets. */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
      Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
      if (simm >= -255 && simm <= 255) {
         /* Although the gating condition might seem to be
               simm >= -256 && simm <= 255
            we will need to negate simm in the case where the op is Sub64.
            Hence limit the lower value to -255 in order that its negation
            is representable. */
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
         return ARM64AMode_RI9(reg, (Int)simm);
      }
   }

   /* Add64(expr, uimm12 * transfer-size) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
      ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
      ULong szB  = 1 << szBbits;
      if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
          && (uimm >> szBbits) < 4096) {
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
      }
   }

   /* Add64(expr1, expr2) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64) {
      HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
      return ARM64AMode_RR(reg1, reg2);
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   HReg reg = iselIntExpr_R(env, e);
   return ARM64AMode_RI9(reg, 0);
}

//ZZ /* --------------------- AModeV --------------------- */
//ZZ 
//ZZ /* Return an AModeV which computes the value of the specified
//ZZ    expression, possibly also adding insns to the code list as a
//ZZ    result.  The expression may only be a 32-bit one.
//ZZ */
//ZZ
//ZZ static Bool sane_AModeV ( ARMAModeV* am )
//ZZ {
//ZZ    return toBool( hregClass(am->reg) == HRcInt32
//ZZ                   && hregIsVirtual(am->reg)
//ZZ                   && am->simm11 >= -1020 && am->simm11 <= 1020
//ZZ                   && 0 == (am->simm11 & 3) );
//ZZ }
//ZZ
//ZZ static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
//ZZ {
//ZZ    ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
//ZZ    vassert(sane_AModeV(am));
//ZZ    return am;
//ZZ }
//ZZ
//ZZ static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
//ZZ {
//ZZ    IRType ty = typeOfIRExpr(env->type_env,e);
//ZZ    vassert(ty == Ity_I32);
//ZZ
//ZZ    /* {Add32,Sub32}(expr, simm8 << 2) */
//ZZ    if (e->tag == Iex_Binop
//ZZ        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
//ZZ        && e->Iex.Binop.arg2->tag == Iex_Const
//ZZ        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
//ZZ       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
//ZZ       if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
//ZZ          HReg reg;
//ZZ          if (e->Iex.Binop.op == Iop_Sub32)
//ZZ             simm = -simm;
//ZZ          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
//ZZ          return mkARMAModeV(reg, simm);
//ZZ       }
//ZZ    }
//ZZ
//ZZ    /* Doesn't match anything in particular.  Generate it into
//ZZ       a register and use that. */
//ZZ    {
//ZZ       HReg reg = iselIntExpr_R(env, e);
//ZZ       return mkARMAModeV(reg, 0);
//ZZ    }
//ZZ
//ZZ }
//ZZ
//ZZ /* -------------------- AModeN -------------------- */
//ZZ
//ZZ static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
//ZZ {
//ZZ    return iselIntExpr_AModeN_wrk(env, e);
//ZZ }
//ZZ
//ZZ static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
//ZZ {
//ZZ    HReg reg = iselIntExpr_R(env, e);
//ZZ    return mkARMAModeN_R(reg);
//ZZ }
//ZZ
//ZZ
//ZZ /* --------------------- RI84 --------------------- */
//ZZ
//ZZ /* Select instructions to generate 'e' into a RI84.  If mayInv is
//ZZ    true, then the caller will also accept an I84 form that denotes
//ZZ    'not e'.  In this case didInv may not be NULL, and *didInv is set
//ZZ    to True.  This complication is so as to allow generation of an RI84
//ZZ    which is suitable for use in either an AND or BIC instruction,
//ZZ    without knowing (before this call) which one.
//ZZ */
//ZZ static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
//ZZ                                    ISelEnv* env, IRExpr* e )
//ZZ {
//ZZ    ARMRI84* ri;
//ZZ    if (mayInv)
//ZZ       vassert(didInv != NULL);
//ZZ    ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
//ZZ    /* sanity checks ... */
//ZZ    switch (ri->tag) {
//ZZ       case ARMri84_I84:
//ZZ          return ri;
//ZZ       case ARMri84_R:
//ZZ          vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
//ZZ          vassert(hregIsVirtual(ri->ARMri84.R.reg));
//ZZ          return ri;
//ZZ       default:
//ZZ          vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
//ZZ    }
//ZZ }
//ZZ
//ZZ /* DO NOT CALL THIS DIRECTLY ! */
//ZZ static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
//ZZ                                        ISelEnv* env, IRExpr* e )
//ZZ {
//ZZ    IRType ty = typeOfIRExpr(env->type_env,e);
//ZZ    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
//ZZ
//ZZ    if (didInv) *didInv = False;
//ZZ
//ZZ    /* special case: immediate */
//ZZ    if (e->tag == Iex_Const) {
//ZZ       UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
//ZZ       switch (e->Iex.Const.con->tag) {
//ZZ          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
//ZZ          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
//ZZ          case Ico_U8:  u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
//ZZ          default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
//ZZ       }
//ZZ       if (fitsIn8x4(&u8, &u4, u)) {
//ZZ          return ARMRI84_I84( (UShort)u8, (UShort)u4 );
//ZZ       }
//ZZ       if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
//ZZ          vassert(didInv);
//ZZ          *didInv = True;
//ZZ          return ARMRI84_I84( (UShort)u8, (UShort)u4 );
//ZZ       }
//ZZ       /* else fail, fall through to default case */
//ZZ    }
//ZZ
//ZZ    /* default case: calculate into a register and return that */
//ZZ    {
//ZZ       HReg r = iselIntExpr_R ( env, e );
//ZZ       return ARMRI84_R(r);
//ZZ    }
//ZZ }


/* --------------------- RIA --------------------- */

/* Select instructions to generate 'e' into a RIA. */

/* Public entry point: returns an ARM64RIA (either a 12-bit,
   optionally 12-bit-left-shifted, unsigned immediate, or a 64-bit
   virtual register) for use as the second operand of arithmetic
   (add/sub/cmp) instructions.  Merely a sanity-checking wrapper
   around iselIntExpr_RIA_wrk, which does the real work. */
static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
{
   ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case ARM64riA_I12:
         vassert(ri->ARM64riA.I12.imm12 < 4096);
         vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
         return ri;
      case ARM64riA_R:
         vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->ARM64riA.R.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RIA: unknown arm RIA tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      ULong u = 0xF000000ULL; /* invalid */
      switch (e->Iex.Const.con->tag) {
         case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
      }
      /* uimm12 with shift == 0: all bits outside 11:0 must be zero */
      if (0 == (u & ~(0xFFFULL << 0)))
         return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
      /* uimm12 with shift == 12: all bits outside 23:12 must be zero */
      if (0 == (u & ~(0xFFFULL << 12)))
         return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
      /* else fail, fall through to default case */
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return ARM64RIA_R(r);
   }
}


/* --------------------- RIL --------------------- */

/* Select instructions to generate 'e' into a RIL.  At this point we
   have to deal with the strange bitfield-immediate encoding for logic
   instructions. */


// The following four functions
// CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
// are copied, with modifications, from
// https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
// which has the following copyright notice:
/*
   Copyright 2013, ARM Limited
   All rights reserved.
1142 1143 Redistribution and use in source and binary forms, with or without 1144 modification, are permitted provided that the following conditions are met: 1145 1146 * Redistributions of source code must retain the above copyright notice, 1147 this list of conditions and the following disclaimer. 1148 * Redistributions in binary form must reproduce the above copyright notice, 1149 this list of conditions and the following disclaimer in the documentation 1150 and/or other materials provided with the distribution. 1151 * Neither the name of ARM Limited nor the names of its contributors may be 1152 used to endorse or promote products derived from this software without 1153 specific prior written permission. 1154 1155 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 1156 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 1157 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 1158 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 1159 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1160 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 1161 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 1162 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 1163 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1164 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

/* Returns the number of leading (most significant) zero bits in the
   low |width| bits of |value|.  |width| must be 32 or 64.  Returns
   |width| when the low |width| bits are all zero.  Bits at or above
   |width| are never examined. */
static Int CountLeadingZeros(ULong value, Int width)
{
   vassert(width == 32 || width == 64);
   Int count = 0;
   ULong bit_test = 1ULL << (width - 1);
   while ((count < width) && ((bit_test & value) == 0)) {
      count++;
      bit_test >>= 1;
   }
   return count;
}

/* Returns the number of trailing (least significant) zero bits in
   |value|, examining at most the low |width| bits.  |width| must be
   32 or 64.  Returns |width| when the low |width| bits are all zero. */
static Int CountTrailingZeros(ULong value, Int width)
{
   vassert(width == 32 || width == 64);
   Int count = 0;
   while ((count < width) && (((value >> count) & 1) == 0)) {
      count++;
   }
   return count;
}

/* Population count of the low |width| bits of |value|, computed by a
   branch-free parallel (pairwise-add) bit summation. */
static Int CountSetBits(ULong value, Int width)
{
   // TODO: Other widths could be added here, as the implementation already
   // supports them.
   vassert(width == 32 || width == 64);

   // Mask out unused bits to ensure that they are not counted.
   value &= (0xffffffffffffffffULL >> (64-width));

   // Add up the set bits.
   // The algorithm works by adding pairs of bit fields together iteratively,
   // where the size of each bit field doubles each time.
   // An example for an 8-bit value:
   // Bits:  h  g  f  e  d  c  b  a
   //         \ |   \ |   \ |   \ |
   // value = h+g   f+e   d+c   b+a
   //          \    |      \    |
   // value = h+g+f+e     d+c+b+a
   //              \      |
   // value = h+g+f+e+d+c+b+a
   value = ((value >> 1) & 0x5555555555555555ULL)
            + (value & 0x5555555555555555ULL);
   value = ((value >> 2) & 0x3333333333333333ULL)
            + (value & 0x3333333333333333ULL);
   value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL)
            + (value & 0x0f0f0f0f0f0f0f0fULL);
   value = ((value >> 8) & 0x00ff00ff00ff00ffULL)
            + (value & 0x00ff00ff00ff00ffULL);
   value = ((value >> 16) & 0x0000ffff0000ffffULL)
            + (value & 0x0000ffff0000ffffULL);
   value = ((value >> 32) & 0x00000000ffffffffULL)
            + (value & 0x00000000ffffffffULL);

   // The final sum is at most 64, so the implicit narrowing conversion
   // to Int on return is safe.
   return value;
}

/* Tests whether |value| is encodable as an ARM64 "logical immediate"
   for a register of the given |width| (32 or 64).  On success returns
   True and writes the instruction-field encodings through |n|,
   |imm_s| and |imm_r|; on failure returns False and leaves those
   outputs undefined. */
static Bool isImmLogical ( /*OUT*/UInt* n,
                           /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
                           ULong value, UInt width )
{
   // Test if a given value can be encoded in the immediate field of a
   // logical instruction.

   // If it can be encoded, the function returns true, and values
   // pointed to by n, imm_s and imm_r are updated with immediates
   // encoded in the format required by the corresponding fields in the
   // logical instruction.  If it can not be encoded, the function
   // returns false, and the values pointed to by n, imm_s and imm_r
   // are undefined.
   vassert(n != NULL && imm_s != NULL && imm_r != NULL);
   vassert(width == 32 || width == 64);

   // Logical immediates are encoded using parameters n, imm_s and imm_r using
   // the following table:
   //
   //  N   imms    immr    size        S             R
   //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
   //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
   //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
   //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
   //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
   //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
   // (s bits must not be all set)
   //
   // A pattern is constructed of size bits, where the least significant S+1
   // bits are set. The pattern is rotated right by R, and repeated across a
   // 32 or 64-bit value, depending on destination register width.
   //
   // To test if an arbitrary immediate can be encoded using this scheme, an
   // iterative algorithm is used.
   //
   // TODO: This code does not consider using X/W register overlap to support
   // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
   // are an encodable logical immediate.

   // 1. If the value has all set or all clear bits, it can't be encoded.
   if ((value == 0) || (value == 0xffffffffffffffffULL) ||
       ((width == 32) && (value == 0xffffffff))) {
      return False;
   }

   UInt lead_zero  = CountLeadingZeros(value, width);
   UInt lead_one   = CountLeadingZeros(~value, width);
   UInt trail_zero = CountTrailingZeros(value, width);
   UInt trail_one  = CountTrailingZeros(~value, width);
   UInt set_bits   = CountSetBits(value, width);

   // The fixed bits in the immediate s field.
   // If width == 64 (X reg), start at 0xFFFFFF80.
   // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
   // widths won't be executed.
   Int imm_s_fixed = (width == 64) ? -128 : -64;
   Int imm_s_mask  = 0x3F;

   for (;;) {
      // 2. If the value is two bits wide, it can be encoded.
      if (width == 2) {
         *n = 0;
         *imm_s = 0x3C;
         *imm_r = (value & 3) - 1;
         return True;
      }

      *n = (width == 64) ? 1 : 0;
      *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
      if ((lead_zero + set_bits) == width) {
         *imm_r = 0;
      } else {
         *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
      }

      // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
      //    the bit width of the value, it can be encoded.
      if (lead_zero + trail_zero + set_bits == width) {
         return True;
      }

      // 4. If the sum of leading ones, trailing ones and unset bits in the
      //    value is equal to the bit width of the value, it can be encoded.
      if (lead_one + trail_one + (width - set_bits) == width) {
         return True;
      }

      // 5. If the most-significant half of the bitwise value is equal to the
      //    least-significant half, return to step 2 using the least-significant
      //    half of the value.
      ULong mask = (1ULL << (width >> 1)) - 1;
      if ((value & mask) == ((value >> (width >> 1)) & mask)) {
         width >>= 1;
         set_bits >>= 1;
         imm_s_fixed >>= 1;
         continue;
      }

      // 6. Otherwise, the value can't be encoded.
      return False;
   }
}


/* Create a RIL for the given immediate, if it is representable, or
   return NULL if not. */

static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
{
   UInt n = 0, imm_s = 0, imm_r = 0;
   Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
   if (!ok) return NULL;
   vassert(n < 2 && imm_s < 64 && imm_r < 64);
   /* NB: ARM64RIL_I13 takes its immediate fields in the order
      (N, immR, immS) -- R before S. */
   return ARM64RIL_I13(n, imm_r, imm_s);
}

/* So, finally .. */

/* Public entry point: returns an ARM64RIL (either a bitfield "logical
   immediate" in N:immR:immS form, or a 64-bit virtual register) for
   use as the second operand of logic (and/or/xor/test) instructions.
   Sanity-checking wrapper around iselIntExpr_RIL_wrk. */
static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
{
   ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case ARM64riL_I13:
         vassert(ri->ARM64riL.I13.bitN < 2);
         vassert(ri->ARM64riL.I13.immR < 64);
         vassert(ri->ARM64riL.I13.immS < 64);
         return ri;
      case ARM64riL_R:
         vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->ARM64riL.R.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RIL: unknown arm RIL tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      ARM64RIL* maybe = NULL;
      if (ty == Ity_I64) {
         vassert(e->Iex.Const.con->tag == Ico_U64);
         maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
      } else {
         vassert(ty == Ity_I32);
         vassert(e->Iex.Const.con->tag == Ico_U32);
         UInt  u32 = e->Iex.Const.con->Ico.U32;
         ULong u64 = (ULong)u32;
         /* First try with 32 leading zeroes. */
         maybe = mb_mkARM64RIL_I(u64);
         /* If that doesn't work, try with 2 copies, since it doesn't
            matter what winds up in the upper 32 bits. */
         if (!maybe) {
            maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
         }
      }
      if (maybe) return maybe;
      /* else fail, fall through to default case */
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return ARM64RIL_R(r);
   }
}


/* --------------------- RI6 --------------------- */

/* Select instructions to generate 'e' into a RI6. */

/* Public entry point: returns an ARM64RI6 (a shift amount, either an
   immediate in the range 1..63 or a 64-bit virtual register).
   Sanity-checking wrapper around iselIntExpr_RI6_wrk. */
static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
{
   ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case ARM64ri6_I6:
         vassert(ri->ARM64ri6.I6.imm6 < 64);
         vassert(ri->ARM64ri6.I6.imm6 > 0);
         return ri;
      case ARM64ri6_R:
         vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U8: {
            UInt u = e->Iex.Const.con->Ico.U8;
            if (u > 0 && u < 64)
               return ARM64RI6_I6(u);
            break;
         /* NOTE(review): the 'default' label sits inside the braces of
            the Ico_U8 case's compound statement.  That is legal C and
            behaves as a normal default label for the switch, but it is
            easy to misread. */
         default:
            break;
         }
      }
      /* else fail, fall through to default case */
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return ARM64RI6_R(r);
   }
}


/* ------------------- CondCode ------------------- */

/* Generate code to evaluated a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */

static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
{
   ARM64CondCode cc = iselCondCode_wrk(env,e);
   vassert(cc != ARM64cc_NV);
   return cc;
}

/* DO NOT CALL THIS DIRECTLY ! */
static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      /* Cmp doesn't modify rTmp; so this is OK. */
      ARM64RIL* one = mb_mkARM64RIL_I(1);
      vassert(one);
      addInstr(env, ARM64Instr_Test(rTmp, one));
      return ARM64cc_NE;
   }

   /* Not1(e) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition */
      ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
      if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
         return ARM64cc_AL;
      } else {
         /* Condition codes come in even/odd complementary pairs, so
            flipping the bottom bit inverts the condition. */
         return 1 ^ cc;
      }
   }

   /* --- patterns rooted at: 64to1 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_64to1) {
      HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIL* one = mb_mkARM64RIL_I(1);
      vassert(one); /* '1' must be representable */
      addInstr(env, ARM64Instr_Test(rTmp, one));
      return ARM64cc_NE;
   }

   /* --- patterns rooted at: CmpNEZ8 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
      /* 0xFF (8 consecutive ones) is always representable as a logical
         immediate, so xFF is never NULL here. */
      ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
      addInstr(env, ARM64Instr_Test(r1, xFF));
      return ARM64cc_NE;
   }

   /* --- patterns rooted at: CmpNEZ64 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIA* zero = ARM64RIA_I12(0,0);
      addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
      return ARM64cc_NE;
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIA* zero = ARM64RIA_I12(0,0);
      addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
      return ARM64cc_NE;
   }

   /* --- Cmp*64*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ64
           || e->Iex.Binop.op == Iop_CmpNE64
           || e->Iex.Binop.op == Iop_CmpLT64S
           || e->Iex.Binop.op == Iop_CmpLT64U
           || e->Iex.Binop.op == Iop_CmpLE64S
           || e->Iex.Binop.op == Iop_CmpLE64U)) {
      HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
      ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
      addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
      /* Unsigned less-than maps to CC (carry clear) and unsigned
         less-or-equal to LS (lower or same). */
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ64:  return ARM64cc_EQ;
         case Iop_CmpNE64:  return ARM64cc_NE;
         case Iop_CmpLT64S: return ARM64cc_LT;
         case Iop_CmpLT64U: return ARM64cc_CC;
         case Iop_CmpLE64S: return ARM64cc_LE;
         case Iop_CmpLE64U: return ARM64cc_LS;
         default: vpanic("iselCondCode(arm64): CmpXX64");
      }
   }

   /* --- Cmp*32*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U)) {
      HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
      ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
      addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32:  return ARM64cc_EQ;
         case Iop_CmpNE32:  return ARM64cc_NE;
         case Iop_CmpLT32S: return ARM64cc_LT;
         case Iop_CmpLT32U: return ARM64cc_CC;
         case Iop_CmpLE32S: return ARM64cc_LE;
         case Iop_CmpLE32U: return ARM64cc_LS;
         default: vpanic("iselCondCode(arm64): CmpXX32");
      }
   }

//ZZ    /* const */
//ZZ    /* Constant 1:Bit */
//ZZ    if (e->tag == Iex_Const) {
//ZZ       HReg r;
//ZZ       vassert(e->Iex.Const.con->tag == Ico_U1);
//ZZ       vassert(e->Iex.Const.con->Ico.U1 == True
//ZZ               || e->Iex.Const.con->Ico.U1 == False);
//ZZ       r = newVRegI(env);
//ZZ       addInstr(env, ARMInstr_Imm32(r, 0));
//ZZ       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
//ZZ       return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
//ZZ    }
//ZZ
//ZZ    // JRS 2013-Jan-03: this seems completely nonsensical
//ZZ    /* --- CasCmpEQ* --- */
//ZZ    /* Ist_Cas has a dummy argument to compare with, so comparison is
//ZZ       always true. */
//ZZ    //if (e->tag == Iex_Binop
//ZZ    //    && (e->Iex.Binop.op == Iop_CasCmpEQ32
//ZZ    //        || e->Iex.Binop.op == Iop_CasCmpEQ16
//ZZ    //        || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
//ZZ    //   return ARMcc_AL;
//ZZ    //}

   ppIRExpr(e);
   vpanic("iselCondCode");
}


/* --------------------- Reg --------------------- */

/* Public entry point: compute 'e' into a 64-bit virtual integer
   register.  Sanity-checking wrapper around iselIntExpr_R_wrk. */
static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   switch (e->tag) {

      /* --------- TEMP --------- */
      case Iex_RdTmp: {
         return lookupIRTemp(env, e->Iex.RdTmp.tmp);
      }

      /* --------- LOAD --------- */
      case Iex_Load: {
         HReg dst = newVRegI(env);

         /* Only little-endian loads are handled. */
         if (e->Iex.Load.end != Iend_LE)
            goto irreducible;

         if (ty == Ity_I64) {
            ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
            addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
            return dst;
         }
         if (ty == Ity_I32) {
            ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
            addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
            return dst;
         }
         if (ty == Ity_I16) {
            ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
            addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
1650 return dst; 1651 } 1652 if (ty == Ity_I8) { 1653 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); 1654 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode)); 1655 return dst; 1656 } 1657 break; 1658 } 1659 1660 /* --------- BINARY OP --------- */ 1661 case Iex_Binop: { 1662 1663 ARM64LogicOp lop = 0; /* invalid */ 1664 ARM64ShiftOp sop = 0; /* invalid */ 1665 1666 /* Special-case 0-x into a Neg instruction. Not because it's 1667 particularly useful but more so as to give value flow using 1668 this instruction, so as to check its assembly correctness for 1669 implementation of Left32/Left64. */ 1670 switch (e->Iex.Binop.op) { 1671 case Iop_Sub64: 1672 if (isZeroU64(e->Iex.Binop.arg1)) { 1673 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1674 HReg dst = newVRegI(env); 1675 addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG)); 1676 return dst; 1677 } 1678 break; 1679 default: 1680 break; 1681 } 1682 1683 /* ADD/SUB */ 1684 switch (e->Iex.Binop.op) { 1685 case Iop_Add64: case Iop_Add32: 1686 case Iop_Sub64: case Iop_Sub32: { 1687 Bool isAdd = e->Iex.Binop.op == Iop_Add64 1688 || e->Iex.Binop.op == Iop_Add32; 1689 HReg dst = newVRegI(env); 1690 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1691 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2); 1692 addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd)); 1693 return dst; 1694 } 1695 default: 1696 break; 1697 } 1698 1699 /* AND/OR/XOR */ 1700 switch (e->Iex.Binop.op) { 1701 case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop; 1702 case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop; 1703 case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop; 1704 log_binop: { 1705 HReg dst = newVRegI(env); 1706 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1707 ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2); 1708 addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop)); 1709 return dst; 1710 } 1711 default: 1712 break; 1713 } 1714 1715 /* 
SHL/SHR/SAR */ 1716 switch (e->Iex.Binop.op) { 1717 case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop; 1718 case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop; 1719 case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop; 1720 sh_binop: { 1721 HReg dst = newVRegI(env); 1722 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1723 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2); 1724 addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop)); 1725 return dst; 1726 } 1727 case Iop_Shr32: 1728 case Iop_Sar32: { 1729 Bool zx = e->Iex.Binop.op == Iop_Shr32; 1730 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1731 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2); 1732 HReg dst = zx ? widen_z_32_to_64(env, argL) 1733 : widen_s_32_to_64(env, argL); 1734 addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR)); 1735 return dst; 1736 } 1737 default: break; 1738 } 1739 1740 /* MUL */ 1741 if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) { 1742 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1743 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1744 HReg dst = newVRegI(env); 1745 addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN)); 1746 return dst; 1747 } 1748 1749 /* MULL */ 1750 if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) { 1751 Bool isS = e->Iex.Binop.op == Iop_MullS32; 1752 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1753 HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL); 1754 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1755 HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR); 1756 HReg dst = newVRegI(env); 1757 addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN)); 1758 return dst; 1759 } 1760 1761 /* Handle misc other ops. 
*/ 1762 1763 if (e->Iex.Binop.op == Iop_Max32U) { 1764 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1765 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1766 HReg dst = newVRegI(env); 1767 addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/)); 1768 addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS)); 1769 return dst; 1770 } 1771 1772 if (e->Iex.Binop.op == Iop_32HLto64) { 1773 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1); 1774 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2); 1775 HReg lo32 = widen_z_32_to_64(env, lo32s); 1776 HReg hi32 = newVRegI(env); 1777 addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32), 1778 ARM64sh_SHL)); 1779 addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32), 1780 ARM64lo_OR)); 1781 return hi32; 1782 } 1783 1784 if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) { 1785 Bool isD = e->Iex.Binop.op == Iop_CmpF64; 1786 HReg dL = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1); 1787 HReg dR = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2); 1788 HReg dst = newVRegI(env); 1789 HReg imm = newVRegI(env); 1790 /* Do the compare (FCMP), which sets NZCV in PSTATE. Then 1791 create in dst, the IRCmpF64Result encoded result. */ 1792 addInstr(env, (isD ? 
ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR)); 1793 addInstr(env, ARM64Instr_Imm64(dst, 0)); 1794 addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ 1795 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ)); 1796 addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT 1797 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI)); 1798 addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT 1799 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT)); 1800 addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN 1801 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS)); 1802 return dst; 1803 } 1804 1805 { /* local scope */ 1806 ARM64CvtOp cvt_op = ARM64cvt_INVALID; 1807 Bool srcIsD = False; 1808 switch (e->Iex.Binop.op) { 1809 case Iop_F64toI64S: 1810 cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break; 1811 case Iop_F64toI64U: 1812 cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break; 1813 case Iop_F64toI32S: 1814 cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break; 1815 case Iop_F64toI32U: 1816 cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break; 1817 case Iop_F32toI32S: 1818 cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break; 1819 case Iop_F32toI32U: 1820 cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break; 1821 case Iop_F32toI64S: 1822 cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break; 1823 case Iop_F32toI64U: 1824 cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break; 1825 default: 1826 break; 1827 } 1828 if (cvt_op != ARM64cvt_INVALID) { 1829 /* This is all a bit dodgy, because we can't handle a 1830 non-constant (not-known-at-JIT-time) rounding mode 1831 indication. That's because there's no instruction 1832 AFAICS that does this conversion but rounds according to 1833 FPCR.RM, so we have to bake the rounding mode into the 1834 instruction right now. But that should be OK because 1835 (1) the front end attaches a literal Irrm_ value to the 1836 conversion binop, and (2) iropt will never float that 1837 off via CSE, into a literal. 
Hence we should always 1838 have an Irrm_ value as the first arg. */ 1839 IRExpr* arg1 = e->Iex.Binop.arg1; 1840 if (arg1->tag != Iex_Const) goto irreducible; 1841 IRConst* arg1con = arg1->Iex.Const.con; 1842 vassert(arg1con->tag == Ico_U32); // else ill-typed IR 1843 UInt irrm = arg1con->Ico.U32; 1844 /* Find the ARM-encoded equivalent for |irrm|. */ 1845 UInt armrm = 4; /* impossible */ 1846 switch (irrm) { 1847 case Irrm_NEAREST: armrm = 0; break; 1848 case Irrm_NegINF: armrm = 2; break; 1849 case Irrm_PosINF: armrm = 1; break; 1850 case Irrm_ZERO: armrm = 3; break; 1851 default: goto irreducible; 1852 } 1853 HReg src = (srcIsD ? iselDblExpr : iselFltExpr) 1854 (env, e->Iex.Binop.arg2); 1855 HReg dst = newVRegI(env); 1856 addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm)); 1857 return dst; 1858 } 1859 } /* local scope */ 1860 1861//ZZ if (e->Iex.Binop.op == Iop_GetElem8x8 1862//ZZ || e->Iex.Binop.op == Iop_GetElem16x4 1863//ZZ || e->Iex.Binop.op == Iop_GetElem32x2) { 1864//ZZ HReg res = newVRegI(env); 1865//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1); 1866//ZZ UInt index, size; 1867//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || 1868//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 1869//ZZ vpanic("ARM target supports GetElem with constant " 1870//ZZ "second argument only\n"); 1871//ZZ } 1872//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1873//ZZ switch (e->Iex.Binop.op) { 1874//ZZ case Iop_GetElem8x8: vassert(index < 8); size = 0; break; 1875//ZZ case Iop_GetElem16x4: vassert(index < 4); size = 1; break; 1876//ZZ case Iop_GetElem32x2: vassert(index < 2); size = 2; break; 1877//ZZ default: vassert(0); 1878//ZZ } 1879//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS, 1880//ZZ mkARMNRS(ARMNRS_Reg, res, 0), 1881//ZZ mkARMNRS(ARMNRS_Scalar, arg, index), 1882//ZZ size, False)); 1883//ZZ return res; 1884//ZZ } 1885//ZZ 1886//ZZ if (e->Iex.Binop.op == Iop_GetElem8x16 1887//ZZ || e->Iex.Binop.op == Iop_GetElem16x8 1888//ZZ || 
e->Iex.Binop.op == Iop_GetElem32x4) { 1889//ZZ HReg res = newVRegI(env); 1890//ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1); 1891//ZZ UInt index, size; 1892//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || 1893//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 1894//ZZ vpanic("ARM target supports GetElem with constant " 1895//ZZ "second argument only\n"); 1896//ZZ } 1897//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1898//ZZ switch (e->Iex.Binop.op) { 1899//ZZ case Iop_GetElem8x16: vassert(index < 16); size = 0; break; 1900//ZZ case Iop_GetElem16x8: vassert(index < 8); size = 1; break; 1901//ZZ case Iop_GetElem32x4: vassert(index < 4); size = 2; break; 1902//ZZ default: vassert(0); 1903//ZZ } 1904//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS, 1905//ZZ mkARMNRS(ARMNRS_Reg, res, 0), 1906//ZZ mkARMNRS(ARMNRS_Scalar, arg, index), 1907//ZZ size, True)); 1908//ZZ return res; 1909//ZZ } 1910 1911 /* All cases involving host-side helper calls. */ 1912 void* fn = NULL; 1913 switch (e->Iex.Binop.op) { 1914//ZZ case Iop_Add16x2: 1915//ZZ fn = &h_generic_calc_Add16x2; break; 1916//ZZ case Iop_Sub16x2: 1917//ZZ fn = &h_generic_calc_Sub16x2; break; 1918//ZZ case Iop_HAdd16Ux2: 1919//ZZ fn = &h_generic_calc_HAdd16Ux2; break; 1920//ZZ case Iop_HAdd16Sx2: 1921//ZZ fn = &h_generic_calc_HAdd16Sx2; break; 1922//ZZ case Iop_HSub16Ux2: 1923//ZZ fn = &h_generic_calc_HSub16Ux2; break; 1924//ZZ case Iop_HSub16Sx2: 1925//ZZ fn = &h_generic_calc_HSub16Sx2; break; 1926//ZZ case Iop_QAdd16Sx2: 1927//ZZ fn = &h_generic_calc_QAdd16Sx2; break; 1928//ZZ case Iop_QAdd16Ux2: 1929//ZZ fn = &h_generic_calc_QAdd16Ux2; break; 1930//ZZ case Iop_QSub16Sx2: 1931//ZZ fn = &h_generic_calc_QSub16Sx2; break; 1932//ZZ case Iop_Add8x4: 1933//ZZ fn = &h_generic_calc_Add8x4; break; 1934//ZZ case Iop_Sub8x4: 1935//ZZ fn = &h_generic_calc_Sub8x4; break; 1936//ZZ case Iop_HAdd8Ux4: 1937//ZZ fn = &h_generic_calc_HAdd8Ux4; break; 1938//ZZ case Iop_HAdd8Sx4: 1939//ZZ fn = 
&h_generic_calc_HAdd8Sx4; break; 1940//ZZ case Iop_HSub8Ux4: 1941//ZZ fn = &h_generic_calc_HSub8Ux4; break; 1942//ZZ case Iop_HSub8Sx4: 1943//ZZ fn = &h_generic_calc_HSub8Sx4; break; 1944//ZZ case Iop_QAdd8Sx4: 1945//ZZ fn = &h_generic_calc_QAdd8Sx4; break; 1946//ZZ case Iop_QAdd8Ux4: 1947//ZZ fn = &h_generic_calc_QAdd8Ux4; break; 1948//ZZ case Iop_QSub8Sx4: 1949//ZZ fn = &h_generic_calc_QSub8Sx4; break; 1950//ZZ case Iop_QSub8Ux4: 1951//ZZ fn = &h_generic_calc_QSub8Ux4; break; 1952//ZZ case Iop_Sad8Ux4: 1953//ZZ fn = &h_generic_calc_Sad8Ux4; break; 1954//ZZ case Iop_QAdd32S: 1955//ZZ fn = &h_generic_calc_QAdd32S; break; 1956//ZZ case Iop_QSub32S: 1957//ZZ fn = &h_generic_calc_QSub32S; break; 1958//ZZ case Iop_QSub16Ux2: 1959//ZZ fn = &h_generic_calc_QSub16Ux2; break; 1960 case Iop_DivU32: 1961 fn = &h_calc_udiv32_w_arm_semantics; break; 1962 case Iop_DivS32: 1963 fn = &h_calc_sdiv32_w_arm_semantics; break; 1964 case Iop_DivU64: 1965 fn = &h_calc_udiv64_w_arm_semantics; break; 1966 case Iop_DivS64: 1967 fn = &h_calc_sdiv64_w_arm_semantics; break; 1968 default: 1969 break; 1970 } 1971 1972 if (fn) { 1973 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1974 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1975 HReg res = newVRegI(env); 1976 addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL)); 1977 addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR)); 1978 addInstr(env, ARM64Instr_Call( ARM64cc_AL, (HWord)Ptr_to_ULong(fn), 1979 2, mk_RetLoc_simple(RLPri_Int) )); 1980 addInstr(env, ARM64Instr_MovI(res, hregARM64_X0())); 1981 return res; 1982 } 1983 1984 break; 1985 } 1986 1987 /* --------- UNARY OP --------- */ 1988 case Iex_Unop: { 1989 1990 switch (e->Iex.Unop.op) { 1991 case Iop_16Uto64: { 1992 /* This probably doesn't occur often enough to be worth 1993 rolling the extension into the load. 
*/ 1994 IRExpr* arg = e->Iex.Unop.arg; 1995 HReg src = iselIntExpr_R(env, arg); 1996 HReg dst = widen_z_16_to_64(env, src); 1997 return dst; 1998 } 1999 case Iop_32Uto64: { 2000 IRExpr* arg = e->Iex.Unop.arg; 2001 if (arg->tag == Iex_Load) { 2002 /* This correctly zero extends because _LdSt32 is 2003 defined to do a zero extending load. */ 2004 HReg dst = newVRegI(env); 2005 ARM64AMode* am 2006 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32); 2007 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am)); 2008 return dst; 2009 } 2010 /* else be lame and mask it */ 2011 HReg src = iselIntExpr_R(env, arg); 2012 HReg dst = widen_z_32_to_64(env, src); 2013 return dst; 2014 } 2015 case Iop_8Uto32: /* Just freeload on the 8Uto64 case */ 2016 case Iop_8Uto64: { 2017 IRExpr* arg = e->Iex.Unop.arg; 2018 if (arg->tag == Iex_Load) { 2019 /* This correctly zero extends because _LdSt8 is 2020 defined to do a zero extending load. */ 2021 HReg dst = newVRegI(env); 2022 ARM64AMode* am 2023 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8); 2024 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am)); 2025 return dst; 2026 } 2027 /* else be lame and mask it */ 2028 HReg src = iselIntExpr_R(env, arg); 2029 HReg dst = widen_z_8_to_64(env, src); 2030 return dst; 2031 } 2032 case Iop_128HIto64: { 2033 HReg rHi, rLo; 2034 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 2035 return rHi; /* and abandon rLo */ 2036 } 2037 case Iop_8Sto32: case Iop_8Sto64: { 2038 IRExpr* arg = e->Iex.Unop.arg; 2039 HReg src = iselIntExpr_R(env, arg); 2040 HReg dst = widen_s_8_to_64(env, src); 2041 return dst; 2042 } 2043 case Iop_16Sto32: case Iop_16Sto64: { 2044 IRExpr* arg = e->Iex.Unop.arg; 2045 HReg src = iselIntExpr_R(env, arg); 2046 HReg dst = widen_s_16_to_64(env, src); 2047 return dst; 2048 } 2049 case Iop_32Sto64: { 2050 IRExpr* arg = e->Iex.Unop.arg; 2051 HReg src = iselIntExpr_R(env, arg); 2052 HReg dst = widen_s_32_to_64(env, src); 2053 return dst; 2054 } 2055 case Iop_Not32: 2056 
case Iop_Not64: { 2057 HReg dst = newVRegI(env); 2058 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2059 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT)); 2060 return dst; 2061 } 2062 case Iop_Clz64: { 2063 HReg dst = newVRegI(env); 2064 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2065 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ)); 2066 return dst; 2067 } 2068 case Iop_Left32: 2069 case Iop_Left64: { 2070 /* Left64(src) = src | -src. Left32 can use the same 2071 implementation since in that case we don't care what 2072 the upper 32 bits become. */ 2073 HReg dst = newVRegI(env); 2074 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2075 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG)); 2076 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src), 2077 ARM64lo_OR)); 2078 return dst; 2079 } 2080 case Iop_CmpwNEZ64: { 2081 /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1 2082 = Left64(src) >>s 63 */ 2083 HReg dst = newVRegI(env); 2084 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2085 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG)); 2086 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src), 2087 ARM64lo_OR)); 2088 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), 2089 ARM64sh_SAR)); 2090 return dst; 2091 } 2092 case Iop_CmpwNEZ32: { 2093 /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF) 2094 = Left64(src & 0xFFFFFFFF) >>s 63 */ 2095 HReg dst = newVRegI(env); 2096 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg); 2097 HReg src = widen_z_32_to_64(env, pre); 2098 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG)); 2099 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src), 2100 ARM64lo_OR)); 2101 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), 2102 ARM64sh_SAR)); 2103 return dst; 2104 } 2105 case Iop_V128to64: case Iop_V128HIto64: { 2106 HReg dst = newVRegI(env); 2107 HReg src = iselV128Expr(env, e->Iex.Unop.arg); 2108 UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 
1 : 0; 2109 addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo)); 2110 return dst; 2111 } 2112 case Iop_1Sto32: 2113 case Iop_1Sto64: { 2114 /* As with the iselStmt case for 'tmp:I1 = expr', we could 2115 do a lot better here if it ever became necessary. */ 2116 HReg zero = newVRegI(env); 2117 HReg one = newVRegI(env); 2118 HReg dst = newVRegI(env); 2119 addInstr(env, ARM64Instr_Imm64(zero, 0)); 2120 addInstr(env, ARM64Instr_Imm64(one, 1)); 2121 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg); 2122 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); 2123 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), 2124 ARM64sh_SHL)); 2125 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), 2126 ARM64sh_SAR)); 2127 return dst; 2128 } 2129 case Iop_NarrowUn16to8x8: 2130 case Iop_NarrowUn32to16x4: 2131 case Iop_NarrowUn64to32x2: { 2132 HReg src = iselV128Expr(env, e->Iex.Unop.arg); 2133 HReg tmp = newVRegV(env); 2134 HReg dst = newVRegI(env); 2135 UInt dszBlg2 = 3; /* illegal */ 2136 switch (e->Iex.Unop.op) { 2137 case Iop_NarrowUn16to8x8: dszBlg2 = 0; break; // 16to8_x8 2138 case Iop_NarrowUn32to16x4: dszBlg2 = 1; break; // 32to16_x4 2139 case Iop_NarrowUn64to32x2: dszBlg2 = 2; break; // 64to32_x2 2140 default: vassert(0); 2141 } 2142 addInstr(env, ARM64Instr_VNarrowV(dszBlg2, tmp, src)); 2143 addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/)); 2144 return dst; 2145 } 2146//ZZ case Iop_64HIto32: { 2147//ZZ HReg rHi, rLo; 2148//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 2149//ZZ return rHi; /* and abandon rLo .. poor wee thing :-) */ 2150//ZZ } 2151//ZZ case Iop_64to32: { 2152//ZZ HReg rHi, rLo; 2153//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 2154//ZZ return rLo; /* similar stupid comment to the above ... 
*/ 2155//ZZ } 2156//ZZ case Iop_64to8: { 2157//ZZ HReg rHi, rLo; 2158//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 2159//ZZ HReg tHi = newVRegI(env); 2160//ZZ HReg tLo = newVRegI(env); 2161//ZZ HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg); 2162//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo)); 2163//ZZ rHi = tHi; 2164//ZZ rLo = tLo; 2165//ZZ } else { 2166//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 2167//ZZ } 2168//ZZ return rLo; 2169//ZZ } 2170 2171 case Iop_1Uto64: { 2172 /* 1Uto64(tmp). */ 2173 HReg dst = newVRegI(env); 2174 if (e->Iex.Unop.arg->tag == Iex_RdTmp) { 2175 ARM64RIL* one = mb_mkARM64RIL_I(1); 2176 HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp); 2177 vassert(one); 2178 addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND)); 2179 } else { 2180 /* CLONE-01 */ 2181 HReg zero = newVRegI(env); 2182 HReg one = newVRegI(env); 2183 addInstr(env, ARM64Instr_Imm64(zero, 0)); 2184 addInstr(env, ARM64Instr_Imm64(one, 1)); 2185 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg); 2186 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); 2187 } 2188 return dst; 2189 } 2190//ZZ case Iop_1Uto8: { 2191//ZZ HReg dst = newVRegI(env); 2192//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg); 2193//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0))); 2194//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0))); 2195//ZZ return dst; 2196//ZZ } 2197//ZZ 2198//ZZ case Iop_1Sto32: { 2199//ZZ HReg dst = newVRegI(env); 2200//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg); 2201//ZZ ARMRI5* amt = ARMRI5_I5(31); 2202//ZZ /* This is really rough. We could do much better here; 2203//ZZ perhaps mvn{cond} dst, #0 as the second insn? 
2204//ZZ (same applies to 1Sto64) */ 2205//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0))); 2206//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0))); 2207//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt)); 2208//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt)); 2209//ZZ return dst; 2210//ZZ } 2211//ZZ 2212//ZZ case Iop_Clz32: { 2213//ZZ /* Count leading zeroes; easy on ARM. */ 2214//ZZ HReg dst = newVRegI(env); 2215//ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2216//ZZ addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src)); 2217//ZZ return dst; 2218//ZZ } 2219//ZZ 2220//ZZ case Iop_CmpwNEZ32: { 2221//ZZ HReg dst = newVRegI(env); 2222//ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2223//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src)); 2224//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src))); 2225//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31))); 2226//ZZ return dst; 2227//ZZ } 2228//ZZ 2229//ZZ case Iop_ReinterpF32asI32: { 2230//ZZ HReg dst = newVRegI(env); 2231//ZZ HReg src = iselFltExpr(env, e->Iex.Unop.arg); 2232//ZZ addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst)); 2233//ZZ return dst; 2234//ZZ } 2235 2236 case Iop_64to32: 2237 case Iop_64to16: 2238 case Iop_64to8: 2239 /* These are no-ops. */ 2240 return iselIntExpr_R(env, e->Iex.Unop.arg); 2241 2242 default: 2243 break; 2244 } 2245 2246//ZZ /* All Unop cases involving host-side helper calls. 
*/ 2247//ZZ void* fn = NULL; 2248//ZZ switch (e->Iex.Unop.op) { 2249//ZZ case Iop_CmpNEZ16x2: 2250//ZZ fn = &h_generic_calc_CmpNEZ16x2; break; 2251//ZZ case Iop_CmpNEZ8x4: 2252//ZZ fn = &h_generic_calc_CmpNEZ8x4; break; 2253//ZZ default: 2254//ZZ break; 2255//ZZ } 2256//ZZ 2257//ZZ if (fn) { 2258//ZZ HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); 2259//ZZ HReg res = newVRegI(env); 2260//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg)); 2261//ZZ addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 2262//ZZ 1, RetLocInt )); 2263//ZZ addInstr(env, mk_iMOVds_RR(res, hregARM_R0())); 2264//ZZ return res; 2265//ZZ } 2266 2267 break; 2268 } 2269 2270 /* --------- GET --------- */ 2271 case Iex_Get: { 2272 if (ty == Ity_I64 2273 && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) { 2274 HReg dst = newVRegI(env); 2275 ARM64AMode* am 2276 = mk_baseblock_64bit_access_amode(e->Iex.Get.offset); 2277 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am)); 2278 return dst; 2279 } 2280 if (ty == Ity_I32 2281 && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) { 2282 HReg dst = newVRegI(env); 2283 ARM64AMode* am 2284 = mk_baseblock_32bit_access_amode(e->Iex.Get.offset); 2285 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am)); 2286 return dst; 2287 } 2288 if (ty == Ity_I16 2289 && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) { 2290 HReg dst = newVRegI(env); 2291 ARM64AMode* am 2292 = mk_baseblock_16bit_access_amode(e->Iex.Get.offset); 2293 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am)); 2294 return dst; 2295 } 2296 if (ty == Ity_I8 2297 /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) { 2298 HReg dst = newVRegI(env); 2299 ARM64AMode* am 2300 = mk_baseblock_8bit_access_amode(e->Iex.Get.offset); 2301 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am)); 2302 return dst; 2303 } 2304 break; 2305 } 2306 2307 /* --------- CCALL --------- */ 2308 case Iex_CCall: { 2309 HReg dst = newVRegI(env); 
2310 vassert(ty == e->Iex.CCall.retty); 2311 2312 /* be very restrictive for now. Only 64-bit ints allowed for 2313 args, and 64 bits for return type. Don't forget to change 2314 the RetLoc if more types are allowed in future. */ 2315 if (e->Iex.CCall.retty != Ity_I64) 2316 goto irreducible; 2317 2318 /* Marshal args, do the call, clear stack. */ 2319 UInt addToSp = 0; 2320 RetLoc rloc = mk_RetLoc_INVALID(); 2321 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, 2322 e->Iex.CCall.cee, e->Iex.CCall.retty, 2323 e->Iex.CCall.args ); 2324 /* */ 2325 if (ok) { 2326 vassert(is_sane_RetLoc(rloc)); 2327 vassert(rloc.pri == RLPri_Int); 2328 vassert(addToSp == 0); 2329 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0())); 2330 return dst; 2331 } 2332 /* else fall through; will hit the irreducible: label */ 2333 } 2334 2335 /* --------- LITERAL --------- */ 2336 /* 64-bit literals */ 2337 case Iex_Const: { 2338 ULong u = 0; 2339 HReg dst = newVRegI(env); 2340 switch (e->Iex.Const.con->tag) { 2341 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break; 2342 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; 2343 case Ico_U16: u = e->Iex.Const.con->Ico.U16; break; 2344 case Ico_U8: u = e->Iex.Const.con->Ico.U8; break; 2345 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)"); 2346 } 2347 addInstr(env, ARM64Instr_Imm64(dst, u)); 2348 return dst; 2349 } 2350 2351 /* --------- MULTIPLEX --------- */ 2352 case Iex_ITE: { 2353 /* ITE(ccexpr, iftrue, iffalse) */ 2354 if (ty == Ity_I64 || ty == Ity_I32) { 2355 ARM64CondCode cc; 2356 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue); 2357 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse); 2358 HReg dst = newVRegI(env); 2359 cc = iselCondCode(env, e->Iex.ITE.cond); 2360 addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc)); 2361 return dst; 2362 } 2363 break; 2364 } 2365 2366 default: 2367 break; 2368 } /* switch (e->tag) */ 2369 2370 /* We get here if no pattern matched. 
*/ 2371 irreducible: 2372 ppIRExpr(e); 2373 vpanic("iselIntExpr_R: cannot reduce tree"); 2374} 2375 2376 2377/*---------------------------------------------------------*/ 2378/*--- ISEL: Integer expressions (128 bit) ---*/ 2379/*---------------------------------------------------------*/ 2380 2381/* Compute a 128-bit value into a register pair, which is returned as 2382 the first two parameters. As with iselIntExpr_R, these may be 2383 either real or virtual regs; in any case they must not be changed 2384 by subsequent code emitted by the caller. */ 2385 2386static void iselInt128Expr ( HReg* rHi, HReg* rLo, 2387 ISelEnv* env, IRExpr* e ) 2388{ 2389 iselInt128Expr_wrk(rHi, rLo, env, e); 2390# if 0 2391 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2392# endif 2393 vassert(hregClass(*rHi) == HRcInt64); 2394 vassert(hregIsVirtual(*rHi)); 2395 vassert(hregClass(*rLo) == HRcInt64); 2396 vassert(hregIsVirtual(*rLo)); 2397} 2398 2399/* DO NOT CALL THIS DIRECTLY ! */ 2400static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, 2401 ISelEnv* env, IRExpr* e ) 2402{ 2403 vassert(e); 2404 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128); 2405 2406 /* --------- BINARY ops --------- */ 2407 if (e->tag == Iex_Binop) { 2408 switch (e->Iex.Binop.op) { 2409 /* 64 x 64 -> 128 multiply */ 2410 case Iop_MullU64: 2411 case Iop_MullS64: { 2412 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64); 2413 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 2414 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 2415 HReg dstLo = newVRegI(env); 2416 HReg dstHi = newVRegI(env); 2417 addInstr(env, ARM64Instr_Mul(dstLo, argL, argR, 2418 ARM64mul_PLAIN)); 2419 addInstr(env, ARM64Instr_Mul(dstHi, argL, argR, 2420 syned ? 
ARM64mul_SX : ARM64mul_ZX)); 2421 *rHi = dstHi; 2422 *rLo = dstLo; 2423 return; 2424 } 2425 /* 64HLto128(e1,e2) */ 2426 case Iop_64HLto128: 2427 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2428 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2429 return; 2430 default: 2431 break; 2432 } 2433 } /* if (e->tag == Iex_Binop) */ 2434 2435 ppIRExpr(e); 2436 vpanic("iselInt128Expr(arm64)"); 2437} 2438 2439 2440//ZZ /* -------------------- 64-bit -------------------- */ 2441//ZZ 2442//ZZ /* Compute a 64-bit value into a register pair, which is returned as 2443//ZZ the first two parameters. As with iselIntExpr_R, these may be 2444//ZZ either real or virtual regs; in any case they must not be changed 2445//ZZ by subsequent code emitted by the caller. */ 2446//ZZ 2447//ZZ static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) 2448//ZZ { 2449//ZZ iselInt64Expr_wrk(rHi, rLo, env, e); 2450//ZZ # if 0 2451//ZZ vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2452//ZZ # endif 2453//ZZ vassert(hregClass(*rHi) == HRcInt32); 2454//ZZ vassert(hregIsVirtual(*rHi)); 2455//ZZ vassert(hregClass(*rLo) == HRcInt32); 2456//ZZ vassert(hregIsVirtual(*rLo)); 2457//ZZ } 2458//ZZ 2459//ZZ /* DO NOT CALL THIS DIRECTLY ! 
*/ 2460//ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) 2461//ZZ { 2462//ZZ vassert(e); 2463//ZZ vassert(typeOfIRExpr(env->type_env,e) == Ity_I64); 2464//ZZ 2465//ZZ /* 64-bit literal */ 2466//ZZ if (e->tag == Iex_Const) { 2467//ZZ ULong w64 = e->Iex.Const.con->Ico.U64; 2468//ZZ UInt wHi = toUInt(w64 >> 32); 2469//ZZ UInt wLo = toUInt(w64); 2470//ZZ HReg tHi = newVRegI(env); 2471//ZZ HReg tLo = newVRegI(env); 2472//ZZ vassert(e->Iex.Const.con->tag == Ico_U64); 2473//ZZ addInstr(env, ARMInstr_Imm32(tHi, wHi)); 2474//ZZ addInstr(env, ARMInstr_Imm32(tLo, wLo)); 2475//ZZ *rHi = tHi; 2476//ZZ *rLo = tLo; 2477//ZZ return; 2478//ZZ } 2479//ZZ 2480//ZZ /* read 64-bit IRTemp */ 2481//ZZ if (e->tag == Iex_RdTmp) { 2482//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 2483//ZZ HReg tHi = newVRegI(env); 2484//ZZ HReg tLo = newVRegI(env); 2485//ZZ HReg tmp = iselNeon64Expr(env, e); 2486//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo)); 2487//ZZ *rHi = tHi; 2488//ZZ *rLo = tLo; 2489//ZZ } else { 2490//ZZ lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp); 2491//ZZ } 2492//ZZ return; 2493//ZZ } 2494//ZZ 2495//ZZ /* 64-bit load */ 2496//ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2497//ZZ HReg tLo, tHi, rA; 2498//ZZ vassert(e->Iex.Load.ty == Ity_I64); 2499//ZZ rA = iselIntExpr_R(env, e->Iex.Load.addr); 2500//ZZ tHi = newVRegI(env); 2501//ZZ tLo = newVRegI(env); 2502//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, 2503//ZZ tHi, ARMAMode1_RI(rA, 4))); 2504//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, 2505//ZZ tLo, ARMAMode1_RI(rA, 0))); 2506//ZZ *rHi = tHi; 2507//ZZ *rLo = tLo; 2508//ZZ return; 2509//ZZ } 2510//ZZ 2511//ZZ /* 64-bit GET */ 2512//ZZ if (e->tag == Iex_Get) { 2513//ZZ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0); 2514//ZZ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4); 2515//ZZ HReg tHi = newVRegI(env); 2516//ZZ HReg tLo = newVRegI(env); 2517//ZZ 
addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4)); 2518//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0)); 2519//ZZ *rHi = tHi; 2520//ZZ *rLo = tLo; 2521//ZZ return; 2522//ZZ } 2523//ZZ 2524//ZZ /* --------- BINARY ops --------- */ 2525//ZZ if (e->tag == Iex_Binop) { 2526//ZZ switch (e->Iex.Binop.op) { 2527//ZZ 2528//ZZ /* 32 x 32 -> 64 multiply */ 2529//ZZ case Iop_MullS32: 2530//ZZ case Iop_MullU32: { 2531//ZZ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 2532//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 2533//ZZ HReg tHi = newVRegI(env); 2534//ZZ HReg tLo = newVRegI(env); 2535//ZZ ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32 2536//ZZ ? ARMmul_SX : ARMmul_ZX; 2537//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL)); 2538//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR)); 2539//ZZ addInstr(env, ARMInstr_Mul(mop)); 2540//ZZ addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1())); 2541//ZZ addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0())); 2542//ZZ *rHi = tHi; 2543//ZZ *rLo = tLo; 2544//ZZ return; 2545//ZZ } 2546//ZZ 2547//ZZ case Iop_Or64: { 2548//ZZ HReg xLo, xHi, yLo, yHi; 2549//ZZ HReg tHi = newVRegI(env); 2550//ZZ HReg tLo = newVRegI(env); 2551//ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2552//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2553//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi))); 2554//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo))); 2555//ZZ *rHi = tHi; 2556//ZZ *rLo = tLo; 2557//ZZ return; 2558//ZZ } 2559//ZZ 2560//ZZ case Iop_Add64: { 2561//ZZ HReg xLo, xHi, yLo, yHi; 2562//ZZ HReg tHi = newVRegI(env); 2563//ZZ HReg tLo = newVRegI(env); 2564//ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2565//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2566//ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo))); 2567//ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi))); 2568//ZZ *rHi = tHi; 2569//ZZ *rLo = 
tLo; 2570//ZZ return; 2571//ZZ } 2572//ZZ 2573//ZZ /* 32HLto64(e1,e2) */ 2574//ZZ case Iop_32HLto64: { 2575//ZZ *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2576//ZZ *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2577//ZZ return; 2578//ZZ } 2579//ZZ 2580//ZZ default: 2581//ZZ break; 2582//ZZ } 2583//ZZ } 2584//ZZ 2585//ZZ /* --------- UNARY ops --------- */ 2586//ZZ if (e->tag == Iex_Unop) { 2587//ZZ switch (e->Iex.Unop.op) { 2588//ZZ 2589//ZZ /* ReinterpF64asI64 */ 2590//ZZ case Iop_ReinterpF64asI64: { 2591//ZZ HReg dstHi = newVRegI(env); 2592//ZZ HReg dstLo = newVRegI(env); 2593//ZZ HReg src = iselDblExpr(env, e->Iex.Unop.arg); 2594//ZZ addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo)); 2595//ZZ *rHi = dstHi; 2596//ZZ *rLo = dstLo; 2597//ZZ return; 2598//ZZ } 2599//ZZ 2600//ZZ /* Left64(e) */ 2601//ZZ case Iop_Left64: { 2602//ZZ HReg yLo, yHi; 2603//ZZ HReg tHi = newVRegI(env); 2604//ZZ HReg tLo = newVRegI(env); 2605//ZZ HReg zero = newVRegI(env); 2606//ZZ /* yHi:yLo = arg */ 2607//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg); 2608//ZZ /* zero = 0 */ 2609//ZZ addInstr(env, ARMInstr_Imm32(zero, 0)); 2610//ZZ /* tLo = 0 - yLo, and set carry */ 2611//ZZ addInstr(env, ARMInstr_Alu(ARMalu_SUBS, 2612//ZZ tLo, zero, ARMRI84_R(yLo))); 2613//ZZ /* tHi = 0 - yHi - carry */ 2614//ZZ addInstr(env, ARMInstr_Alu(ARMalu_SBC, 2615//ZZ tHi, zero, ARMRI84_R(yHi))); 2616//ZZ /* So now we have tHi:tLo = -arg. To finish off, or 'arg' 2617//ZZ back in, so as to give the final result 2618//ZZ tHi:tLo = arg | -arg. 
*/ 2619//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi))); 2620//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo))); 2621//ZZ *rHi = tHi; 2622//ZZ *rLo = tLo; 2623//ZZ return; 2624//ZZ } 2625//ZZ 2626//ZZ /* CmpwNEZ64(e) */ 2627//ZZ case Iop_CmpwNEZ64: { 2628//ZZ HReg srcLo, srcHi; 2629//ZZ HReg tmp1 = newVRegI(env); 2630//ZZ HReg tmp2 = newVRegI(env); 2631//ZZ /* srcHi:srcLo = arg */ 2632//ZZ iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg); 2633//ZZ /* tmp1 = srcHi | srcLo */ 2634//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, 2635//ZZ tmp1, srcHi, ARMRI84_R(srcLo))); 2636//ZZ /* tmp2 = (tmp1 | -tmp1) >>s 31 */ 2637//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1)); 2638//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, 2639//ZZ tmp2, tmp2, ARMRI84_R(tmp1))); 2640//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, 2641//ZZ tmp2, tmp2, ARMRI5_I5(31))); 2642//ZZ *rHi = tmp2; 2643//ZZ *rLo = tmp2; 2644//ZZ return; 2645//ZZ } 2646//ZZ 2647//ZZ case Iop_1Sto64: { 2648//ZZ HReg dst = newVRegI(env); 2649//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg); 2650//ZZ ARMRI5* amt = ARMRI5_I5(31); 2651//ZZ /* This is really rough. We could do much better here; 2652//ZZ perhaps mvn{cond} dst, #0 as the second insn? 
2653//ZZ (same applies to 1Sto32) */ 2654//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0))); 2655//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0))); 2656//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt)); 2657//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt)); 2658//ZZ *rHi = dst; 2659//ZZ *rLo = dst; 2660//ZZ return; 2661//ZZ } 2662//ZZ 2663//ZZ default: 2664//ZZ break; 2665//ZZ } 2666//ZZ } /* if (e->tag == Iex_Unop) */ 2667//ZZ 2668//ZZ /* --------- MULTIPLEX --------- */ 2669//ZZ if (e->tag == Iex_ITE) { // VFD 2670//ZZ IRType tyC; 2671//ZZ HReg r1hi, r1lo, r0hi, r0lo, dstHi, dstLo; 2672//ZZ ARMCondCode cc; 2673//ZZ tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond); 2674//ZZ vassert(tyC == Ity_I1); 2675//ZZ iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue); 2676//ZZ iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse); 2677//ZZ dstHi = newVRegI(env); 2678//ZZ dstLo = newVRegI(env); 2679//ZZ addInstr(env, mk_iMOVds_RR(dstHi, r1hi)); 2680//ZZ addInstr(env, mk_iMOVds_RR(dstLo, r1lo)); 2681//ZZ cc = iselCondCode(env, e->Iex.ITE.cond); 2682//ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi))); 2683//ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo))); 2684//ZZ *rHi = dstHi; 2685//ZZ *rLo = dstLo; 2686//ZZ return; 2687//ZZ } 2688//ZZ 2689//ZZ /* It is convenient sometimes to call iselInt64Expr even when we 2690//ZZ have NEON support (e.g. in do_helper_call we need 64-bit 2691//ZZ arguments as 2 x 32 regs). 
*/ 2692//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 2693//ZZ HReg tHi = newVRegI(env); 2694//ZZ HReg tLo = newVRegI(env); 2695//ZZ HReg tmp = iselNeon64Expr(env, e); 2696//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo)); 2697//ZZ *rHi = tHi; 2698//ZZ *rLo = tLo; 2699//ZZ return ; 2700//ZZ } 2701//ZZ 2702//ZZ ppIRExpr(e); 2703//ZZ vpanic("iselInt64Expr"); 2704//ZZ } 2705//ZZ 2706//ZZ 2707//ZZ /*---------------------------------------------------------*/ 2708//ZZ /*--- ISEL: Vector (NEON) expressions (64 bit) ---*/ 2709//ZZ /*---------------------------------------------------------*/ 2710//ZZ 2711//ZZ static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e ) 2712//ZZ { 2713//ZZ HReg r = iselNeon64Expr_wrk( env, e ); 2714//ZZ vassert(hregClass(r) == HRcFlt64); 2715//ZZ vassert(hregIsVirtual(r)); 2716//ZZ return r; 2717//ZZ } 2718//ZZ 2719//ZZ /* DO NOT CALL THIS DIRECTLY */ 2720//ZZ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e ) 2721//ZZ { 2722//ZZ IRType ty = typeOfIRExpr(env->type_env, e); 2723//ZZ MatchInfo mi; 2724//ZZ vassert(e); 2725//ZZ vassert(ty == Ity_I64); 2726//ZZ 2727//ZZ if (e->tag == Iex_RdTmp) { 2728//ZZ return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2729//ZZ } 2730//ZZ 2731//ZZ if (e->tag == Iex_Const) { 2732//ZZ HReg rLo, rHi; 2733//ZZ HReg res = newVRegD(env); 2734//ZZ iselInt64Expr(&rHi, &rLo, env, e); 2735//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 2736//ZZ return res; 2737//ZZ } 2738//ZZ 2739//ZZ /* 64-bit load */ 2740//ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2741//ZZ HReg res = newVRegD(env); 2742//ZZ ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr); 2743//ZZ vassert(ty == Ity_I64); 2744//ZZ addInstr(env, ARMInstr_NLdStD(True, res, am)); 2745//ZZ return res; 2746//ZZ } 2747//ZZ 2748//ZZ /* 64-bit GET */ 2749//ZZ if (e->tag == Iex_Get) { 2750//ZZ HReg addr = newVRegI(env); 2751//ZZ HReg res = newVRegD(env); 2752//ZZ vassert(ty == Ity_I64); 2753//ZZ addInstr(env, ARMInstr_Add32(addr, 
hregARM_R8(), e->Iex.Get.offset)); 2754//ZZ addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr))); 2755//ZZ return res; 2756//ZZ } 2757//ZZ 2758//ZZ /* --------- BINARY ops --------- */ 2759//ZZ if (e->tag == Iex_Binop) { 2760//ZZ switch (e->Iex.Binop.op) { 2761//ZZ 2762//ZZ /* 32 x 32 -> 64 multiply */ 2763//ZZ case Iop_MullS32: 2764//ZZ case Iop_MullU32: { 2765//ZZ HReg rLo, rHi; 2766//ZZ HReg res = newVRegD(env); 2767//ZZ iselInt64Expr(&rHi, &rLo, env, e); 2768//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 2769//ZZ return res; 2770//ZZ } 2771//ZZ 2772//ZZ case Iop_And64: { 2773//ZZ HReg res = newVRegD(env); 2774//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2775//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2776//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND, 2777//ZZ res, argL, argR, 4, False)); 2778//ZZ return res; 2779//ZZ } 2780//ZZ case Iop_Or64: { 2781//ZZ HReg res = newVRegD(env); 2782//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2783//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2784//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 2785//ZZ res, argL, argR, 4, False)); 2786//ZZ return res; 2787//ZZ } 2788//ZZ case Iop_Xor64: { 2789//ZZ HReg res = newVRegD(env); 2790//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2791//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2792//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR, 2793//ZZ res, argL, argR, 4, False)); 2794//ZZ return res; 2795//ZZ } 2796//ZZ 2797//ZZ /* 32HLto64(e1,e2) */ 2798//ZZ case Iop_32HLto64: { 2799//ZZ HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2800//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2801//ZZ HReg res = newVRegD(env); 2802//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 2803//ZZ return res; 2804//ZZ } 2805//ZZ 2806//ZZ case Iop_Add8x8: 2807//ZZ case Iop_Add16x4: 2808//ZZ case Iop_Add32x2: 2809//ZZ case Iop_Add64: { 2810//ZZ HReg res = newVRegD(env); 2811//ZZ HReg argL 
= iselNeon64Expr(env, e->Iex.Binop.arg1); 2812//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2813//ZZ UInt size; 2814//ZZ switch (e->Iex.Binop.op) { 2815//ZZ case Iop_Add8x8: size = 0; break; 2816//ZZ case Iop_Add16x4: size = 1; break; 2817//ZZ case Iop_Add32x2: size = 2; break; 2818//ZZ case Iop_Add64: size = 3; break; 2819//ZZ default: vassert(0); 2820//ZZ } 2821//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADD, 2822//ZZ res, argL, argR, size, False)); 2823//ZZ return res; 2824//ZZ } 2825//ZZ case Iop_Add32Fx2: { 2826//ZZ HReg res = newVRegD(env); 2827//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2828//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2829//ZZ UInt size = 0; 2830//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP, 2831//ZZ res, argL, argR, size, False)); 2832//ZZ return res; 2833//ZZ } 2834//ZZ case Iop_Recps32Fx2: { 2835//ZZ HReg res = newVRegD(env); 2836//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2837//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2838//ZZ UInt size = 0; 2839//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS, 2840//ZZ res, argL, argR, size, False)); 2841//ZZ return res; 2842//ZZ } 2843//ZZ case Iop_Rsqrts32Fx2: { 2844//ZZ HReg res = newVRegD(env); 2845//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2846//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2847//ZZ UInt size = 0; 2848//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS, 2849//ZZ res, argL, argR, size, False)); 2850//ZZ return res; 2851//ZZ } 2852//ZZ 2853//ZZ // These 6 verified 18 Apr 2013 2854//ZZ case Iop_InterleaveHI32x2: 2855//ZZ case Iop_InterleaveLO32x2: 2856//ZZ case Iop_InterleaveOddLanes8x8: 2857//ZZ case Iop_InterleaveEvenLanes8x8: 2858//ZZ case Iop_InterleaveOddLanes16x4: 2859//ZZ case Iop_InterleaveEvenLanes16x4: { 2860//ZZ HReg rD = newVRegD(env); 2861//ZZ HReg rM = newVRegD(env); 2862//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2863//ZZ HReg argR = iselNeon64Expr(env, 
e->Iex.Binop.arg2); 2864//ZZ UInt size; 2865//ZZ Bool resRd; // is the result in rD or rM ? 2866//ZZ switch (e->Iex.Binop.op) { 2867//ZZ case Iop_InterleaveOddLanes8x8: resRd = False; size = 0; break; 2868//ZZ case Iop_InterleaveEvenLanes8x8: resRd = True; size = 0; break; 2869//ZZ case Iop_InterleaveOddLanes16x4: resRd = False; size = 1; break; 2870//ZZ case Iop_InterleaveEvenLanes16x4: resRd = True; size = 1; break; 2871//ZZ case Iop_InterleaveHI32x2: resRd = False; size = 2; break; 2872//ZZ case Iop_InterleaveLO32x2: resRd = True; size = 2; break; 2873//ZZ default: vassert(0); 2874//ZZ } 2875//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False)); 2876//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False)); 2877//ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False)); 2878//ZZ return resRd ? rD : rM; 2879//ZZ } 2880//ZZ 2881//ZZ // These 4 verified 18 Apr 2013 2882//ZZ case Iop_InterleaveHI8x8: 2883//ZZ case Iop_InterleaveLO8x8: 2884//ZZ case Iop_InterleaveHI16x4: 2885//ZZ case Iop_InterleaveLO16x4: { 2886//ZZ HReg rD = newVRegD(env); 2887//ZZ HReg rM = newVRegD(env); 2888//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2889//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2890//ZZ UInt size; 2891//ZZ Bool resRd; // is the result in rD or rM ? 2892//ZZ switch (e->Iex.Binop.op) { 2893//ZZ case Iop_InterleaveHI8x8: resRd = False; size = 0; break; 2894//ZZ case Iop_InterleaveLO8x8: resRd = True; size = 0; break; 2895//ZZ case Iop_InterleaveHI16x4: resRd = False; size = 1; break; 2896//ZZ case Iop_InterleaveLO16x4: resRd = True; size = 1; break; 2897//ZZ default: vassert(0); 2898//ZZ } 2899//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False)); 2900//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False)); 2901//ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False)); 2902//ZZ return resRd ? 
rD : rM; 2903//ZZ } 2904//ZZ 2905//ZZ // These 4 verified 18 Apr 2013 2906//ZZ case Iop_CatOddLanes8x8: 2907//ZZ case Iop_CatEvenLanes8x8: 2908//ZZ case Iop_CatOddLanes16x4: 2909//ZZ case Iop_CatEvenLanes16x4: { 2910//ZZ HReg rD = newVRegD(env); 2911//ZZ HReg rM = newVRegD(env); 2912//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2913//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2914//ZZ UInt size; 2915//ZZ Bool resRd; // is the result in rD or rM ? 2916//ZZ switch (e->Iex.Binop.op) { 2917//ZZ case Iop_CatOddLanes8x8: resRd = False; size = 0; break; 2918//ZZ case Iop_CatEvenLanes8x8: resRd = True; size = 0; break; 2919//ZZ case Iop_CatOddLanes16x4: resRd = False; size = 1; break; 2920//ZZ case Iop_CatEvenLanes16x4: resRd = True; size = 1; break; 2921//ZZ default: vassert(0); 2922//ZZ } 2923//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False)); 2924//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False)); 2925//ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False)); 2926//ZZ return resRd ? 
rD : rM; 2927//ZZ } 2928//ZZ 2929//ZZ case Iop_QAdd8Ux8: 2930//ZZ case Iop_QAdd16Ux4: 2931//ZZ case Iop_QAdd32Ux2: 2932//ZZ case Iop_QAdd64Ux1: { 2933//ZZ HReg res = newVRegD(env); 2934//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2935//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2936//ZZ UInt size; 2937//ZZ switch (e->Iex.Binop.op) { 2938//ZZ case Iop_QAdd8Ux8: size = 0; break; 2939//ZZ case Iop_QAdd16Ux4: size = 1; break; 2940//ZZ case Iop_QAdd32Ux2: size = 2; break; 2941//ZZ case Iop_QAdd64Ux1: size = 3; break; 2942//ZZ default: vassert(0); 2943//ZZ } 2944//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU, 2945//ZZ res, argL, argR, size, False)); 2946//ZZ return res; 2947//ZZ } 2948//ZZ case Iop_QAdd8Sx8: 2949//ZZ case Iop_QAdd16Sx4: 2950//ZZ case Iop_QAdd32Sx2: 2951//ZZ case Iop_QAdd64Sx1: { 2952//ZZ HReg res = newVRegD(env); 2953//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2954//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2955//ZZ UInt size; 2956//ZZ switch (e->Iex.Binop.op) { 2957//ZZ case Iop_QAdd8Sx8: size = 0; break; 2958//ZZ case Iop_QAdd16Sx4: size = 1; break; 2959//ZZ case Iop_QAdd32Sx2: size = 2; break; 2960//ZZ case Iop_QAdd64Sx1: size = 3; break; 2961//ZZ default: vassert(0); 2962//ZZ } 2963//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS, 2964//ZZ res, argL, argR, size, False)); 2965//ZZ return res; 2966//ZZ } 2967//ZZ case Iop_Sub8x8: 2968//ZZ case Iop_Sub16x4: 2969//ZZ case Iop_Sub32x2: 2970//ZZ case Iop_Sub64: { 2971//ZZ HReg res = newVRegD(env); 2972//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2973//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2974//ZZ UInt size; 2975//ZZ switch (e->Iex.Binop.op) { 2976//ZZ case Iop_Sub8x8: size = 0; break; 2977//ZZ case Iop_Sub16x4: size = 1; break; 2978//ZZ case Iop_Sub32x2: size = 2; break; 2979//ZZ case Iop_Sub64: size = 3; break; 2980//ZZ default: vassert(0); 2981//ZZ } 2982//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 2983//ZZ res, 
argL, argR, size, False)); 2984//ZZ return res; 2985//ZZ } 2986//ZZ case Iop_Sub32Fx2: { 2987//ZZ HReg res = newVRegD(env); 2988//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2989//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2990//ZZ UInt size = 0; 2991//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP, 2992//ZZ res, argL, argR, size, False)); 2993//ZZ return res; 2994//ZZ } 2995//ZZ case Iop_QSub8Ux8: 2996//ZZ case Iop_QSub16Ux4: 2997//ZZ case Iop_QSub32Ux2: 2998//ZZ case Iop_QSub64Ux1: { 2999//ZZ HReg res = newVRegD(env); 3000//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3001//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3002//ZZ UInt size; 3003//ZZ switch (e->Iex.Binop.op) { 3004//ZZ case Iop_QSub8Ux8: size = 0; break; 3005//ZZ case Iop_QSub16Ux4: size = 1; break; 3006//ZZ case Iop_QSub32Ux2: size = 2; break; 3007//ZZ case Iop_QSub64Ux1: size = 3; break; 3008//ZZ default: vassert(0); 3009//ZZ } 3010//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU, 3011//ZZ res, argL, argR, size, False)); 3012//ZZ return res; 3013//ZZ } 3014//ZZ case Iop_QSub8Sx8: 3015//ZZ case Iop_QSub16Sx4: 3016//ZZ case Iop_QSub32Sx2: 3017//ZZ case Iop_QSub64Sx1: { 3018//ZZ HReg res = newVRegD(env); 3019//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3020//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3021//ZZ UInt size; 3022//ZZ switch (e->Iex.Binop.op) { 3023//ZZ case Iop_QSub8Sx8: size = 0; break; 3024//ZZ case Iop_QSub16Sx4: size = 1; break; 3025//ZZ case Iop_QSub32Sx2: size = 2; break; 3026//ZZ case Iop_QSub64Sx1: size = 3; break; 3027//ZZ default: vassert(0); 3028//ZZ } 3029//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS, 3030//ZZ res, argL, argR, size, False)); 3031//ZZ return res; 3032//ZZ } 3033//ZZ case Iop_Max8Ux8: 3034//ZZ case Iop_Max16Ux4: 3035//ZZ case Iop_Max32Ux2: { 3036//ZZ HReg res = newVRegD(env); 3037//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3038//ZZ HReg argR = iselNeon64Expr(env, 
e->Iex.Binop.arg2); 3039//ZZ UInt size; 3040//ZZ switch (e->Iex.Binop.op) { 3041//ZZ case Iop_Max8Ux8: size = 0; break; 3042//ZZ case Iop_Max16Ux4: size = 1; break; 3043//ZZ case Iop_Max32Ux2: size = 2; break; 3044//ZZ default: vassert(0); 3045//ZZ } 3046//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU, 3047//ZZ res, argL, argR, size, False)); 3048//ZZ return res; 3049//ZZ } 3050//ZZ case Iop_Max8Sx8: 3051//ZZ case Iop_Max16Sx4: 3052//ZZ case Iop_Max32Sx2: { 3053//ZZ HReg res = newVRegD(env); 3054//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3055//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3056//ZZ UInt size; 3057//ZZ switch (e->Iex.Binop.op) { 3058//ZZ case Iop_Max8Sx8: size = 0; break; 3059//ZZ case Iop_Max16Sx4: size = 1; break; 3060//ZZ case Iop_Max32Sx2: size = 2; break; 3061//ZZ default: vassert(0); 3062//ZZ } 3063//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS, 3064//ZZ res, argL, argR, size, False)); 3065//ZZ return res; 3066//ZZ } 3067//ZZ case Iop_Min8Ux8: 3068//ZZ case Iop_Min16Ux4: 3069//ZZ case Iop_Min32Ux2: { 3070//ZZ HReg res = newVRegD(env); 3071//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3072//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3073//ZZ UInt size; 3074//ZZ switch (e->Iex.Binop.op) { 3075//ZZ case Iop_Min8Ux8: size = 0; break; 3076//ZZ case Iop_Min16Ux4: size = 1; break; 3077//ZZ case Iop_Min32Ux2: size = 2; break; 3078//ZZ default: vassert(0); 3079//ZZ } 3080//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU, 3081//ZZ res, argL, argR, size, False)); 3082//ZZ return res; 3083//ZZ } 3084//ZZ case Iop_Min8Sx8: 3085//ZZ case Iop_Min16Sx4: 3086//ZZ case Iop_Min32Sx2: { 3087//ZZ HReg res = newVRegD(env); 3088//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3089//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3090//ZZ UInt size; 3091//ZZ switch (e->Iex.Binop.op) { 3092//ZZ case Iop_Min8Sx8: size = 0; break; 3093//ZZ case Iop_Min16Sx4: size = 1; break; 3094//ZZ case Iop_Min32Sx2: 
size = 2; break; 3095//ZZ default: vassert(0); 3096//ZZ } 3097//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS, 3098//ZZ res, argL, argR, size, False)); 3099//ZZ return res; 3100//ZZ } 3101//ZZ case Iop_Sar8x8: 3102//ZZ case Iop_Sar16x4: 3103//ZZ case Iop_Sar32x2: { 3104//ZZ HReg res = newVRegD(env); 3105//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3106//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3107//ZZ HReg argR2 = newVRegD(env); 3108//ZZ HReg zero = newVRegD(env); 3109//ZZ UInt size; 3110//ZZ switch (e->Iex.Binop.op) { 3111//ZZ case Iop_Sar8x8: size = 0; break; 3112//ZZ case Iop_Sar16x4: size = 1; break; 3113//ZZ case Iop_Sar32x2: size = 2; break; 3114//ZZ case Iop_Sar64: size = 3; break; 3115//ZZ default: vassert(0); 3116//ZZ } 3117//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); 3118//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 3119//ZZ argR2, zero, argR, size, False)); 3120//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 3121//ZZ res, argL, argR2, size, False)); 3122//ZZ return res; 3123//ZZ } 3124//ZZ case Iop_Sal8x8: 3125//ZZ case Iop_Sal16x4: 3126//ZZ case Iop_Sal32x2: 3127//ZZ case Iop_Sal64x1: { 3128//ZZ HReg res = newVRegD(env); 3129//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3130//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3131//ZZ UInt size; 3132//ZZ switch (e->Iex.Binop.op) { 3133//ZZ case Iop_Sal8x8: size = 0; break; 3134//ZZ case Iop_Sal16x4: size = 1; break; 3135//ZZ case Iop_Sal32x2: size = 2; break; 3136//ZZ case Iop_Sal64x1: size = 3; break; 3137//ZZ default: vassert(0); 3138//ZZ } 3139//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 3140//ZZ res, argL, argR, size, False)); 3141//ZZ return res; 3142//ZZ } 3143//ZZ case Iop_Shr8x8: 3144//ZZ case Iop_Shr16x4: 3145//ZZ case Iop_Shr32x2: { 3146//ZZ HReg res = newVRegD(env); 3147//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3148//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3149//ZZ HReg argR2 = newVRegD(env); 
3150//ZZ HReg zero = newVRegD(env); 3151//ZZ UInt size; 3152//ZZ switch (e->Iex.Binop.op) { 3153//ZZ case Iop_Shr8x8: size = 0; break; 3154//ZZ case Iop_Shr16x4: size = 1; break; 3155//ZZ case Iop_Shr32x2: size = 2; break; 3156//ZZ default: vassert(0); 3157//ZZ } 3158//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); 3159//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 3160//ZZ argR2, zero, argR, size, False)); 3161//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 3162//ZZ res, argL, argR2, size, False)); 3163//ZZ return res; 3164//ZZ } 3165//ZZ case Iop_Shl8x8: 3166//ZZ case Iop_Shl16x4: 3167//ZZ case Iop_Shl32x2: { 3168//ZZ HReg res = newVRegD(env); 3169//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3170//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3171//ZZ UInt size; 3172//ZZ switch (e->Iex.Binop.op) { 3173//ZZ case Iop_Shl8x8: size = 0; break; 3174//ZZ case Iop_Shl16x4: size = 1; break; 3175//ZZ case Iop_Shl32x2: size = 2; break; 3176//ZZ default: vassert(0); 3177//ZZ } 3178//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 3179//ZZ res, argL, argR, size, False)); 3180//ZZ return res; 3181//ZZ } 3182//ZZ case Iop_QShl8x8: 3183//ZZ case Iop_QShl16x4: 3184//ZZ case Iop_QShl32x2: 3185//ZZ case Iop_QShl64x1: { 3186//ZZ HReg res = newVRegD(env); 3187//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3188//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3189//ZZ UInt size; 3190//ZZ switch (e->Iex.Binop.op) { 3191//ZZ case Iop_QShl8x8: size = 0; break; 3192//ZZ case Iop_QShl16x4: size = 1; break; 3193//ZZ case Iop_QShl32x2: size = 2; break; 3194//ZZ case Iop_QShl64x1: size = 3; break; 3195//ZZ default: vassert(0); 3196//ZZ } 3197//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL, 3198//ZZ res, argL, argR, size, False)); 3199//ZZ return res; 3200//ZZ } 3201//ZZ case Iop_QSal8x8: 3202//ZZ case Iop_QSal16x4: 3203//ZZ case Iop_QSal32x2: 3204//ZZ case Iop_QSal64x1: { 3205//ZZ HReg res = newVRegD(env); 3206//ZZ HReg argL = 
iselNeon64Expr(env, e->Iex.Binop.arg1); 3207//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3208//ZZ UInt size; 3209//ZZ switch (e->Iex.Binop.op) { 3210//ZZ case Iop_QSal8x8: size = 0; break; 3211//ZZ case Iop_QSal16x4: size = 1; break; 3212//ZZ case Iop_QSal32x2: size = 2; break; 3213//ZZ case Iop_QSal64x1: size = 3; break; 3214//ZZ default: vassert(0); 3215//ZZ } 3216//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL, 3217//ZZ res, argL, argR, size, False)); 3218//ZZ return res; 3219//ZZ } 3220//ZZ case Iop_QShlN8x8: 3221//ZZ case Iop_QShlN16x4: 3222//ZZ case Iop_QShlN32x2: 3223//ZZ case Iop_QShlN64x1: { 3224//ZZ HReg res = newVRegD(env); 3225//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3226//ZZ UInt size, imm; 3227//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || 3228//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 3229//ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant " 3230//ZZ "second argument only\n"); 3231//ZZ } 3232//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 3233//ZZ switch (e->Iex.Binop.op) { 3234//ZZ case Iop_QShlN8x8: size = 8 | imm; break; 3235//ZZ case Iop_QShlN16x4: size = 16 | imm; break; 3236//ZZ case Iop_QShlN32x2: size = 32 | imm; break; 3237//ZZ case Iop_QShlN64x1: size = 64 | imm; break; 3238//ZZ default: vassert(0); 3239//ZZ } 3240//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU, 3241//ZZ res, argL, size, False)); 3242//ZZ return res; 3243//ZZ } 3244//ZZ case Iop_QShlN8Sx8: 3245//ZZ case Iop_QShlN16Sx4: 3246//ZZ case Iop_QShlN32Sx2: 3247//ZZ case Iop_QShlN64Sx1: { 3248//ZZ HReg res = newVRegD(env); 3249//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3250//ZZ UInt size, imm; 3251//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || 3252//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 3253//ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant " 3254//ZZ "second argument only\n"); 3255//ZZ } 3256//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 3257//ZZ switch 
(e->Iex.Binop.op) { 3258//ZZ case Iop_QShlN8Sx8: size = 8 | imm; break; 3259//ZZ case Iop_QShlN16Sx4: size = 16 | imm; break; 3260//ZZ case Iop_QShlN32Sx2: size = 32 | imm; break; 3261//ZZ case Iop_QShlN64Sx1: size = 64 | imm; break; 3262//ZZ default: vassert(0); 3263//ZZ } 3264//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS, 3265//ZZ res, argL, size, False)); 3266//ZZ return res; 3267//ZZ } 3268//ZZ case Iop_QSalN8x8: 3269//ZZ case Iop_QSalN16x4: 3270//ZZ case Iop_QSalN32x2: 3271//ZZ case Iop_QSalN64x1: { 3272//ZZ HReg res = newVRegD(env); 3273//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3274//ZZ UInt size, imm; 3275//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || 3276//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 3277//ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant " 3278//ZZ "second argument only\n"); 3279//ZZ } 3280//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 3281//ZZ switch (e->Iex.Binop.op) { 3282//ZZ case Iop_QSalN8x8: size = 8 | imm; break; 3283//ZZ case Iop_QSalN16x4: size = 16 | imm; break; 3284//ZZ case Iop_QSalN32x2: size = 32 | imm; break; 3285//ZZ case Iop_QSalN64x1: size = 64 | imm; break; 3286//ZZ default: vassert(0); 3287//ZZ } 3288//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS, 3289//ZZ res, argL, size, False)); 3290//ZZ return res; 3291//ZZ } 3292//ZZ case Iop_ShrN8x8: 3293//ZZ case Iop_ShrN16x4: 3294//ZZ case Iop_ShrN32x2: 3295//ZZ case Iop_Shr64: { 3296//ZZ HReg res = newVRegD(env); 3297//ZZ HReg tmp = newVRegD(env); 3298//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3299//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 3300//ZZ HReg argR2 = newVRegI(env); 3301//ZZ UInt size; 3302//ZZ switch (e->Iex.Binop.op) { 3303//ZZ case Iop_ShrN8x8: size = 0; break; 3304//ZZ case Iop_ShrN16x4: size = 1; break; 3305//ZZ case Iop_ShrN32x2: size = 2; break; 3306//ZZ case Iop_Shr64: size = 3; break; 3307//ZZ default: vassert(0); 3308//ZZ } 3309//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, 
argR2, argR)); 3310//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False)); 3311//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 3312//ZZ res, argL, tmp, size, False)); 3313//ZZ return res; 3314//ZZ } 3315//ZZ case Iop_ShlN8x8: 3316//ZZ case Iop_ShlN16x4: 3317//ZZ case Iop_ShlN32x2: 3318//ZZ case Iop_Shl64: { 3319//ZZ HReg res = newVRegD(env); 3320//ZZ HReg tmp = newVRegD(env); 3321//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3322//ZZ /* special-case Shl64(x, imm8) since the Neon front 3323//ZZ end produces a lot of those for V{LD,ST}{1,2,3,4}. */ 3324//ZZ if (e->Iex.Binop.op == Iop_Shl64 3325//ZZ && e->Iex.Binop.arg2->tag == Iex_Const) { 3326//ZZ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 3327//ZZ Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 3328//ZZ if (nshift >= 1 && nshift <= 63) { 3329//ZZ addInstr(env, ARMInstr_NShl64(res, argL, nshift)); 3330//ZZ return res; 3331//ZZ } 3332//ZZ /* else fall through to general case */ 3333//ZZ } 3334//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 3335//ZZ UInt size; 3336//ZZ switch (e->Iex.Binop.op) { 3337//ZZ case Iop_ShlN8x8: size = 0; break; 3338//ZZ case Iop_ShlN16x4: size = 1; break; 3339//ZZ case Iop_ShlN32x2: size = 2; break; 3340//ZZ case Iop_Shl64: size = 3; break; 3341//ZZ default: vassert(0); 3342//ZZ } 3343//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, 3344//ZZ tmp, argR, 0, False)); 3345//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 3346//ZZ res, argL, tmp, size, False)); 3347//ZZ return res; 3348//ZZ } 3349//ZZ case Iop_SarN8x8: 3350//ZZ case Iop_SarN16x4: 3351//ZZ case Iop_SarN32x2: 3352//ZZ case Iop_Sar64: { 3353//ZZ HReg res = newVRegD(env); 3354//ZZ HReg tmp = newVRegD(env); 3355//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3356//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 3357//ZZ HReg argR2 = newVRegI(env); 3358//ZZ UInt size; 3359//ZZ switch (e->Iex.Binop.op) { 3360//ZZ case Iop_SarN8x8: size = 0; break; 3361//ZZ case 
Iop_SarN16x4: size = 1; break; 3362//ZZ case Iop_SarN32x2: size = 2; break; 3363//ZZ case Iop_Sar64: size = 3; break; 3364//ZZ default: vassert(0); 3365//ZZ } 3366//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); 3367//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False)); 3368//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 3369//ZZ res, argL, tmp, size, False)); 3370//ZZ return res; 3371//ZZ } 3372//ZZ case Iop_CmpGT8Ux8: 3373//ZZ case Iop_CmpGT16Ux4: 3374//ZZ case Iop_CmpGT32Ux2: { 3375//ZZ HReg res = newVRegD(env); 3376//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3377//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3378//ZZ UInt size; 3379//ZZ switch (e->Iex.Binop.op) { 3380//ZZ case Iop_CmpGT8Ux8: size = 0; break; 3381//ZZ case Iop_CmpGT16Ux4: size = 1; break; 3382//ZZ case Iop_CmpGT32Ux2: size = 2; break; 3383//ZZ default: vassert(0); 3384//ZZ } 3385//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU, 3386//ZZ res, argL, argR, size, False)); 3387//ZZ return res; 3388//ZZ } 3389//ZZ case Iop_CmpGT8Sx8: 3390//ZZ case Iop_CmpGT16Sx4: 3391//ZZ case Iop_CmpGT32Sx2: { 3392//ZZ HReg res = newVRegD(env); 3393//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3394//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3395//ZZ UInt size; 3396//ZZ switch (e->Iex.Binop.op) { 3397//ZZ case Iop_CmpGT8Sx8: size = 0; break; 3398//ZZ case Iop_CmpGT16Sx4: size = 1; break; 3399//ZZ case Iop_CmpGT32Sx2: size = 2; break; 3400//ZZ default: vassert(0); 3401//ZZ } 3402//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS, 3403//ZZ res, argL, argR, size, False)); 3404//ZZ return res; 3405//ZZ } 3406//ZZ case Iop_CmpEQ8x8: 3407//ZZ case Iop_CmpEQ16x4: 3408//ZZ case Iop_CmpEQ32x2: { 3409//ZZ HReg res = newVRegD(env); 3410//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3411//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3412//ZZ UInt size; 3413//ZZ switch (e->Iex.Binop.op) { 3414//ZZ case Iop_CmpEQ8x8: size = 0; break; 
3415//ZZ case Iop_CmpEQ16x4: size = 1; break; 3416//ZZ case Iop_CmpEQ32x2: size = 2; break; 3417//ZZ default: vassert(0); 3418//ZZ } 3419//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ, 3420//ZZ res, argL, argR, size, False)); 3421//ZZ return res; 3422//ZZ } 3423//ZZ case Iop_Mul8x8: 3424//ZZ case Iop_Mul16x4: 3425//ZZ case Iop_Mul32x2: { 3426//ZZ HReg res = newVRegD(env); 3427//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3428//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3429//ZZ UInt size = 0; 3430//ZZ switch(e->Iex.Binop.op) { 3431//ZZ case Iop_Mul8x8: size = 0; break; 3432//ZZ case Iop_Mul16x4: size = 1; break; 3433//ZZ case Iop_Mul32x2: size = 2; break; 3434//ZZ default: vassert(0); 3435//ZZ } 3436//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL, 3437//ZZ res, argL, argR, size, False)); 3438//ZZ return res; 3439//ZZ } 3440//ZZ case Iop_Mul32Fx2: { 3441//ZZ HReg res = newVRegD(env); 3442//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3443//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3444//ZZ UInt size = 0; 3445//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP, 3446//ZZ res, argL, argR, size, False)); 3447//ZZ return res; 3448//ZZ } 3449//ZZ case Iop_QDMulHi16Sx4: 3450//ZZ case Iop_QDMulHi32Sx2: { 3451//ZZ HReg res = newVRegD(env); 3452//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3453//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3454//ZZ UInt size = 0; 3455//ZZ switch(e->Iex.Binop.op) { 3456//ZZ case Iop_QDMulHi16Sx4: size = 1; break; 3457//ZZ case Iop_QDMulHi32Sx2: size = 2; break; 3458//ZZ default: vassert(0); 3459//ZZ } 3460//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH, 3461//ZZ res, argL, argR, size, False)); 3462//ZZ return res; 3463//ZZ } 3464//ZZ 3465//ZZ case Iop_QRDMulHi16Sx4: 3466//ZZ case Iop_QRDMulHi32Sx2: { 3467//ZZ HReg res = newVRegD(env); 3468//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3469//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3470//ZZ UInt 
size = 0; 3471//ZZ switch(e->Iex.Binop.op) { 3472//ZZ case Iop_QRDMulHi16Sx4: size = 1; break; 3473//ZZ case Iop_QRDMulHi32Sx2: size = 2; break; 3474//ZZ default: vassert(0); 3475//ZZ } 3476//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH, 3477//ZZ res, argL, argR, size, False)); 3478//ZZ return res; 3479//ZZ } 3480//ZZ 3481//ZZ case Iop_PwAdd8x8: 3482//ZZ case Iop_PwAdd16x4: 3483//ZZ case Iop_PwAdd32x2: { 3484//ZZ HReg res = newVRegD(env); 3485//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3486//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3487//ZZ UInt size = 0; 3488//ZZ switch(e->Iex.Binop.op) { 3489//ZZ case Iop_PwAdd8x8: size = 0; break; 3490//ZZ case Iop_PwAdd16x4: size = 1; break; 3491//ZZ case Iop_PwAdd32x2: size = 2; break; 3492//ZZ default: vassert(0); 3493//ZZ } 3494//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD, 3495//ZZ res, argL, argR, size, False)); 3496//ZZ return res; 3497//ZZ } 3498//ZZ case Iop_PwAdd32Fx2: { 3499//ZZ HReg res = newVRegD(env); 3500//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3501//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3502//ZZ UInt size = 0; 3503//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP, 3504//ZZ res, argL, argR, size, False)); 3505//ZZ return res; 3506//ZZ } 3507//ZZ case Iop_PwMin8Ux8: 3508//ZZ case Iop_PwMin16Ux4: 3509//ZZ case Iop_PwMin32Ux2: { 3510//ZZ HReg res = newVRegD(env); 3511//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3512//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3513//ZZ UInt size = 0; 3514//ZZ switch(e->Iex.Binop.op) { 3515//ZZ case Iop_PwMin8Ux8: size = 0; break; 3516//ZZ case Iop_PwMin16Ux4: size = 1; break; 3517//ZZ case Iop_PwMin32Ux2: size = 2; break; 3518//ZZ default: vassert(0); 3519//ZZ } 3520//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU, 3521//ZZ res, argL, argR, size, False)); 3522//ZZ return res; 3523//ZZ } 3524//ZZ case Iop_PwMin8Sx8: 3525//ZZ case Iop_PwMin16Sx4: 3526//ZZ case Iop_PwMin32Sx2: { 3527//ZZ 
HReg res = newVRegD(env); 3528//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3529//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3530//ZZ UInt size = 0; 3531//ZZ switch(e->Iex.Binop.op) { 3532//ZZ case Iop_PwMin8Sx8: size = 0; break; 3533//ZZ case Iop_PwMin16Sx4: size = 1; break; 3534//ZZ case Iop_PwMin32Sx2: size = 2; break; 3535//ZZ default: vassert(0); 3536//ZZ } 3537//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS, 3538//ZZ res, argL, argR, size, False)); 3539//ZZ return res; 3540//ZZ } 3541//ZZ case Iop_PwMax8Ux8: 3542//ZZ case Iop_PwMax16Ux4: 3543//ZZ case Iop_PwMax32Ux2: { 3544//ZZ HReg res = newVRegD(env); 3545//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3546//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3547//ZZ UInt size = 0; 3548//ZZ switch(e->Iex.Binop.op) { 3549//ZZ case Iop_PwMax8Ux8: size = 0; break; 3550//ZZ case Iop_PwMax16Ux4: size = 1; break; 3551//ZZ case Iop_PwMax32Ux2: size = 2; break; 3552//ZZ default: vassert(0); 3553//ZZ } 3554//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU, 3555//ZZ res, argL, argR, size, False)); 3556//ZZ return res; 3557//ZZ } 3558//ZZ case Iop_PwMax8Sx8: 3559//ZZ case Iop_PwMax16Sx4: 3560//ZZ case Iop_PwMax32Sx2: { 3561//ZZ HReg res = newVRegD(env); 3562//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3563//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3564//ZZ UInt size = 0; 3565//ZZ switch(e->Iex.Binop.op) { 3566//ZZ case Iop_PwMax8Sx8: size = 0; break; 3567//ZZ case Iop_PwMax16Sx4: size = 1; break; 3568//ZZ case Iop_PwMax32Sx2: size = 2; break; 3569//ZZ default: vassert(0); 3570//ZZ } 3571//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS, 3572//ZZ res, argL, argR, size, False)); 3573//ZZ return res; 3574//ZZ } 3575//ZZ case Iop_Perm8x8: { 3576//ZZ HReg res = newVRegD(env); 3577//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3578//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3579//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VTBL, 3580//ZZ 
res, argL, argR, 0, False)); 3581//ZZ return res; 3582//ZZ } 3583//ZZ case Iop_PolynomialMul8x8: { 3584//ZZ HReg res = newVRegD(env); 3585//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3586//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3587//ZZ UInt size = 0; 3588//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP, 3589//ZZ res, argL, argR, size, False)); 3590//ZZ return res; 3591//ZZ } 3592//ZZ case Iop_Max32Fx2: { 3593//ZZ HReg res = newVRegD(env); 3594//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3595//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3596//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF, 3597//ZZ res, argL, argR, 2, False)); 3598//ZZ return res; 3599//ZZ } 3600//ZZ case Iop_Min32Fx2: { 3601//ZZ HReg res = newVRegD(env); 3602//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3603//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3604//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF, 3605//ZZ res, argL, argR, 2, False)); 3606//ZZ return res; 3607//ZZ } 3608//ZZ case Iop_PwMax32Fx2: { 3609//ZZ HReg res = newVRegD(env); 3610//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3611//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3612//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF, 3613//ZZ res, argL, argR, 2, False)); 3614//ZZ return res; 3615//ZZ } 3616//ZZ case Iop_PwMin32Fx2: { 3617//ZZ HReg res = newVRegD(env); 3618//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3619//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3620//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF, 3621//ZZ res, argL, argR, 2, False)); 3622//ZZ return res; 3623//ZZ } 3624//ZZ case Iop_CmpGT32Fx2: { 3625//ZZ HReg res = newVRegD(env); 3626//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3627//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3628//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF, 3629//ZZ res, argL, argR, 2, False)); 3630//ZZ return res; 3631//ZZ } 3632//ZZ case 
Iop_CmpGE32Fx2: { 3633//ZZ HReg res = newVRegD(env); 3634//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3635//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3636//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF, 3637//ZZ res, argL, argR, 2, False)); 3638//ZZ return res; 3639//ZZ } 3640//ZZ case Iop_CmpEQ32Fx2: { 3641//ZZ HReg res = newVRegD(env); 3642//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3643//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3644//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF, 3645//ZZ res, argL, argR, 2, False)); 3646//ZZ return res; 3647//ZZ } 3648//ZZ case Iop_F32ToFixed32Ux2_RZ: 3649//ZZ case Iop_F32ToFixed32Sx2_RZ: 3650//ZZ case Iop_Fixed32UToF32x2_RN: 3651//ZZ case Iop_Fixed32SToF32x2_RN: { 3652//ZZ HReg res = newVRegD(env); 3653//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1); 3654//ZZ ARMNeonUnOp op; 3655//ZZ UInt imm6; 3656//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || 3657//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 3658//ZZ vpanic("ARM supports FP <-> Fixed conversion with constant " 3659//ZZ "second argument less than 33 only\n"); 3660//ZZ } 3661//ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 3662//ZZ vassert(imm6 <= 32 && imm6 > 0); 3663//ZZ imm6 = 64 - imm6; 3664//ZZ switch(e->Iex.Binop.op) { 3665//ZZ case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break; 3666//ZZ case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break; 3667//ZZ case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break; 3668//ZZ case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break; 3669//ZZ default: vassert(0); 3670//ZZ } 3671//ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False)); 3672//ZZ return res; 3673//ZZ } 3674//ZZ /* 3675//ZZ FIXME: is this here or not? 
3676//ZZ case Iop_VDup8x8: 3677//ZZ case Iop_VDup16x4: 3678//ZZ case Iop_VDup32x2: { 3679//ZZ HReg res = newVRegD(env); 3680//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3681//ZZ UInt index; 3682//ZZ UInt imm4; 3683//ZZ UInt size = 0; 3684//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || 3685//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 3686//ZZ vpanic("ARM supports Iop_VDup with constant " 3687//ZZ "second argument less than 16 only\n"); 3688//ZZ } 3689//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 3690//ZZ switch(e->Iex.Binop.op) { 3691//ZZ case Iop_VDup8x8: imm4 = (index << 1) + 1; break; 3692//ZZ case Iop_VDup16x4: imm4 = (index << 2) + 2; break; 3693//ZZ case Iop_VDup32x2: imm4 = (index << 3) + 4; break; 3694//ZZ default: vassert(0); 3695//ZZ } 3696//ZZ if (imm4 >= 16) { 3697//ZZ vpanic("ARM supports Iop_VDup with constant " 3698//ZZ "second argument less than 16 only\n"); 3699//ZZ } 3700//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP, 3701//ZZ res, argL, imm4, False)); 3702//ZZ return res; 3703//ZZ } 3704//ZZ */ 3705//ZZ default: 3706//ZZ break; 3707//ZZ } 3708//ZZ } 3709//ZZ 3710//ZZ /* --------- UNARY ops --------- */ 3711//ZZ if (e->tag == Iex_Unop) { 3712//ZZ switch (e->Iex.Unop.op) { 3713//ZZ 3714//ZZ /* 32Uto64 */ 3715//ZZ case Iop_32Uto64: { 3716//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg); 3717//ZZ HReg rHi = newVRegI(env); 3718//ZZ HReg res = newVRegD(env); 3719//ZZ addInstr(env, ARMInstr_Imm32(rHi, 0)); 3720//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 3721//ZZ return res; 3722//ZZ } 3723//ZZ 3724//ZZ /* 32Sto64 */ 3725//ZZ case Iop_32Sto64: { 3726//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg); 3727//ZZ HReg rHi = newVRegI(env); 3728//ZZ addInstr(env, mk_iMOVds_RR(rHi, rLo)); 3729//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31))); 3730//ZZ HReg res = newVRegD(env); 3731//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 3732//ZZ return res; 3733//ZZ } 
3734//ZZ 3735//ZZ /* The next 3 are pass-throughs */ 3736//ZZ /* ReinterpF64asI64 */ 3737//ZZ case Iop_ReinterpF64asI64: 3738//ZZ /* Left64(e) */ 3739//ZZ case Iop_Left64: 3740//ZZ /* CmpwNEZ64(e) */ 3741//ZZ case Iop_1Sto64: { 3742//ZZ HReg rLo, rHi; 3743//ZZ HReg res = newVRegD(env); 3744//ZZ iselInt64Expr(&rHi, &rLo, env, e); 3745//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 3746//ZZ return res; 3747//ZZ } 3748//ZZ 3749//ZZ case Iop_Not64: { 3750//ZZ DECLARE_PATTERN(p_veqz_8x8); 3751//ZZ DECLARE_PATTERN(p_veqz_16x4); 3752//ZZ DECLARE_PATTERN(p_veqz_32x2); 3753//ZZ DECLARE_PATTERN(p_vcge_8sx8); 3754//ZZ DECLARE_PATTERN(p_vcge_16sx4); 3755//ZZ DECLARE_PATTERN(p_vcge_32sx2); 3756//ZZ DECLARE_PATTERN(p_vcge_8ux8); 3757//ZZ DECLARE_PATTERN(p_vcge_16ux4); 3758//ZZ DECLARE_PATTERN(p_vcge_32ux2); 3759//ZZ DEFINE_PATTERN(p_veqz_8x8, 3760//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0)))); 3761//ZZ DEFINE_PATTERN(p_veqz_16x4, 3762//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0)))); 3763//ZZ DEFINE_PATTERN(p_veqz_32x2, 3764//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0)))); 3765//ZZ DEFINE_PATTERN(p_vcge_8sx8, 3766//ZZ unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0)))); 3767//ZZ DEFINE_PATTERN(p_vcge_16sx4, 3768//ZZ unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0)))); 3769//ZZ DEFINE_PATTERN(p_vcge_32sx2, 3770//ZZ unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0)))); 3771//ZZ DEFINE_PATTERN(p_vcge_8ux8, 3772//ZZ unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0)))); 3773//ZZ DEFINE_PATTERN(p_vcge_16ux4, 3774//ZZ unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0)))); 3775//ZZ DEFINE_PATTERN(p_vcge_32ux2, 3776//ZZ unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0)))); 3777//ZZ if (matchIRExpr(&mi, p_veqz_8x8, e)) { 3778//ZZ HReg res = newVRegD(env); 3779//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]); 3780//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False)); 3781//ZZ return res; 3782//ZZ } else if 
(matchIRExpr(&mi, p_veqz_16x4, e)) { 3783//ZZ HReg res = newVRegD(env); 3784//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]); 3785//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False)); 3786//ZZ return res; 3787//ZZ } else if (matchIRExpr(&mi, p_veqz_32x2, e)) { 3788//ZZ HReg res = newVRegD(env); 3789//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]); 3790//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False)); 3791//ZZ return res; 3792//ZZ } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) { 3793//ZZ HReg res = newVRegD(env); 3794//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3795//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3796//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 3797//ZZ res, argL, argR, 0, False)); 3798//ZZ return res; 3799//ZZ } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) { 3800//ZZ HReg res = newVRegD(env); 3801//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3802//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3803//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 3804//ZZ res, argL, argR, 1, False)); 3805//ZZ return res; 3806//ZZ } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) { 3807//ZZ HReg res = newVRegD(env); 3808//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3809//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3810//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 3811//ZZ res, argL, argR, 2, False)); 3812//ZZ return res; 3813//ZZ } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) { 3814//ZZ HReg res = newVRegD(env); 3815//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3816//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3817//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 3818//ZZ res, argL, argR, 0, False)); 3819//ZZ return res; 3820//ZZ } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) { 3821//ZZ HReg res = newVRegD(env); 3822//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3823//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3824//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 
3825//ZZ res, argL, argR, 1, False)); 3826//ZZ return res; 3827//ZZ } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) { 3828//ZZ HReg res = newVRegD(env); 3829//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3830//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3831//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 3832//ZZ res, argL, argR, 2, False)); 3833//ZZ return res; 3834//ZZ } else { 3835//ZZ HReg res = newVRegD(env); 3836//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3837//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False)); 3838//ZZ return res; 3839//ZZ } 3840//ZZ } 3841//ZZ case Iop_Dup8x8: 3842//ZZ case Iop_Dup16x4: 3843//ZZ case Iop_Dup32x2: { 3844//ZZ HReg res, arg; 3845//ZZ UInt size; 3846//ZZ DECLARE_PATTERN(p_vdup_8x8); 3847//ZZ DECLARE_PATTERN(p_vdup_16x4); 3848//ZZ DECLARE_PATTERN(p_vdup_32x2); 3849//ZZ DEFINE_PATTERN(p_vdup_8x8, 3850//ZZ unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1)))); 3851//ZZ DEFINE_PATTERN(p_vdup_16x4, 3852//ZZ unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1)))); 3853//ZZ DEFINE_PATTERN(p_vdup_32x2, 3854//ZZ unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1)))); 3855//ZZ if (matchIRExpr(&mi, p_vdup_8x8, e)) { 3856//ZZ UInt index; 3857//ZZ UInt imm4; 3858//ZZ if (mi.bindee[1]->tag == Iex_Const && 3859//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 3860//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; 3861//ZZ imm4 = (index << 1) + 1; 3862//ZZ if (index < 8) { 3863//ZZ res = newVRegD(env); 3864//ZZ arg = iselNeon64Expr(env, mi.bindee[0]); 3865//ZZ addInstr(env, ARMInstr_NUnaryS( 3866//ZZ ARMneon_VDUP, 3867//ZZ mkARMNRS(ARMNRS_Reg, res, 0), 3868//ZZ mkARMNRS(ARMNRS_Scalar, arg, index), 3869//ZZ imm4, False 3870//ZZ )); 3871//ZZ return res; 3872//ZZ } 3873//ZZ } 3874//ZZ } else if (matchIRExpr(&mi, p_vdup_16x4, e)) { 3875//ZZ UInt index; 3876//ZZ UInt imm4; 3877//ZZ if (mi.bindee[1]->tag == Iex_Const && 3878//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 
3879//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; 3880//ZZ imm4 = (index << 2) + 2; 3881//ZZ if (index < 4) { 3882//ZZ res = newVRegD(env); 3883//ZZ arg = iselNeon64Expr(env, mi.bindee[0]); 3884//ZZ addInstr(env, ARMInstr_NUnaryS( 3885//ZZ ARMneon_VDUP, 3886//ZZ mkARMNRS(ARMNRS_Reg, res, 0), 3887//ZZ mkARMNRS(ARMNRS_Scalar, arg, index), 3888//ZZ imm4, False 3889//ZZ )); 3890//ZZ return res; 3891//ZZ } 3892//ZZ } 3893//ZZ } else if (matchIRExpr(&mi, p_vdup_32x2, e)) { 3894//ZZ UInt index; 3895//ZZ UInt imm4; 3896//ZZ if (mi.bindee[1]->tag == Iex_Const && 3897//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 3898//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; 3899//ZZ imm4 = (index << 3) + 4; 3900//ZZ if (index < 2) { 3901//ZZ res = newVRegD(env); 3902//ZZ arg = iselNeon64Expr(env, mi.bindee[0]); 3903//ZZ addInstr(env, ARMInstr_NUnaryS( 3904//ZZ ARMneon_VDUP, 3905//ZZ mkARMNRS(ARMNRS_Reg, res, 0), 3906//ZZ mkARMNRS(ARMNRS_Scalar, arg, index), 3907//ZZ imm4, False 3908//ZZ )); 3909//ZZ return res; 3910//ZZ } 3911//ZZ } 3912//ZZ } 3913//ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg); 3914//ZZ res = newVRegD(env); 3915//ZZ switch (e->Iex.Unop.op) { 3916//ZZ case Iop_Dup8x8: size = 0; break; 3917//ZZ case Iop_Dup16x4: size = 1; break; 3918//ZZ case Iop_Dup32x2: size = 2; break; 3919//ZZ default: vassert(0); 3920//ZZ } 3921//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False)); 3922//ZZ return res; 3923//ZZ } 3924//ZZ case Iop_Abs8x8: 3925//ZZ case Iop_Abs16x4: 3926//ZZ case Iop_Abs32x2: { 3927//ZZ HReg res = newVRegD(env); 3928//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3929//ZZ UInt size = 0; 3930//ZZ switch(e->Iex.Binop.op) { 3931//ZZ case Iop_Abs8x8: size = 0; break; 3932//ZZ case Iop_Abs16x4: size = 1; break; 3933//ZZ case Iop_Abs32x2: size = 2; break; 3934//ZZ default: vassert(0); 3935//ZZ } 3936//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False)); 3937//ZZ return res; 3938//ZZ } 3939//ZZ case 
Iop_Reverse64_8x8: 3940//ZZ case Iop_Reverse64_16x4: 3941//ZZ case Iop_Reverse64_32x2: { 3942//ZZ HReg res = newVRegD(env); 3943//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3944//ZZ UInt size = 0; 3945//ZZ switch(e->Iex.Binop.op) { 3946//ZZ case Iop_Reverse64_8x8: size = 0; break; 3947//ZZ case Iop_Reverse64_16x4: size = 1; break; 3948//ZZ case Iop_Reverse64_32x2: size = 2; break; 3949//ZZ default: vassert(0); 3950//ZZ } 3951//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64, 3952//ZZ res, arg, size, False)); 3953//ZZ return res; 3954//ZZ } 3955//ZZ case Iop_Reverse32_8x8: 3956//ZZ case Iop_Reverse32_16x4: { 3957//ZZ HReg res = newVRegD(env); 3958//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3959//ZZ UInt size = 0; 3960//ZZ switch(e->Iex.Binop.op) { 3961//ZZ case Iop_Reverse32_8x8: size = 0; break; 3962//ZZ case Iop_Reverse32_16x4: size = 1; break; 3963//ZZ default: vassert(0); 3964//ZZ } 3965//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32, 3966//ZZ res, arg, size, False)); 3967//ZZ return res; 3968//ZZ } 3969//ZZ case Iop_Reverse16_8x8: { 3970//ZZ HReg res = newVRegD(env); 3971//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3972//ZZ UInt size = 0; 3973//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16, 3974//ZZ res, arg, size, False)); 3975//ZZ return res; 3976//ZZ } 3977//ZZ case Iop_CmpwNEZ64: { 3978//ZZ HReg x_lsh = newVRegD(env); 3979//ZZ HReg x_rsh = newVRegD(env); 3980//ZZ HReg lsh_amt = newVRegD(env); 3981//ZZ HReg rsh_amt = newVRegD(env); 3982//ZZ HReg zero = newVRegD(env); 3983//ZZ HReg tmp = newVRegD(env); 3984//ZZ HReg tmp2 = newVRegD(env); 3985//ZZ HReg res = newVRegD(env); 3986//ZZ HReg x = newVRegD(env); 3987//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3988//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False)); 3989//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False)); 3990//ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32))); 3991//ZZ addInstr(env, ARMInstr_NeonImm(zero, 
ARMNImm_TI(0, 0))); 3992//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 3993//ZZ rsh_amt, zero, lsh_amt, 2, False)); 3994//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 3995//ZZ x_lsh, x, lsh_amt, 3, False)); 3996//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 3997//ZZ x_rsh, x, rsh_amt, 3, False)); 3998//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 3999//ZZ tmp, x_lsh, x_rsh, 0, False)); 4000//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 4001//ZZ res, tmp, x, 0, False)); 4002//ZZ return res; 4003//ZZ } 4004//ZZ case Iop_CmpNEZ8x8: 4005//ZZ case Iop_CmpNEZ16x4: 4006//ZZ case Iop_CmpNEZ32x2: { 4007//ZZ HReg res = newVRegD(env); 4008//ZZ HReg tmp = newVRegD(env); 4009//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4010//ZZ UInt size; 4011//ZZ switch (e->Iex.Unop.op) { 4012//ZZ case Iop_CmpNEZ8x8: size = 0; break; 4013//ZZ case Iop_CmpNEZ16x4: size = 1; break; 4014//ZZ case Iop_CmpNEZ32x2: size = 2; break; 4015//ZZ default: vassert(0); 4016//ZZ } 4017//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False)); 4018//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False)); 4019//ZZ return res; 4020//ZZ } 4021//ZZ case Iop_NarrowUn16to8x8: 4022//ZZ case Iop_NarrowUn32to16x4: 4023//ZZ case Iop_NarrowUn64to32x2: { 4024//ZZ HReg res = newVRegD(env); 4025//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4026//ZZ UInt size = 0; 4027//ZZ switch(e->Iex.Binop.op) { 4028//ZZ case Iop_NarrowUn16to8x8: size = 0; break; 4029//ZZ case Iop_NarrowUn32to16x4: size = 1; break; 4030//ZZ case Iop_NarrowUn64to32x2: size = 2; break; 4031//ZZ default: vassert(0); 4032//ZZ } 4033//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYN, 4034//ZZ res, arg, size, False)); 4035//ZZ return res; 4036//ZZ } 4037//ZZ case Iop_QNarrowUn16Sto8Sx8: 4038//ZZ case Iop_QNarrowUn32Sto16Sx4: 4039//ZZ case Iop_QNarrowUn64Sto32Sx2: { 4040//ZZ HReg res = newVRegD(env); 4041//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4042//ZZ UInt size = 0; 4043//ZZ 
switch(e->Iex.Binop.op) { 4044//ZZ case Iop_QNarrowUn16Sto8Sx8: size = 0; break; 4045//ZZ case Iop_QNarrowUn32Sto16Sx4: size = 1; break; 4046//ZZ case Iop_QNarrowUn64Sto32Sx2: size = 2; break; 4047//ZZ default: vassert(0); 4048//ZZ } 4049//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS, 4050//ZZ res, arg, size, False)); 4051//ZZ return res; 4052//ZZ } 4053//ZZ case Iop_QNarrowUn16Sto8Ux8: 4054//ZZ case Iop_QNarrowUn32Sto16Ux4: 4055//ZZ case Iop_QNarrowUn64Sto32Ux2: { 4056//ZZ HReg res = newVRegD(env); 4057//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4058//ZZ UInt size = 0; 4059//ZZ switch(e->Iex.Binop.op) { 4060//ZZ case Iop_QNarrowUn16Sto8Ux8: size = 0; break; 4061//ZZ case Iop_QNarrowUn32Sto16Ux4: size = 1; break; 4062//ZZ case Iop_QNarrowUn64Sto32Ux2: size = 2; break; 4063//ZZ default: vassert(0); 4064//ZZ } 4065//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS, 4066//ZZ res, arg, size, False)); 4067//ZZ return res; 4068//ZZ } 4069//ZZ case Iop_QNarrowUn16Uto8Ux8: 4070//ZZ case Iop_QNarrowUn32Uto16Ux4: 4071//ZZ case Iop_QNarrowUn64Uto32Ux2: { 4072//ZZ HReg res = newVRegD(env); 4073//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4074//ZZ UInt size = 0; 4075//ZZ switch(e->Iex.Binop.op) { 4076//ZZ case Iop_QNarrowUn16Uto8Ux8: size = 0; break; 4077//ZZ case Iop_QNarrowUn32Uto16Ux4: size = 1; break; 4078//ZZ case Iop_QNarrowUn64Uto32Ux2: size = 2; break; 4079//ZZ default: vassert(0); 4080//ZZ } 4081//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU, 4082//ZZ res, arg, size, False)); 4083//ZZ return res; 4084//ZZ } 4085//ZZ case Iop_PwAddL8Sx8: 4086//ZZ case Iop_PwAddL16Sx4: 4087//ZZ case Iop_PwAddL32Sx2: { 4088//ZZ HReg res = newVRegD(env); 4089//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4090//ZZ UInt size = 0; 4091//ZZ switch(e->Iex.Binop.op) { 4092//ZZ case Iop_PwAddL8Sx8: size = 0; break; 4093//ZZ case Iop_PwAddL16Sx4: size = 1; break; 4094//ZZ case Iop_PwAddL32Sx2: size = 2; break; 4095//ZZ default: vassert(0); 4096//ZZ } 4097//ZZ 
addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS, 4098//ZZ res, arg, size, False)); 4099//ZZ return res; 4100//ZZ } 4101//ZZ case Iop_PwAddL8Ux8: 4102//ZZ case Iop_PwAddL16Ux4: 4103//ZZ case Iop_PwAddL32Ux2: { 4104//ZZ HReg res = newVRegD(env); 4105//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4106//ZZ UInt size = 0; 4107//ZZ switch(e->Iex.Binop.op) { 4108//ZZ case Iop_PwAddL8Ux8: size = 0; break; 4109//ZZ case Iop_PwAddL16Ux4: size = 1; break; 4110//ZZ case Iop_PwAddL32Ux2: size = 2; break; 4111//ZZ default: vassert(0); 4112//ZZ } 4113//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU, 4114//ZZ res, arg, size, False)); 4115//ZZ return res; 4116//ZZ } 4117//ZZ case Iop_Cnt8x8: { 4118//ZZ HReg res = newVRegD(env); 4119//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4120//ZZ UInt size = 0; 4121//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT, 4122//ZZ res, arg, size, False)); 4123//ZZ return res; 4124//ZZ } 4125//ZZ case Iop_Clz8Sx8: 4126//ZZ case Iop_Clz16Sx4: 4127//ZZ case Iop_Clz32Sx2: { 4128//ZZ HReg res = newVRegD(env); 4129//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4130//ZZ UInt size = 0; 4131//ZZ switch(e->Iex.Binop.op) { 4132//ZZ case Iop_Clz8Sx8: size = 0; break; 4133//ZZ case Iop_Clz16Sx4: size = 1; break; 4134//ZZ case Iop_Clz32Sx2: size = 2; break; 4135//ZZ default: vassert(0); 4136//ZZ } 4137//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, 4138//ZZ res, arg, size, False)); 4139//ZZ return res; 4140//ZZ } 4141//ZZ case Iop_Cls8Sx8: 4142//ZZ case Iop_Cls16Sx4: 4143//ZZ case Iop_Cls32Sx2: { 4144//ZZ HReg res = newVRegD(env); 4145//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4146//ZZ UInt size = 0; 4147//ZZ switch(e->Iex.Binop.op) { 4148//ZZ case Iop_Cls8Sx8: size = 0; break; 4149//ZZ case Iop_Cls16Sx4: size = 1; break; 4150//ZZ case Iop_Cls32Sx2: size = 2; break; 4151//ZZ default: vassert(0); 4152//ZZ } 4153//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS, 4154//ZZ res, arg, size, False)); 4155//ZZ return res; 4156//ZZ } 4157//ZZ case 
Iop_FtoI32Sx2_RZ: { 4158//ZZ HReg res = newVRegD(env); 4159//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4160//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS, 4161//ZZ res, arg, 2, False)); 4162//ZZ return res; 4163//ZZ } 4164//ZZ case Iop_FtoI32Ux2_RZ: { 4165//ZZ HReg res = newVRegD(env); 4166//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4167//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU, 4168//ZZ res, arg, 2, False)); 4169//ZZ return res; 4170//ZZ } 4171//ZZ case Iop_I32StoFx2: { 4172//ZZ HReg res = newVRegD(env); 4173//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4174//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF, 4175//ZZ res, arg, 2, False)); 4176//ZZ return res; 4177//ZZ } 4178//ZZ case Iop_I32UtoFx2: { 4179//ZZ HReg res = newVRegD(env); 4180//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4181//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF, 4182//ZZ res, arg, 2, False)); 4183//ZZ return res; 4184//ZZ } 4185//ZZ case Iop_F32toF16x4: { 4186//ZZ HReg res = newVRegD(env); 4187//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4188//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16, 4189//ZZ res, arg, 2, False)); 4190//ZZ return res; 4191//ZZ } 4192//ZZ case Iop_Recip32Fx2: { 4193//ZZ HReg res = newVRegD(env); 4194//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 4195//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF, 4196//ZZ res, argL, 0, False)); 4197//ZZ return res; 4198//ZZ } 4199//ZZ case Iop_Recip32x2: { 4200//ZZ HReg res = newVRegD(env); 4201//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 4202//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP, 4203//ZZ res, argL, 0, False)); 4204//ZZ return res; 4205//ZZ } 4206//ZZ case Iop_Abs32Fx2: { 4207//ZZ DECLARE_PATTERN(p_vabd_32fx2); 4208//ZZ DEFINE_PATTERN(p_vabd_32fx2, 4209//ZZ unop(Iop_Abs32Fx2, 4210//ZZ binop(Iop_Sub32Fx2, 4211//ZZ bind(0), 4212//ZZ bind(1)))); 4213//ZZ if (matchIRExpr(&mi, p_vabd_32fx2, e)) { 4214//ZZ HReg res = newVRegD(env); 
4215//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); 4216//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); 4217//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP, 4218//ZZ res, argL, argR, 0, False)); 4219//ZZ return res; 4220//ZZ } else { 4221//ZZ HReg res = newVRegD(env); 4222//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4223//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP, 4224//ZZ res, arg, 0, False)); 4225//ZZ return res; 4226//ZZ } 4227//ZZ } 4228//ZZ case Iop_Rsqrte32Fx2: { 4229//ZZ HReg res = newVRegD(env); 4230//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4231//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP, 4232//ZZ res, arg, 0, False)); 4233//ZZ return res; 4234//ZZ } 4235//ZZ case Iop_Rsqrte32x2: { 4236//ZZ HReg res = newVRegD(env); 4237//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4238//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE, 4239//ZZ res, arg, 0, False)); 4240//ZZ return res; 4241//ZZ } 4242//ZZ case Iop_Neg32Fx2: { 4243//ZZ HReg res = newVRegD(env); 4244//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4245//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF, 4246//ZZ res, arg, 0, False)); 4247//ZZ return res; 4248//ZZ } 4249//ZZ default: 4250//ZZ break; 4251//ZZ } 4252//ZZ } /* if (e->tag == Iex_Unop) */ 4253//ZZ 4254//ZZ if (e->tag == Iex_Triop) { 4255//ZZ IRTriop *triop = e->Iex.Triop.details; 4256//ZZ 4257//ZZ switch (triop->op) { 4258//ZZ case Iop_Extract64: { 4259//ZZ HReg res = newVRegD(env); 4260//ZZ HReg argL = iselNeon64Expr(env, triop->arg1); 4261//ZZ HReg argR = iselNeon64Expr(env, triop->arg2); 4262//ZZ UInt imm4; 4263//ZZ if (triop->arg3->tag != Iex_Const || 4264//ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) { 4265//ZZ vpanic("ARM target supports Iop_Extract64 with constant " 4266//ZZ "third argument less than 16 only\n"); 4267//ZZ } 4268//ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8; 4269//ZZ if (imm4 >= 8) { 4270//ZZ vpanic("ARM target supports Iop_Extract64 with constant " 
//ZZ                 "third argument less than 16 only\n");
//ZZ          }
//ZZ          addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
//ZZ                                         res, argL, argR, imm4, False));
//ZZ          return res;
//ZZ       }
//ZZ       case Iop_SetElem8x8:
//ZZ       case Iop_SetElem16x4:
//ZZ       case Iop_SetElem32x2: {
//ZZ          HReg res = newVRegD(env);
//ZZ          HReg dreg = iselNeon64Expr(env, triop->arg1);
//ZZ          HReg arg = iselIntExpr_R(env, triop->arg3);
//ZZ          UInt index, size;
//ZZ          if (triop->arg2->tag != Iex_Const ||
//ZZ              typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
//ZZ             vpanic("ARM target supports SetElem with constant "
//ZZ                    "second argument only\n");
//ZZ          }
//ZZ          index = triop->arg2->Iex.Const.con->Ico.U8;
//ZZ          switch (triop->op) {
//ZZ             case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
//ZZ             case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
//ZZ             case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
//ZZ             default: vassert(0);
//ZZ          }
//ZZ          addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
//ZZ          addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
//ZZ                                         mkARMNRS(ARMNRS_Scalar, res, index),
//ZZ                                         mkARMNRS(ARMNRS_Reg, arg, 0),
//ZZ                                         size, False));
//ZZ          return res;
//ZZ       }
//ZZ       default:
//ZZ          break;
//ZZ    }
//ZZ    }
//ZZ
//ZZ    /* --------- MULTIPLEX --------- */
//ZZ    if (e->tag == Iex_ITE) { // VFD
//ZZ       HReg rLo, rHi;
//ZZ       HReg res = newVRegD(env);
//ZZ       iselInt64Expr(&rHi, &rLo, env, e);
//ZZ       addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
//ZZ       return res;
//ZZ    }
//ZZ
//ZZ    ppIRExpr(e);
//ZZ    vpanic("iselNeon64Expr");
//ZZ }


/*---------------------------------------------------------*/
/*--- ISEL: Vector (NEON) expressions (128 bit)         ---*/
/*---------------------------------------------------------*/

/* Select instructions for a V128-typed expression 'e' and return the
   (virtual) vector register holding the result.  This is the checked
   entry point: it delegates to iselV128Expr_wrk and then asserts that
   the register handed back is a virtual register of class HRcVec128,
   so all callers can rely on that invariant. */
static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselV128Expr_wrk( env, e );
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env, e);
   vassert(e);
   vassert(ty == Ity_V128);

   /* Read of an IR temporary: just return the register already bound
      to it. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* Only a very limited range of constants is handled. */
      vassert(e->Iex.Const.con->tag == Ico_V128);
      UShort con = e->Iex.Const.con->Ico.V128;
      /* Currently only the all-zeroes V128 constant is generated
         directly; everything else falls through to the bad-expr
         handler. */
      if (con == 0x0000) {
         HReg res = newVRegV(env);
         addInstr(env, ARM64Instr_VImmQ(res, con));
         return res;
      }
      /* Unhandled */
      goto v128_expr_bad;
   }

   /* 128-bit load: evaluate the address into an integer register and
      do a Q-register load from it. */
   if (e->tag == Iex_Load) {
      HReg res = newVRegV(env);
      HReg rN = iselIntExpr_R(env, e->Iex.Load.addr);
      vassert(ty == Ity_V128);
      addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
      return res;
   }

   /* 128-bit read of the guest state, provided the offset is small
      enough (< 4096) to be materialised by the baseblock address
      helper. */
   if (e->tag == Iex_Get) {
      UInt offs = (UInt)e->Iex.Get.offset;
      if (offs < (1<<12)) {
         HReg addr = mk_baseblock_128bit_access_addr(env, offs);
         HReg res = newVRegV(env);
         vassert(ty == Ity_V128);
         addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
         return res;
      }
      goto v128_expr_bad;
   }

   if (e->tag == Iex_Unop) {

      /* Iop_ZeroHIXXofV128 cases: implemented by ANDing the source
         with a constant mask that keeps only the low-order lanes.
         imm16 is the VImmQ encoding of that mask (one bit per byte of
         the 128-bit value); it stays 0 for ops not in this family. */
      UShort imm16 = 0;
      switch (e->Iex.Unop.op) {
         case Iop_ZeroHI64ofV128:  imm16 = 0x00FF; break;
         case Iop_ZeroHI96ofV128:  imm16 = 0x000F; break;
         case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
         case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
         default: break;
      }
      if (imm16 != 0) {
         HReg src = iselV128Expr(env, e->Iex.Unop.arg);
         HReg imm = newVRegV(env);
         HReg res = newVRegV(env);
         addInstr(env, ARM64Instr_VImmQ(imm, imm16));
         addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
return res; 4396 } 4397 4398 /* Other cases */ 4399 switch (e->Iex.Unop.op) { 4400 case Iop_Cnt8x16: 4401 case Iop_NotV128: 4402 case Iop_AddLV8Ux16: 4403 case Iop_AddLV16Ux8: 4404 case Iop_AddLV32Ux4: 4405 case Iop_AddLV8Sx16: 4406 case Iop_AddLV16Sx8: 4407 case Iop_AddLV32Sx4: 4408 case Iop_Abs64Fx2: 4409 case Iop_Abs32Fx4: 4410 case Iop_Neg64Fx2: 4411 case Iop_Neg32Fx4: { 4412 HReg res = newVRegV(env); 4413 HReg arg = iselV128Expr(env, e->Iex.Unop.arg); 4414 ARM64VecUnaryOp op = ARM64vecu_INVALID; 4415 switch (e->Iex.Unop.op) { 4416 case Iop_NotV128: op = ARM64vecu_NOT; break; 4417 case Iop_Cnt8x16: op = ARM64vecu_CNT; break; 4418 case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break; 4419 case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break; 4420 case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break; 4421 case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break; 4422 case Iop_AddLV8Ux16: op = ARM64vecu_UADDLV8x16; break; 4423 case Iop_AddLV16Ux8: op = ARM64vecu_UADDLV16x8; break; 4424 case Iop_AddLV32Ux4: op = ARM64vecu_UADDLV32x4; break; 4425 case Iop_AddLV8Sx16: op = ARM64vecu_SADDLV8x16; break; 4426 case Iop_AddLV16Sx8: op = ARM64vecu_SADDLV16x8; break; 4427 case Iop_AddLV32Sx4: op = ARM64vecu_SADDLV32x4; break; 4428 default: vassert(0); 4429 } 4430 addInstr(env, ARM64Instr_VUnaryV(op, res, arg)); 4431 return res; 4432 } 4433 case Iop_CmpNEZ8x16: 4434 case Iop_CmpNEZ16x8: 4435 case Iop_CmpNEZ32x4: 4436 case Iop_CmpNEZ64x2: { 4437 HReg arg = iselV128Expr(env, e->Iex.Unop.arg); 4438 HReg zero = newVRegV(env); 4439 HReg res = newVRegV(env); 4440 ARM64VecBinOp cmp = ARM64vecb_INVALID; 4441 switch (e->Iex.Unop.op) { 4442 case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break; 4443 case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break; 4444 case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break; 4445 case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break; 4446 default: vassert(0); 4447 } 4448 // This is pretty feeble. 
Better: use CMP against zero 4449 // and avoid the extra instruction and extra register. 4450 addInstr(env, ARM64Instr_VImmQ(zero, 0x0000)); 4451 addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero)); 4452 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res)); 4453 return res; 4454 } 4455 4456 case Iop_Widen8Uto16x8: 4457 case Iop_Widen16Uto32x4: 4458 case Iop_Widen32Uto64x2: 4459 case Iop_Widen8Sto16x8: 4460 case Iop_Widen16Sto32x4: 4461 case Iop_Widen32Sto64x2: { 4462 HReg res = newVRegV(env); 4463 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); 4464 ARM64VecUnaryOp wop = ARM64vecu_INVALID; 4465 switch (e->Iex.Unop.op) { 4466 case Iop_Widen8Uto16x8: wop = ARM64vecu_VMOVL8U; break; 4467 case Iop_Widen16Uto32x4: wop = ARM64vecu_VMOVL16U; break; 4468 case Iop_Widen32Uto64x2: wop = ARM64vecu_VMOVL32U; break; 4469 case Iop_Widen8Sto16x8: wop = ARM64vecu_VMOVL8S; break; 4470 case Iop_Widen16Sto32x4: wop = ARM64vecu_VMOVL16S; break; 4471 case Iop_Widen32Sto64x2: wop = ARM64vecu_VMOVL32S; break; 4472 default: vassert(0); 4473 } 4474 addInstr(env, ARM64Instr_VUnaryV(wop, res, arg)); 4475 return res; 4476 } 4477//ZZ case Iop_NotV128: { 4478//ZZ DECLARE_PATTERN(p_veqz_8x16); 4479//ZZ DECLARE_PATTERN(p_veqz_16x8); 4480//ZZ DECLARE_PATTERN(p_veqz_32x4); 4481//ZZ DECLARE_PATTERN(p_vcge_8sx16); 4482//ZZ DECLARE_PATTERN(p_vcge_16sx8); 4483//ZZ DECLARE_PATTERN(p_vcge_32sx4); 4484//ZZ DECLARE_PATTERN(p_vcge_8ux16); 4485//ZZ DECLARE_PATTERN(p_vcge_16ux8); 4486//ZZ DECLARE_PATTERN(p_vcge_32ux4); 4487//ZZ DEFINE_PATTERN(p_veqz_8x16, 4488//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0)))); 4489//ZZ DEFINE_PATTERN(p_veqz_16x8, 4490//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0)))); 4491//ZZ DEFINE_PATTERN(p_veqz_32x4, 4492//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0)))); 4493//ZZ DEFINE_PATTERN(p_vcge_8sx16, 4494//ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0)))); 4495//ZZ DEFINE_PATTERN(p_vcge_16sx8, 4496//ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, 
bind(1), bind(0)))); 4497//ZZ DEFINE_PATTERN(p_vcge_32sx4, 4498//ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0)))); 4499//ZZ DEFINE_PATTERN(p_vcge_8ux16, 4500//ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0)))); 4501//ZZ DEFINE_PATTERN(p_vcge_16ux8, 4502//ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0)))); 4503//ZZ DEFINE_PATTERN(p_vcge_32ux4, 4504//ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0)))); 4505//ZZ if (matchIRExpr(&mi, p_veqz_8x16, e)) { 4506//ZZ HReg res = newVRegV(env); 4507//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]); 4508//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True)); 4509//ZZ return res; 4510//ZZ } else if (matchIRExpr(&mi, p_veqz_16x8, e)) { 4511//ZZ HReg res = newVRegV(env); 4512//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]); 4513//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True)); 4514//ZZ return res; 4515//ZZ } else if (matchIRExpr(&mi, p_veqz_32x4, e)) { 4516//ZZ HReg res = newVRegV(env); 4517//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]); 4518//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True)); 4519//ZZ return res; 4520//ZZ } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) { 4521//ZZ HReg res = newVRegV(env); 4522//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); 4523//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); 4524//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 4525//ZZ res, argL, argR, 0, True)); 4526//ZZ return res; 4527//ZZ } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) { 4528//ZZ HReg res = newVRegV(env); 4529//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); 4530//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); 4531//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 4532//ZZ res, argL, argR, 1, True)); 4533//ZZ return res; 4534//ZZ } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) { 4535//ZZ HReg res = newVRegV(env); 4536//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); 4537//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); 4538//ZZ 
addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 4539//ZZ res, argL, argR, 2, True)); 4540//ZZ return res; 4541//ZZ } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) { 4542//ZZ HReg res = newVRegV(env); 4543//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); 4544//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); 4545//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 4546//ZZ res, argL, argR, 0, True)); 4547//ZZ return res; 4548//ZZ } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) { 4549//ZZ HReg res = newVRegV(env); 4550//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); 4551//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); 4552//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 4553//ZZ res, argL, argR, 1, True)); 4554//ZZ return res; 4555//ZZ } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) { 4556//ZZ HReg res = newVRegV(env); 4557//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); 4558//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); 4559//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 4560//ZZ res, argL, argR, 2, True)); 4561//ZZ return res; 4562//ZZ } else { 4563//ZZ HReg res = newVRegV(env); 4564//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4565//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True)); 4566//ZZ return res; 4567//ZZ } 4568//ZZ } 4569//ZZ case Iop_Dup8x16: 4570//ZZ case Iop_Dup16x8: 4571//ZZ case Iop_Dup32x4: { 4572//ZZ HReg res, arg; 4573//ZZ UInt size; 4574//ZZ DECLARE_PATTERN(p_vdup_8x16); 4575//ZZ DECLARE_PATTERN(p_vdup_16x8); 4576//ZZ DECLARE_PATTERN(p_vdup_32x4); 4577//ZZ DEFINE_PATTERN(p_vdup_8x16, 4578//ZZ unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1)))); 4579//ZZ DEFINE_PATTERN(p_vdup_16x8, 4580//ZZ unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1)))); 4581//ZZ DEFINE_PATTERN(p_vdup_32x4, 4582//ZZ unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1)))); 4583//ZZ if (matchIRExpr(&mi, p_vdup_8x16, e)) { 4584//ZZ UInt index; 4585//ZZ UInt imm4; 4586//ZZ if (mi.bindee[1]->tag == Iex_Const && 4587//ZZ 
typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 4588//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; 4589//ZZ imm4 = (index << 1) + 1; 4590//ZZ if (index < 8) { 4591//ZZ res = newVRegV(env); 4592//ZZ arg = iselNeon64Expr(env, mi.bindee[0]); 4593//ZZ addInstr(env, ARMInstr_NUnaryS( 4594//ZZ ARMneon_VDUP, 4595//ZZ mkARMNRS(ARMNRS_Reg, res, 0), 4596//ZZ mkARMNRS(ARMNRS_Scalar, arg, index), 4597//ZZ imm4, True 4598//ZZ )); 4599//ZZ return res; 4600//ZZ } 4601//ZZ } 4602//ZZ } else if (matchIRExpr(&mi, p_vdup_16x8, e)) { 4603//ZZ UInt index; 4604//ZZ UInt imm4; 4605//ZZ if (mi.bindee[1]->tag == Iex_Const && 4606//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 4607//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; 4608//ZZ imm4 = (index << 2) + 2; 4609//ZZ if (index < 4) { 4610//ZZ res = newVRegV(env); 4611//ZZ arg = iselNeon64Expr(env, mi.bindee[0]); 4612//ZZ addInstr(env, ARMInstr_NUnaryS( 4613//ZZ ARMneon_VDUP, 4614//ZZ mkARMNRS(ARMNRS_Reg, res, 0), 4615//ZZ mkARMNRS(ARMNRS_Scalar, arg, index), 4616//ZZ imm4, True 4617//ZZ )); 4618//ZZ return res; 4619//ZZ } 4620//ZZ } 4621//ZZ } else if (matchIRExpr(&mi, p_vdup_32x4, e)) { 4622//ZZ UInt index; 4623//ZZ UInt imm4; 4624//ZZ if (mi.bindee[1]->tag == Iex_Const && 4625//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 4626//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; 4627//ZZ imm4 = (index << 3) + 4; 4628//ZZ if (index < 2) { 4629//ZZ res = newVRegV(env); 4630//ZZ arg = iselNeon64Expr(env, mi.bindee[0]); 4631//ZZ addInstr(env, ARMInstr_NUnaryS( 4632//ZZ ARMneon_VDUP, 4633//ZZ mkARMNRS(ARMNRS_Reg, res, 0), 4634//ZZ mkARMNRS(ARMNRS_Scalar, arg, index), 4635//ZZ imm4, True 4636//ZZ )); 4637//ZZ return res; 4638//ZZ } 4639//ZZ } 4640//ZZ } 4641//ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg); 4642//ZZ res = newVRegV(env); 4643//ZZ switch (e->Iex.Unop.op) { 4644//ZZ case Iop_Dup8x16: size = 0; break; 4645//ZZ case Iop_Dup16x8: size = 1; break; 4646//ZZ case Iop_Dup32x4: size = 2; break; 4647//ZZ 
default: vassert(0); 4648//ZZ } 4649//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True)); 4650//ZZ return res; 4651//ZZ } 4652//ZZ case Iop_Abs8x16: 4653//ZZ case Iop_Abs16x8: 4654//ZZ case Iop_Abs32x4: { 4655//ZZ HReg res = newVRegV(env); 4656//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4657//ZZ UInt size = 0; 4658//ZZ switch(e->Iex.Binop.op) { 4659//ZZ case Iop_Abs8x16: size = 0; break; 4660//ZZ case Iop_Abs16x8: size = 1; break; 4661//ZZ case Iop_Abs32x4: size = 2; break; 4662//ZZ default: vassert(0); 4663//ZZ } 4664//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True)); 4665//ZZ return res; 4666//ZZ } 4667//ZZ case Iop_Reverse64_8x16: 4668//ZZ case Iop_Reverse64_16x8: 4669//ZZ case Iop_Reverse64_32x4: { 4670//ZZ HReg res = newVRegV(env); 4671//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4672//ZZ UInt size = 0; 4673//ZZ switch(e->Iex.Binop.op) { 4674//ZZ case Iop_Reverse64_8x16: size = 0; break; 4675//ZZ case Iop_Reverse64_16x8: size = 1; break; 4676//ZZ case Iop_Reverse64_32x4: size = 2; break; 4677//ZZ default: vassert(0); 4678//ZZ } 4679//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64, 4680//ZZ res, arg, size, True)); 4681//ZZ return res; 4682//ZZ } 4683//ZZ case Iop_Reverse32_8x16: 4684//ZZ case Iop_Reverse32_16x8: { 4685//ZZ HReg res = newVRegV(env); 4686//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4687//ZZ UInt size = 0; 4688//ZZ switch(e->Iex.Binop.op) { 4689//ZZ case Iop_Reverse32_8x16: size = 0; break; 4690//ZZ case Iop_Reverse32_16x8: size = 1; break; 4691//ZZ default: vassert(0); 4692//ZZ } 4693//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32, 4694//ZZ res, arg, size, True)); 4695//ZZ return res; 4696//ZZ } 4697//ZZ case Iop_Reverse16_8x16: { 4698//ZZ HReg res = newVRegV(env); 4699//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4700//ZZ UInt size = 0; 4701//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16, 4702//ZZ res, arg, size, True)); 4703//ZZ return res; 4704//ZZ } 4705//ZZ case 
Iop_CmpNEZ64x2: { 4706//ZZ HReg x_lsh = newVRegV(env); 4707//ZZ HReg x_rsh = newVRegV(env); 4708//ZZ HReg lsh_amt = newVRegV(env); 4709//ZZ HReg rsh_amt = newVRegV(env); 4710//ZZ HReg zero = newVRegV(env); 4711//ZZ HReg tmp = newVRegV(env); 4712//ZZ HReg tmp2 = newVRegV(env); 4713//ZZ HReg res = newVRegV(env); 4714//ZZ HReg x = newVRegV(env); 4715//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4716//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True)); 4717//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True)); 4718//ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32))); 4719//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0))); 4720//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 4721//ZZ rsh_amt, zero, lsh_amt, 2, True)); 4722//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 4723//ZZ x_lsh, x, lsh_amt, 3, True)); 4724//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 4725//ZZ x_rsh, x, rsh_amt, 3, True)); 4726//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 4727//ZZ tmp, x_lsh, x_rsh, 0, True)); 4728//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 4729//ZZ res, tmp, x, 0, True)); 4730//ZZ return res; 4731//ZZ } 4732//ZZ case Iop_Widen8Sto16x8: 4733//ZZ case Iop_Widen16Sto32x4: 4734//ZZ case Iop_Widen32Sto64x2: { 4735//ZZ HReg res = newVRegV(env); 4736//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4737//ZZ UInt size; 4738//ZZ switch (e->Iex.Unop.op) { 4739//ZZ case Iop_Widen8Sto16x8: size = 0; break; 4740//ZZ case Iop_Widen16Sto32x4: size = 1; break; 4741//ZZ case Iop_Widen32Sto64x2: size = 2; break; 4742//ZZ default: vassert(0); 4743//ZZ } 4744//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS, 4745//ZZ res, arg, size, True)); 4746//ZZ return res; 4747//ZZ } 4748//ZZ case Iop_PwAddL8Sx16: 4749//ZZ case Iop_PwAddL16Sx8: 4750//ZZ case Iop_PwAddL32Sx4: { 4751//ZZ HReg res = newVRegV(env); 4752//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4753//ZZ UInt size = 0; 4754//ZZ switch(e->Iex.Binop.op) { 4755//ZZ case 
Iop_PwAddL8Sx16: size = 0; break; 4756//ZZ case Iop_PwAddL16Sx8: size = 1; break; 4757//ZZ case Iop_PwAddL32Sx4: size = 2; break; 4758//ZZ default: vassert(0); 4759//ZZ } 4760//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS, 4761//ZZ res, arg, size, True)); 4762//ZZ return res; 4763//ZZ } 4764//ZZ case Iop_PwAddL8Ux16: 4765//ZZ case Iop_PwAddL16Ux8: 4766//ZZ case Iop_PwAddL32Ux4: { 4767//ZZ HReg res = newVRegV(env); 4768//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4769//ZZ UInt size = 0; 4770//ZZ switch(e->Iex.Binop.op) { 4771//ZZ case Iop_PwAddL8Ux16: size = 0; break; 4772//ZZ case Iop_PwAddL16Ux8: size = 1; break; 4773//ZZ case Iop_PwAddL32Ux4: size = 2; break; 4774//ZZ default: vassert(0); 4775//ZZ } 4776//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU, 4777//ZZ res, arg, size, True)); 4778//ZZ return res; 4779//ZZ } 4780//ZZ case Iop_Cnt8x16: { 4781//ZZ HReg res = newVRegV(env); 4782//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4783//ZZ UInt size = 0; 4784//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True)); 4785//ZZ return res; 4786//ZZ } 4787//ZZ case Iop_Clz8Sx16: 4788//ZZ case Iop_Clz16Sx8: 4789//ZZ case Iop_Clz32Sx4: { 4790//ZZ HReg res = newVRegV(env); 4791//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4792//ZZ UInt size = 0; 4793//ZZ switch(e->Iex.Binop.op) { 4794//ZZ case Iop_Clz8Sx16: size = 0; break; 4795//ZZ case Iop_Clz16Sx8: size = 1; break; 4796//ZZ case Iop_Clz32Sx4: size = 2; break; 4797//ZZ default: vassert(0); 4798//ZZ } 4799//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True)); 4800//ZZ return res; 4801//ZZ } 4802//ZZ case Iop_Cls8Sx16: 4803//ZZ case Iop_Cls16Sx8: 4804//ZZ case Iop_Cls32Sx4: { 4805//ZZ HReg res = newVRegV(env); 4806//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4807//ZZ UInt size = 0; 4808//ZZ switch(e->Iex.Binop.op) { 4809//ZZ case Iop_Cls8Sx16: size = 0; break; 4810//ZZ case Iop_Cls16Sx8: size = 1; break; 4811//ZZ case Iop_Cls32Sx4: size = 2; break; 4812//ZZ 
default: vassert(0); 4813//ZZ } 4814//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True)); 4815//ZZ return res; 4816//ZZ } 4817//ZZ case Iop_FtoI32Sx4_RZ: { 4818//ZZ HReg res = newVRegV(env); 4819//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4820//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS, 4821//ZZ res, arg, 2, True)); 4822//ZZ return res; 4823//ZZ } 4824//ZZ case Iop_FtoI32Ux4_RZ: { 4825//ZZ HReg res = newVRegV(env); 4826//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4827//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU, 4828//ZZ res, arg, 2, True)); 4829//ZZ return res; 4830//ZZ } 4831//ZZ case Iop_I32StoFx4: { 4832//ZZ HReg res = newVRegV(env); 4833//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4834//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF, 4835//ZZ res, arg, 2, True)); 4836//ZZ return res; 4837//ZZ } 4838//ZZ case Iop_I32UtoFx4: { 4839//ZZ HReg res = newVRegV(env); 4840//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4841//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF, 4842//ZZ res, arg, 2, True)); 4843//ZZ return res; 4844//ZZ } 4845//ZZ case Iop_F16toF32x4: { 4846//ZZ HReg res = newVRegV(env); 4847//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4848//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32, 4849//ZZ res, arg, 2, True)); 4850//ZZ return res; 4851//ZZ } 4852//ZZ case Iop_Recip32Fx4: { 4853//ZZ HReg res = newVRegV(env); 4854//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); 4855//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF, 4856//ZZ res, argL, 0, True)); 4857//ZZ return res; 4858//ZZ } 4859//ZZ case Iop_Recip32x4: { 4860//ZZ HReg res = newVRegV(env); 4861//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); 4862//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP, 4863//ZZ res, argL, 0, True)); 4864//ZZ return res; 4865//ZZ } 4866//ZZ case Iop_Rsqrte32Fx4: { 4867//ZZ HReg res = newVRegV(env); 4868//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); 4869//ZZ addInstr(env, 
ARMInstr_NUnary(ARMneon_VRSQRTEFP, 4870//ZZ res, argL, 0, True)); 4871//ZZ return res; 4872//ZZ } 4873//ZZ case Iop_Rsqrte32x4: { 4874//ZZ HReg res = newVRegV(env); 4875//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); 4876//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE, 4877//ZZ res, argL, 0, True)); 4878//ZZ return res; 4879//ZZ } 4880 /* ... */ 4881 default: 4882 break; 4883 } /* switch on the unop */ 4884 } /* if (e->tag == Iex_Unop) */ 4885 4886 if (e->tag == Iex_Binop) { 4887 switch (e->Iex.Binop.op) { 4888 case Iop_64HLtoV128: { 4889 HReg res = newVRegV(env); 4890 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 4891 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 4892 addInstr(env, ARM64Instr_VQfromXX(res, argL, argR)); 4893 return res; 4894 } 4895//ZZ case Iop_AndV128: { 4896//ZZ HReg res = newVRegV(env); 4897//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4898//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4899//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND, 4900//ZZ res, argL, argR, 4, True)); 4901//ZZ return res; 4902//ZZ } 4903//ZZ case Iop_OrV128: { 4904//ZZ HReg res = newVRegV(env); 4905//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4906//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4907//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 4908//ZZ res, argL, argR, 4, True)); 4909//ZZ return res; 4910//ZZ } 4911//ZZ case Iop_XorV128: { 4912//ZZ HReg res = newVRegV(env); 4913//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4914//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4915//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR, 4916//ZZ res, argL, argR, 4, True)); 4917//ZZ return res; 4918//ZZ } 4919//ZZ case Iop_Add8x16: 4920//ZZ case Iop_Add16x8: 4921//ZZ case Iop_Add32x4: 4922 case Iop_AndV128: 4923 case Iop_OrV128: 4924 case Iop_XorV128: 4925 case Iop_Max32Ux4: 4926 case Iop_Max16Ux8: 4927 case Iop_Max8Ux16: 4928 case Iop_Min32Ux4: 4929 case Iop_Min16Ux8: 4930 case Iop_Min8Ux16: 4931 case 
Iop_Max32Sx4: 4932 case Iop_Max16Sx8: 4933 case Iop_Max8Sx16: 4934 case Iop_Min32Sx4: 4935 case Iop_Min16Sx8: 4936 case Iop_Min8Sx16: 4937 case Iop_Add64x2: 4938 case Iop_Add32x4: 4939 case Iop_Add16x8: 4940 case Iop_Add8x16: 4941 case Iop_Sub64x2: 4942 case Iop_Sub32x4: 4943 case Iop_Sub16x8: 4944 case Iop_Sub8x16: 4945 case Iop_Mul32x4: 4946 case Iop_Mul16x8: 4947 case Iop_Mul8x16: 4948 case Iop_CmpEQ64x2: 4949 case Iop_CmpEQ32x4: 4950 case Iop_CmpEQ16x8: 4951 case Iop_CmpEQ8x16: 4952 case Iop_CmpGT64Ux2: 4953 case Iop_CmpGT32Ux4: 4954 case Iop_CmpGT16Ux8: 4955 case Iop_CmpGT8Ux16: 4956 case Iop_CmpGT64Sx2: 4957 case Iop_CmpGT32Sx4: 4958 case Iop_CmpGT16Sx8: 4959 case Iop_CmpGT8Sx16: 4960 case Iop_CmpEQ64Fx2: 4961 case Iop_CmpEQ32Fx4: 4962 case Iop_CmpLE64Fx2: 4963 case Iop_CmpLE32Fx4: 4964 case Iop_CmpLT64Fx2: 4965 case Iop_CmpLT32Fx4: 4966 case Iop_Perm8x16: 4967 { 4968 HReg res = newVRegV(env); 4969 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1); 4970 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2); 4971 Bool sw = False; 4972 ARM64VecBinOp op = ARM64vecb_INVALID; 4973 switch (e->Iex.Binop.op) { 4974 case Iop_AndV128: op = ARM64vecb_AND; break; 4975 case Iop_OrV128: op = ARM64vecb_ORR; break; 4976 case Iop_XorV128: op = ARM64vecb_XOR; break; 4977 case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break; 4978 case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break; 4979 case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break; 4980 case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break; 4981 case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break; 4982 case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break; 4983 case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break; 4984 case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break; 4985 case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break; 4986 case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break; 4987 case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break; 4988 case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break; 4989 case Iop_Add64x2: op = ARM64vecb_ADD64x2; break; 4990 
case Iop_Add32x4: op = ARM64vecb_ADD32x4; break; 4991 case Iop_Add16x8: op = ARM64vecb_ADD16x8; break; 4992 case Iop_Add8x16: op = ARM64vecb_ADD8x16; break; 4993 case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break; 4994 case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break; 4995 case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break; 4996 case Iop_Sub8x16: op = ARM64vecb_SUB8x16; break; 4997 case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break; 4998 case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break; 4999 case Iop_Mul8x16: op = ARM64vecb_MUL8x16; break; 5000 case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break; 5001 case Iop_CmpEQ32x4: op = ARM64vecb_CMEQ32x4; break; 5002 case Iop_CmpEQ16x8: op = ARM64vecb_CMEQ16x8; break; 5003 case Iop_CmpEQ8x16: op = ARM64vecb_CMEQ8x16; break; 5004 case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break; 5005 case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break; 5006 case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break; 5007 case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break; 5008 case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break; 5009 case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break; 5010 case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break; 5011 case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break; 5012 case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break; 5013 case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break; 5014 case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break; 5015 case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break; 5016 case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break; 5017 case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break; 5018 case Iop_Perm8x16: op = ARM64vecb_TBL1; break; 5019 default: vassert(0); 5020 } 5021 if (sw) { 5022 addInstr(env, ARM64Instr_VBinV(op, res, argR, argL)); 5023 } else { 5024 addInstr(env, ARM64Instr_VBinV(op, res, argL, argR)); 5025 } 5026 return res; 5027 } 5028//ZZ case Iop_Add32Fx4: { 5029//ZZ HReg res = newVRegV(env); 5030//ZZ HReg argL = iselNeonExpr(env, 
e->Iex.Binop.arg1); 5031//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5032//ZZ UInt size = 0; 5033//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP, 5034//ZZ res, argL, argR, size, True)); 5035//ZZ return res; 5036//ZZ } 5037//ZZ case Iop_Recps32Fx4: { 5038//ZZ HReg res = newVRegV(env); 5039//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5040//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5041//ZZ UInt size = 0; 5042//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS, 5043//ZZ res, argL, argR, size, True)); 5044//ZZ return res; 5045//ZZ } 5046//ZZ case Iop_Rsqrts32Fx4: { 5047//ZZ HReg res = newVRegV(env); 5048//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5049//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5050//ZZ UInt size = 0; 5051//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS, 5052//ZZ res, argL, argR, size, True)); 5053//ZZ return res; 5054//ZZ } 5055//ZZ 5056//ZZ // These 6 verified 18 Apr 2013 5057//ZZ case Iop_InterleaveEvenLanes8x16: 5058//ZZ case Iop_InterleaveOddLanes8x16: 5059//ZZ case Iop_InterleaveEvenLanes16x8: 5060//ZZ case Iop_InterleaveOddLanes16x8: 5061//ZZ case Iop_InterleaveEvenLanes32x4: 5062//ZZ case Iop_InterleaveOddLanes32x4: { 5063//ZZ HReg rD = newVRegV(env); 5064//ZZ HReg rM = newVRegV(env); 5065//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5066//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5067//ZZ UInt size; 5068//ZZ Bool resRd; // is the result in rD or rM ? 
5069//ZZ switch (e->Iex.Binop.op) { 5070//ZZ case Iop_InterleaveOddLanes8x16: resRd = False; size = 0; break; 5071//ZZ case Iop_InterleaveEvenLanes8x16: resRd = True; size = 0; break; 5072//ZZ case Iop_InterleaveOddLanes16x8: resRd = False; size = 1; break; 5073//ZZ case Iop_InterleaveEvenLanes16x8: resRd = True; size = 1; break; 5074//ZZ case Iop_InterleaveOddLanes32x4: resRd = False; size = 2; break; 5075//ZZ case Iop_InterleaveEvenLanes32x4: resRd = True; size = 2; break; 5076//ZZ default: vassert(0); 5077//ZZ } 5078//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True)); 5079//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True)); 5080//ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True)); 5081//ZZ return resRd ? rD : rM; 5082//ZZ } 5083//ZZ 5084//ZZ // These 6 verified 18 Apr 2013 5085//ZZ case Iop_InterleaveHI8x16: 5086//ZZ case Iop_InterleaveLO8x16: 5087//ZZ case Iop_InterleaveHI16x8: 5088//ZZ case Iop_InterleaveLO16x8: 5089//ZZ case Iop_InterleaveHI32x4: 5090//ZZ case Iop_InterleaveLO32x4: { 5091//ZZ HReg rD = newVRegV(env); 5092//ZZ HReg rM = newVRegV(env); 5093//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5094//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5095//ZZ UInt size; 5096//ZZ Bool resRd; // is the result in rD or rM ? 
5097//ZZ switch (e->Iex.Binop.op) { 5098//ZZ case Iop_InterleaveHI8x16: resRd = False; size = 0; break; 5099//ZZ case Iop_InterleaveLO8x16: resRd = True; size = 0; break; 5100//ZZ case Iop_InterleaveHI16x8: resRd = False; size = 1; break; 5101//ZZ case Iop_InterleaveLO16x8: resRd = True; size = 1; break; 5102//ZZ case Iop_InterleaveHI32x4: resRd = False; size = 2; break; 5103//ZZ case Iop_InterleaveLO32x4: resRd = True; size = 2; break; 5104//ZZ default: vassert(0); 5105//ZZ } 5106//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True)); 5107//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True)); 5108//ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True)); 5109//ZZ return resRd ? rD : rM; 5110//ZZ } 5111//ZZ 5112//ZZ // These 6 verified 18 Apr 2013 5113//ZZ case Iop_CatOddLanes8x16: 5114//ZZ case Iop_CatEvenLanes8x16: 5115//ZZ case Iop_CatOddLanes16x8: 5116//ZZ case Iop_CatEvenLanes16x8: 5117//ZZ case Iop_CatOddLanes32x4: 5118//ZZ case Iop_CatEvenLanes32x4: { 5119//ZZ HReg rD = newVRegV(env); 5120//ZZ HReg rM = newVRegV(env); 5121//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5122//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5123//ZZ UInt size; 5124//ZZ Bool resRd; // is the result in rD or rM ? 5125//ZZ switch (e->Iex.Binop.op) { 5126//ZZ case Iop_CatOddLanes8x16: resRd = False; size = 0; break; 5127//ZZ case Iop_CatEvenLanes8x16: resRd = True; size = 0; break; 5128//ZZ case Iop_CatOddLanes16x8: resRd = False; size = 1; break; 5129//ZZ case Iop_CatEvenLanes16x8: resRd = True; size = 1; break; 5130//ZZ case Iop_CatOddLanes32x4: resRd = False; size = 2; break; 5131//ZZ case Iop_CatEvenLanes32x4: resRd = True; size = 2; break; 5132//ZZ default: vassert(0); 5133//ZZ } 5134//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True)); 5135//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True)); 5136//ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True)); 5137//ZZ return resRd ? 
rD : rM; 5138//ZZ } 5139//ZZ 5140//ZZ case Iop_QAdd8Ux16: 5141//ZZ case Iop_QAdd16Ux8: 5142//ZZ case Iop_QAdd32Ux4: 5143//ZZ case Iop_QAdd64Ux2: { 5144//ZZ HReg res = newVRegV(env); 5145//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5146//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5147//ZZ UInt size; 5148//ZZ switch (e->Iex.Binop.op) { 5149//ZZ case Iop_QAdd8Ux16: size = 0; break; 5150//ZZ case Iop_QAdd16Ux8: size = 1; break; 5151//ZZ case Iop_QAdd32Ux4: size = 2; break; 5152//ZZ case Iop_QAdd64Ux2: size = 3; break; 5153//ZZ default: 5154//ZZ ppIROp(e->Iex.Binop.op); 5155//ZZ vpanic("Illegal element size in VQADDU"); 5156//ZZ } 5157//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU, 5158//ZZ res, argL, argR, size, True)); 5159//ZZ return res; 5160//ZZ } 5161//ZZ case Iop_QAdd8Sx16: 5162//ZZ case Iop_QAdd16Sx8: 5163//ZZ case Iop_QAdd32Sx4: 5164//ZZ case Iop_QAdd64Sx2: { 5165//ZZ HReg res = newVRegV(env); 5166//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5167//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5168//ZZ UInt size; 5169//ZZ switch (e->Iex.Binop.op) { 5170//ZZ case Iop_QAdd8Sx16: size = 0; break; 5171//ZZ case Iop_QAdd16Sx8: size = 1; break; 5172//ZZ case Iop_QAdd32Sx4: size = 2; break; 5173//ZZ case Iop_QAdd64Sx2: size = 3; break; 5174//ZZ default: 5175//ZZ ppIROp(e->Iex.Binop.op); 5176//ZZ vpanic("Illegal element size in VQADDS"); 5177//ZZ } 5178//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS, 5179//ZZ res, argL, argR, size, True)); 5180//ZZ return res; 5181//ZZ } 5182//ZZ case Iop_Sub8x16: 5183//ZZ case Iop_Sub16x8: 5184//ZZ case Iop_Sub32x4: 5185//ZZ case Iop_Sub64x2: { 5186//ZZ HReg res = newVRegV(env); 5187//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5188//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5189//ZZ UInt size; 5190//ZZ switch (e->Iex.Binop.op) { 5191//ZZ case Iop_Sub8x16: size = 0; break; 5192//ZZ case Iop_Sub16x8: size = 1; break; 5193//ZZ case Iop_Sub32x4: size = 2; break; 5194//ZZ case 
Iop_Sub64x2: size = 3; break; 5195//ZZ default: 5196//ZZ ppIROp(e->Iex.Binop.op); 5197//ZZ vpanic("Illegal element size in VSUB"); 5198//ZZ } 5199//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 5200//ZZ res, argL, argR, size, True)); 5201//ZZ return res; 5202//ZZ } 5203//ZZ case Iop_Sub32Fx4: { 5204//ZZ HReg res = newVRegV(env); 5205//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5206//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5207//ZZ UInt size = 0; 5208//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP, 5209//ZZ res, argL, argR, size, True)); 5210//ZZ return res; 5211//ZZ } 5212//ZZ case Iop_QSub8Ux16: 5213//ZZ case Iop_QSub16Ux8: 5214//ZZ case Iop_QSub32Ux4: 5215//ZZ case Iop_QSub64Ux2: { 5216//ZZ HReg res = newVRegV(env); 5217//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5218//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5219//ZZ UInt size; 5220//ZZ switch (e->Iex.Binop.op) { 5221//ZZ case Iop_QSub8Ux16: size = 0; break; 5222//ZZ case Iop_QSub16Ux8: size = 1; break; 5223//ZZ case Iop_QSub32Ux4: size = 2; break; 5224//ZZ case Iop_QSub64Ux2: size = 3; break; 5225//ZZ default: 5226//ZZ ppIROp(e->Iex.Binop.op); 5227//ZZ vpanic("Illegal element size in VQSUBU"); 5228//ZZ } 5229//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU, 5230//ZZ res, argL, argR, size, True)); 5231//ZZ return res; 5232//ZZ } 5233//ZZ case Iop_QSub8Sx16: 5234//ZZ case Iop_QSub16Sx8: 5235//ZZ case Iop_QSub32Sx4: 5236//ZZ case Iop_QSub64Sx2: { 5237//ZZ HReg res = newVRegV(env); 5238//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5239//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5240//ZZ UInt size; 5241//ZZ switch (e->Iex.Binop.op) { 5242//ZZ case Iop_QSub8Sx16: size = 0; break; 5243//ZZ case Iop_QSub16Sx8: size = 1; break; 5244//ZZ case Iop_QSub32Sx4: size = 2; break; 5245//ZZ case Iop_QSub64Sx2: size = 3; break; 5246//ZZ default: 5247//ZZ ppIROp(e->Iex.Binop.op); 5248//ZZ vpanic("Illegal element size in VQSUBS"); 5249//ZZ } 5250//ZZ 
addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS, 5251//ZZ res, argL, argR, size, True)); 5252//ZZ return res; 5253//ZZ } 5254//ZZ case Iop_Max8Ux16: 5255//ZZ case Iop_Max16Ux8: 5256//ZZ case Iop_Max32Ux4: { 5257//ZZ HReg res = newVRegV(env); 5258//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5259//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5260//ZZ UInt size; 5261//ZZ switch (e->Iex.Binop.op) { 5262//ZZ case Iop_Max8Ux16: size = 0; break; 5263//ZZ case Iop_Max16Ux8: size = 1; break; 5264//ZZ case Iop_Max32Ux4: size = 2; break; 5265//ZZ default: vpanic("Illegal element size in VMAXU"); 5266//ZZ } 5267//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU, 5268//ZZ res, argL, argR, size, True)); 5269//ZZ return res; 5270//ZZ } 5271//ZZ case Iop_Max8Sx16: 5272//ZZ case Iop_Max16Sx8: 5273//ZZ case Iop_Max32Sx4: { 5274//ZZ HReg res = newVRegV(env); 5275//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5276//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5277//ZZ UInt size; 5278//ZZ switch (e->Iex.Binop.op) { 5279//ZZ case Iop_Max8Sx16: size = 0; break; 5280//ZZ case Iop_Max16Sx8: size = 1; break; 5281//ZZ case Iop_Max32Sx4: size = 2; break; 5282//ZZ default: vpanic("Illegal element size in VMAXU"); 5283//ZZ } 5284//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS, 5285//ZZ res, argL, argR, size, True)); 5286//ZZ return res; 5287//ZZ } 5288//ZZ case Iop_Min8Ux16: 5289//ZZ case Iop_Min16Ux8: 5290//ZZ case Iop_Min32Ux4: { 5291//ZZ HReg res = newVRegV(env); 5292//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5293//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5294//ZZ UInt size; 5295//ZZ switch (e->Iex.Binop.op) { 5296//ZZ case Iop_Min8Ux16: size = 0; break; 5297//ZZ case Iop_Min16Ux8: size = 1; break; 5298//ZZ case Iop_Min32Ux4: size = 2; break; 5299//ZZ default: vpanic("Illegal element size in VMAXU"); 5300//ZZ } 5301//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU, 5302//ZZ res, argL, argR, size, True)); 5303//ZZ return res; 5304//ZZ } 
5305//ZZ case Iop_Min8Sx16: 5306//ZZ case Iop_Min16Sx8: 5307//ZZ case Iop_Min32Sx4: { 5308//ZZ HReg res = newVRegV(env); 5309//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5310//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5311//ZZ UInt size; 5312//ZZ switch (e->Iex.Binop.op) { 5313//ZZ case Iop_Min8Sx16: size = 0; break; 5314//ZZ case Iop_Min16Sx8: size = 1; break; 5315//ZZ case Iop_Min32Sx4: size = 2; break; 5316//ZZ default: vpanic("Illegal element size in VMAXU"); 5317//ZZ } 5318//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS, 5319//ZZ res, argL, argR, size, True)); 5320//ZZ return res; 5321//ZZ } 5322//ZZ case Iop_Sar8x16: 5323//ZZ case Iop_Sar16x8: 5324//ZZ case Iop_Sar32x4: 5325//ZZ case Iop_Sar64x2: { 5326//ZZ HReg res = newVRegV(env); 5327//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5328//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5329//ZZ HReg argR2 = newVRegV(env); 5330//ZZ HReg zero = newVRegV(env); 5331//ZZ UInt size; 5332//ZZ switch (e->Iex.Binop.op) { 5333//ZZ case Iop_Sar8x16: size = 0; break; 5334//ZZ case Iop_Sar16x8: size = 1; break; 5335//ZZ case Iop_Sar32x4: size = 2; break; 5336//ZZ case Iop_Sar64x2: size = 3; break; 5337//ZZ default: vassert(0); 5338//ZZ } 5339//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); 5340//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 5341//ZZ argR2, zero, argR, size, True)); 5342//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 5343//ZZ res, argL, argR2, size, True)); 5344//ZZ return res; 5345//ZZ } 5346//ZZ case Iop_Sal8x16: 5347//ZZ case Iop_Sal16x8: 5348//ZZ case Iop_Sal32x4: 5349//ZZ case Iop_Sal64x2: { 5350//ZZ HReg res = newVRegV(env); 5351//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5352//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5353//ZZ UInt size; 5354//ZZ switch (e->Iex.Binop.op) { 5355//ZZ case Iop_Sal8x16: size = 0; break; 5356//ZZ case Iop_Sal16x8: size = 1; break; 5357//ZZ case Iop_Sal32x4: size = 2; break; 5358//ZZ case 
Iop_Sal64x2: size = 3; break; 5359//ZZ default: vassert(0); 5360//ZZ } 5361//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 5362//ZZ res, argL, argR, size, True)); 5363//ZZ return res; 5364//ZZ } 5365//ZZ case Iop_Shr8x16: 5366//ZZ case Iop_Shr16x8: 5367//ZZ case Iop_Shr32x4: 5368//ZZ case Iop_Shr64x2: { 5369//ZZ HReg res = newVRegV(env); 5370//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5371//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5372//ZZ HReg argR2 = newVRegV(env); 5373//ZZ HReg zero = newVRegV(env); 5374//ZZ UInt size; 5375//ZZ switch (e->Iex.Binop.op) { 5376//ZZ case Iop_Shr8x16: size = 0; break; 5377//ZZ case Iop_Shr16x8: size = 1; break; 5378//ZZ case Iop_Shr32x4: size = 2; break; 5379//ZZ case Iop_Shr64x2: size = 3; break; 5380//ZZ default: vassert(0); 5381//ZZ } 5382//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); 5383//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 5384//ZZ argR2, zero, argR, size, True)); 5385//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 5386//ZZ res, argL, argR2, size, True)); 5387//ZZ return res; 5388//ZZ } 5389//ZZ case Iop_Shl8x16: 5390//ZZ case Iop_Shl16x8: 5391//ZZ case Iop_Shl32x4: 5392//ZZ case Iop_Shl64x2: { 5393//ZZ HReg res = newVRegV(env); 5394//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5395//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5396//ZZ UInt size; 5397//ZZ switch (e->Iex.Binop.op) { 5398//ZZ case Iop_Shl8x16: size = 0; break; 5399//ZZ case Iop_Shl16x8: size = 1; break; 5400//ZZ case Iop_Shl32x4: size = 2; break; 5401//ZZ case Iop_Shl64x2: size = 3; break; 5402//ZZ default: vassert(0); 5403//ZZ } 5404//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 5405//ZZ res, argL, argR, size, True)); 5406//ZZ return res; 5407//ZZ } 5408//ZZ case Iop_QShl8x16: 5409//ZZ case Iop_QShl16x8: 5410//ZZ case Iop_QShl32x4: 5411//ZZ case Iop_QShl64x2: { 5412//ZZ HReg res = newVRegV(env); 5413//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5414//ZZ HReg argR = iselNeonExpr(env, 
e->Iex.Binop.arg2); 5415//ZZ UInt size; 5416//ZZ switch (e->Iex.Binop.op) { 5417//ZZ case Iop_QShl8x16: size = 0; break; 5418//ZZ case Iop_QShl16x8: size = 1; break; 5419//ZZ case Iop_QShl32x4: size = 2; break; 5420//ZZ case Iop_QShl64x2: size = 3; break; 5421//ZZ default: vassert(0); 5422//ZZ } 5423//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL, 5424//ZZ res, argL, argR, size, True)); 5425//ZZ return res; 5426//ZZ } 5427//ZZ case Iop_QSal8x16: 5428//ZZ case Iop_QSal16x8: 5429//ZZ case Iop_QSal32x4: 5430//ZZ case Iop_QSal64x2: { 5431//ZZ HReg res = newVRegV(env); 5432//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5433//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5434//ZZ UInt size; 5435//ZZ switch (e->Iex.Binop.op) { 5436//ZZ case Iop_QSal8x16: size = 0; break; 5437//ZZ case Iop_QSal16x8: size = 1; break; 5438//ZZ case Iop_QSal32x4: size = 2; break; 5439//ZZ case Iop_QSal64x2: size = 3; break; 5440//ZZ default: vassert(0); 5441//ZZ } 5442//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL, 5443//ZZ res, argL, argR, size, True)); 5444//ZZ return res; 5445//ZZ } 5446//ZZ case Iop_QShlN8x16: 5447//ZZ case Iop_QShlN16x8: 5448//ZZ case Iop_QShlN32x4: 5449//ZZ case Iop_QShlN64x2: { 5450//ZZ HReg res = newVRegV(env); 5451//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5452//ZZ UInt size, imm; 5453//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || 5454//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 5455//ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant " 5456//ZZ "second argument only\n"); 5457//ZZ } 5458//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 5459//ZZ switch (e->Iex.Binop.op) { 5460//ZZ case Iop_QShlN8x16: size = 8 | imm; break; 5461//ZZ case Iop_QShlN16x8: size = 16 | imm; break; 5462//ZZ case Iop_QShlN32x4: size = 32 | imm; break; 5463//ZZ case Iop_QShlN64x2: size = 64 | imm; break; 5464//ZZ default: vassert(0); 5465//ZZ } 5466//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU, 5467//ZZ res, argL, size, True)); 
5468//ZZ return res; 5469//ZZ } 5470//ZZ case Iop_QShlN8Sx16: 5471//ZZ case Iop_QShlN16Sx8: 5472//ZZ case Iop_QShlN32Sx4: 5473//ZZ case Iop_QShlN64Sx2: { 5474//ZZ HReg res = newVRegV(env); 5475//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5476//ZZ UInt size, imm; 5477//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || 5478//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 5479//ZZ vpanic("ARM taget supports Iop_QShlNASxB with constant " 5480//ZZ "second argument only\n"); 5481//ZZ } 5482//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 5483//ZZ switch (e->Iex.Binop.op) { 5484//ZZ case Iop_QShlN8Sx16: size = 8 | imm; break; 5485//ZZ case Iop_QShlN16Sx8: size = 16 | imm; break; 5486//ZZ case Iop_QShlN32Sx4: size = 32 | imm; break; 5487//ZZ case Iop_QShlN64Sx2: size = 64 | imm; break; 5488//ZZ default: vassert(0); 5489//ZZ } 5490//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS, 5491//ZZ res, argL, size, True)); 5492//ZZ return res; 5493//ZZ } 5494//ZZ case Iop_QSalN8x16: 5495//ZZ case Iop_QSalN16x8: 5496//ZZ case Iop_QSalN32x4: 5497//ZZ case Iop_QSalN64x2: { 5498//ZZ HReg res = newVRegV(env); 5499//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5500//ZZ UInt size, imm; 5501//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || 5502//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 5503//ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant " 5504//ZZ "second argument only\n"); 5505//ZZ } 5506//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 5507//ZZ switch (e->Iex.Binop.op) { 5508//ZZ case Iop_QSalN8x16: size = 8 | imm; break; 5509//ZZ case Iop_QSalN16x8: size = 16 | imm; break; 5510//ZZ case Iop_QSalN32x4: size = 32 | imm; break; 5511//ZZ case Iop_QSalN64x2: size = 64 | imm; break; 5512//ZZ default: vassert(0); 5513//ZZ } 5514//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS, 5515//ZZ res, argL, size, True)); 5516//ZZ return res; 5517//ZZ } 5518 case Iop_ShrN64x2: 5519 case Iop_ShrN32x4: 5520 case Iop_ShrN16x8: 5521 case 
Iop_ShrN8x16: 5522 case Iop_SarN64x2: 5523 case Iop_SarN32x4: 5524 case Iop_SarN16x8: 5525 case Iop_SarN8x16: 5526 case Iop_ShlN64x2: 5527 case Iop_ShlN32x4: 5528 case Iop_ShlN16x8: 5529 case Iop_ShlN8x16: 5530 { 5531 IRExpr* argL = e->Iex.Binop.arg1; 5532 IRExpr* argR = e->Iex.Binop.arg2; 5533 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) { 5534 UInt amt = argR->Iex.Const.con->Ico.U8; 5535 UInt limit = 0; 5536 ARM64VecShiftOp op = ARM64vecsh_INVALID; 5537 switch (e->Iex.Binop.op) { 5538 case Iop_ShrN64x2: 5539 op = ARM64vecsh_USHR64x2; limit = 63; break; 5540 case Iop_ShrN32x4: 5541 op = ARM64vecsh_USHR32x4; limit = 31; break; 5542 case Iop_ShrN16x8: 5543 op = ARM64vecsh_USHR16x8; limit = 15; break; 5544 case Iop_ShrN8x16: 5545 op = ARM64vecsh_USHR8x16; limit = 7; break; 5546 case Iop_SarN64x2: 5547 op = ARM64vecsh_SSHR64x2; limit = 63; break; 5548 case Iop_SarN32x4: 5549 op = ARM64vecsh_SSHR32x4; limit = 31; break; 5550 case Iop_SarN16x8: 5551 op = ARM64vecsh_SSHR16x8; limit = 15; break; 5552 case Iop_SarN8x16: 5553 op = ARM64vecsh_SSHR8x16; limit = 7; break; 5554 case Iop_ShlN64x2: 5555 op = ARM64vecsh_SHL64x2; limit = 63; break; 5556 case Iop_ShlN32x4: 5557 op = ARM64vecsh_SHL32x4; limit = 31; break; 5558 case Iop_ShlN16x8: 5559 op = ARM64vecsh_SHL16x8; limit = 15; break; 5560 case Iop_ShlN8x16: 5561 op = ARM64vecsh_SHL8x16; limit = 7; break; 5562 default: 5563 vassert(0); 5564 } 5565 if (op != ARM64vecsh_INVALID && amt >= 0 && amt <= limit) { 5566 HReg src = iselV128Expr(env, argL); 5567 HReg dst = newVRegV(env); 5568 if (amt > 0) { 5569 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt)); 5570 } else { 5571 dst = src; 5572 } 5573 return dst; 5574 } 5575 } 5576 /* else fall out; this is unhandled */ 5577 break; 5578 } 5579//ZZ case Iop_CmpGT8Ux16: 5580//ZZ case Iop_CmpGT16Ux8: 5581//ZZ case Iop_CmpGT32Ux4: { 5582//ZZ HReg res = newVRegV(env); 5583//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5584//ZZ HReg argR = 
iselNeonExpr(env, e->Iex.Binop.arg2); 5585//ZZ UInt size; 5586//ZZ switch (e->Iex.Binop.op) { 5587//ZZ case Iop_CmpGT8Ux16: size = 0; break; 5588//ZZ case Iop_CmpGT16Ux8: size = 1; break; 5589//ZZ case Iop_CmpGT32Ux4: size = 2; break; 5590//ZZ default: vassert(0); 5591//ZZ } 5592//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU, 5593//ZZ res, argL, argR, size, True)); 5594//ZZ return res; 5595//ZZ } 5596//ZZ case Iop_CmpGT8Sx16: 5597//ZZ case Iop_CmpGT16Sx8: 5598//ZZ case Iop_CmpGT32Sx4: { 5599//ZZ HReg res = newVRegV(env); 5600//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5601//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5602//ZZ UInt size; 5603//ZZ switch (e->Iex.Binop.op) { 5604//ZZ case Iop_CmpGT8Sx16: size = 0; break; 5605//ZZ case Iop_CmpGT16Sx8: size = 1; break; 5606//ZZ case Iop_CmpGT32Sx4: size = 2; break; 5607//ZZ default: vassert(0); 5608//ZZ } 5609//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS, 5610//ZZ res, argL, argR, size, True)); 5611//ZZ return res; 5612//ZZ } 5613//ZZ case Iop_CmpEQ8x16: 5614//ZZ case Iop_CmpEQ16x8: 5615//ZZ case Iop_CmpEQ32x4: { 5616//ZZ HReg res = newVRegV(env); 5617//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5618//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5619//ZZ UInt size; 5620//ZZ switch (e->Iex.Binop.op) { 5621//ZZ case Iop_CmpEQ8x16: size = 0; break; 5622//ZZ case Iop_CmpEQ16x8: size = 1; break; 5623//ZZ case Iop_CmpEQ32x4: size = 2; break; 5624//ZZ default: vassert(0); 5625//ZZ } 5626//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ, 5627//ZZ res, argL, argR, size, True)); 5628//ZZ return res; 5629//ZZ } 5630//ZZ case Iop_Mul8x16: 5631//ZZ case Iop_Mul16x8: 5632//ZZ case Iop_Mul32x4: { 5633//ZZ HReg res = newVRegV(env); 5634//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5635//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5636//ZZ UInt size = 0; 5637//ZZ switch(e->Iex.Binop.op) { 5638//ZZ case Iop_Mul8x16: size = 0; break; 5639//ZZ case Iop_Mul16x8: size = 1; break; 
5640//ZZ case Iop_Mul32x4: size = 2; break; 5641//ZZ default: vassert(0); 5642//ZZ } 5643//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL, 5644//ZZ res, argL, argR, size, True)); 5645//ZZ return res; 5646//ZZ } 5647//ZZ case Iop_Mul32Fx4: { 5648//ZZ HReg res = newVRegV(env); 5649//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5650//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5651//ZZ UInt size = 0; 5652//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP, 5653//ZZ res, argL, argR, size, True)); 5654//ZZ return res; 5655//ZZ } 5656 case Iop_Mull8Ux8: 5657 case Iop_Mull16Ux4: 5658 case Iop_Mull32Ux2: { 5659 HReg res = newVRegV(env); 5660 HReg argL = iselDblExpr(env, e->Iex.Binop.arg1); 5661 HReg argR = iselDblExpr(env, e->Iex.Binop.arg2); 5662 UInt size = 0; 5663 ARM64VecBinOp op = ARM64vecb_INVALID; 5664 5665 switch(e->Iex.Binop.op) { 5666 case Iop_Mull8Ux8: op = ARM64vecb_UMULL8x8; break; 5667 case Iop_Mull16Ux4: op = ARM64vecb_UMULL16x4; break; 5668 case Iop_Mull32Ux2: op = ARM64vecb_UMULL32x2; break; 5669 default: vassert(0); 5670 } 5671 addInstr(env, ARM64Instr_VBinV(op, res, argL, argR)); 5672 return res; 5673 } 5674//ZZ 5675//ZZ case Iop_Mull8Sx8: 5676//ZZ case Iop_Mull16Sx4: 5677//ZZ case Iop_Mull32Sx2: { 5678//ZZ HReg res = newVRegV(env); 5679//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 5680//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 5681//ZZ UInt size = 0; 5682//ZZ switch(e->Iex.Binop.op) { 5683//ZZ case Iop_Mull8Sx8: size = 0; break; 5684//ZZ case Iop_Mull16Sx4: size = 1; break; 5685//ZZ case Iop_Mull32Sx2: size = 2; break; 5686//ZZ default: vassert(0); 5687//ZZ } 5688//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS, 5689//ZZ res, argL, argR, size, True)); 5690//ZZ return res; 5691//ZZ } 5692//ZZ 5693//ZZ case Iop_QDMulHi16Sx8: 5694//ZZ case Iop_QDMulHi32Sx4: { 5695//ZZ HReg res = newVRegV(env); 5696//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5697//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 
5698//ZZ UInt size = 0; 5699//ZZ switch(e->Iex.Binop.op) { 5700//ZZ case Iop_QDMulHi16Sx8: size = 1; break; 5701//ZZ case Iop_QDMulHi32Sx4: size = 2; break; 5702//ZZ default: vassert(0); 5703//ZZ } 5704//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH, 5705//ZZ res, argL, argR, size, True)); 5706//ZZ return res; 5707//ZZ } 5708//ZZ 5709//ZZ case Iop_QRDMulHi16Sx8: 5710//ZZ case Iop_QRDMulHi32Sx4: { 5711//ZZ HReg res = newVRegV(env); 5712//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5713//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5714//ZZ UInt size = 0; 5715//ZZ switch(e->Iex.Binop.op) { 5716//ZZ case Iop_QRDMulHi16Sx8: size = 1; break; 5717//ZZ case Iop_QRDMulHi32Sx4: size = 2; break; 5718//ZZ default: vassert(0); 5719//ZZ } 5720//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH, 5721//ZZ res, argL, argR, size, True)); 5722//ZZ return res; 5723//ZZ } 5724//ZZ 5725//ZZ case Iop_QDMulLong16Sx4: 5726//ZZ case Iop_QDMulLong32Sx2: { 5727//ZZ HReg res = newVRegV(env); 5728//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 5729//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 5730//ZZ UInt size = 0; 5731//ZZ switch(e->Iex.Binop.op) { 5732//ZZ case Iop_QDMulLong16Sx4: size = 1; break; 5733//ZZ case Iop_QDMulLong32Sx2: size = 2; break; 5734//ZZ default: vassert(0); 5735//ZZ } 5736//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL, 5737//ZZ res, argL, argR, size, True)); 5738//ZZ return res; 5739//ZZ } 5740//ZZ case Iop_PolynomialMul8x16: { 5741//ZZ HReg res = newVRegV(env); 5742//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5743//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5744//ZZ UInt size = 0; 5745//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP, 5746//ZZ res, argL, argR, size, True)); 5747//ZZ return res; 5748//ZZ } 5749//ZZ case Iop_Max32Fx4: { 5750//ZZ HReg res = newVRegV(env); 5751//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5752//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5753//ZZ 
addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF, 5754//ZZ res, argL, argR, 2, True)); 5755//ZZ return res; 5756//ZZ } 5757//ZZ case Iop_Min32Fx4: { 5758//ZZ HReg res = newVRegV(env); 5759//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5760//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5761//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF, 5762//ZZ res, argL, argR, 2, True)); 5763//ZZ return res; 5764//ZZ } 5765//ZZ case Iop_PwMax32Fx4: { 5766//ZZ HReg res = newVRegV(env); 5767//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5768//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5769//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF, 5770//ZZ res, argL, argR, 2, True)); 5771//ZZ return res; 5772//ZZ } 5773//ZZ case Iop_PwMin32Fx4: { 5774//ZZ HReg res = newVRegV(env); 5775//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5776//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5777//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF, 5778//ZZ res, argL, argR, 2, True)); 5779//ZZ return res; 5780//ZZ } 5781//ZZ case Iop_CmpGT32Fx4: { 5782//ZZ HReg res = newVRegV(env); 5783//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5784//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5785//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF, 5786//ZZ res, argL, argR, 2, True)); 5787//ZZ return res; 5788//ZZ } 5789//ZZ case Iop_CmpGE32Fx4: { 5790//ZZ HReg res = newVRegV(env); 5791//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5792//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5793//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF, 5794//ZZ res, argL, argR, 2, True)); 5795//ZZ return res; 5796//ZZ } 5797//ZZ case Iop_CmpEQ32Fx4: { 5798//ZZ HReg res = newVRegV(env); 5799//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5800//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5801//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF, 5802//ZZ res, argL, argR, 2, True)); 5803//ZZ return res; 5804//ZZ } 5805//ZZ 5806//ZZ case 
Iop_PolynomialMull8x8: { 5807//ZZ HReg res = newVRegV(env); 5808//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 5809//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 5810//ZZ UInt size = 0; 5811//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP, 5812//ZZ res, argL, argR, size, True)); 5813//ZZ return res; 5814//ZZ } 5815//ZZ case Iop_F32ToFixed32Ux4_RZ: 5816//ZZ case Iop_F32ToFixed32Sx4_RZ: 5817//ZZ case Iop_Fixed32UToF32x4_RN: 5818//ZZ case Iop_Fixed32SToF32x4_RN: { 5819//ZZ HReg res = newVRegV(env); 5820//ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1); 5821//ZZ ARMNeonUnOp op; 5822//ZZ UInt imm6; 5823//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || 5824//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 5825//ZZ vpanic("ARM supports FP <-> Fixed conversion with constant " 5826//ZZ "second argument less than 33 only\n"); 5827//ZZ } 5828//ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 5829//ZZ vassert(imm6 <= 32 && imm6 > 0); 5830//ZZ imm6 = 64 - imm6; 5831//ZZ switch(e->Iex.Binop.op) { 5832//ZZ case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break; 5833//ZZ case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break; 5834//ZZ case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break; 5835//ZZ case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break; 5836//ZZ default: vassert(0); 5837//ZZ } 5838//ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True)); 5839//ZZ return res; 5840//ZZ } 5841//ZZ /* 5842//ZZ FIXME remove if not used 5843//ZZ case Iop_VDup8x16: 5844//ZZ case Iop_VDup16x8: 5845//ZZ case Iop_VDup32x4: { 5846//ZZ HReg res = newVRegV(env); 5847//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 5848//ZZ UInt imm4; 5849//ZZ UInt index; 5850//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || 5851//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 5852//ZZ vpanic("ARM supports Iop_VDup with constant " 5853//ZZ "second argument less than 16 only\n"); 5854//ZZ } 5855//ZZ index = 
e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 5856//ZZ switch(e->Iex.Binop.op) { 5857//ZZ case Iop_VDup8x16: imm4 = (index << 1) + 1; break; 5858//ZZ case Iop_VDup16x8: imm4 = (index << 2) + 2; break; 5859//ZZ case Iop_VDup32x4: imm4 = (index << 3) + 4; break; 5860//ZZ default: vassert(0); 5861//ZZ } 5862//ZZ if (imm4 >= 16) { 5863//ZZ vpanic("ARM supports Iop_VDup with constant " 5864//ZZ "second argument less than 16 only\n"); 5865//ZZ } 5866//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP, 5867//ZZ res, argL, imm4, True)); 5868//ZZ return res; 5869//ZZ } 5870//ZZ */ 5871//ZZ case Iop_PwAdd8x16: 5872//ZZ case Iop_PwAdd16x8: 5873//ZZ case Iop_PwAdd32x4: { 5874//ZZ HReg res = newVRegV(env); 5875//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5876//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5877//ZZ UInt size = 0; 5878//ZZ switch(e->Iex.Binop.op) { 5879//ZZ case Iop_PwAdd8x16: size = 0; break; 5880//ZZ case Iop_PwAdd16x8: size = 1; break; 5881//ZZ case Iop_PwAdd32x4: size = 2; break; 5882//ZZ default: vassert(0); 5883//ZZ } 5884//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD, 5885//ZZ res, argL, argR, size, True)); 5886//ZZ return res; 5887//ZZ } 5888 /* ... 
*/ 5889 default: 5890 break; 5891 } /* switch on the binop */ 5892 } /* if (e->tag == Iex_Binop) */ 5893 5894 if (e->tag == Iex_Triop) { 5895 IRTriop* triop = e->Iex.Triop.details; 5896 ARM64VecBinOp vecbop = ARM64vecb_INVALID; 5897 switch (triop->op) { 5898 case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break; 5899 case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break; 5900 case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break; 5901 case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break; 5902 case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break; 5903 case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break; 5904 case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break; 5905 case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break; 5906 default: break; 5907 } 5908 if (vecbop != ARM64vecb_INVALID) { 5909 HReg argL = iselV128Expr(env, triop->arg2); 5910 HReg argR = iselV128Expr(env, triop->arg3); 5911 HReg dst = newVRegV(env); 5912 set_FPCR_rounding_mode(env, triop->arg1); 5913 addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR)); 5914 return dst; 5915 } 5916 5917//ZZ switch (triop->op) { 5918//ZZ case Iop_ExtractV128: { 5919//ZZ HReg res = newVRegV(env); 5920//ZZ HReg argL = iselNeonExpr(env, triop->arg1); 5921//ZZ HReg argR = iselNeonExpr(env, triop->arg2); 5922//ZZ UInt imm4; 5923//ZZ if (triop->arg3->tag != Iex_Const || 5924//ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) { 5925//ZZ vpanic("ARM target supports Iop_ExtractV128 with constant " 5926//ZZ "third argument less than 16 only\n"); 5927//ZZ } 5928//ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8; 5929//ZZ if (imm4 >= 16) { 5930//ZZ vpanic("ARM target supports Iop_ExtractV128 with constant " 5931//ZZ "third argument less than 16 only\n"); 5932//ZZ } 5933//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT, 5934//ZZ res, argL, argR, imm4, True)); 5935//ZZ return res; 5936//ZZ } 5937//ZZ default: 5938//ZZ break; 5939//ZZ } 5940 } 5941 5942//ZZ if (e->tag == Iex_ITE) { // VFD 5943//ZZ ARMCondCode cc; 5944//ZZ 
//ZZ       HReg r1  = iselNeonExpr(env, e->Iex.ITE.iftrue);
//ZZ       HReg r0  = iselNeonExpr(env, e->Iex.ITE.iffalse);
//ZZ       HReg dst = newVRegV(env);
//ZZ       addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
//ZZ       cc = iselCondCode(env, e->Iex.ITE.cond);
//ZZ       addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
//ZZ       return dst;
//ZZ    }

   /* No pattern matched this V128-typed expression: print it and
      give up. */
  v128_expr_bad:
   ppIRExpr(e);
   vpanic("iselV128Expr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (64 bit)         ---*/
/*---------------------------------------------------------*/

/* Compute a 64-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller. */

static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselDblExpr_wrk( env, e );
# if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
# endif
   /* Sanity-check the worker's result: must be a virtual FP64-class
      register. */
   vassert(hregClass(r) == HRcFlt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64 || ty == Ity_I64);

   /* Previously-computed value: just hand back its register. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F64i) {
         /* Materialise the 64-bit literal in an integer register,
            then move it sideways into an FP register. */
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

   /* 64-bit little-endian FP load. */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      vassert(e->Iex.Load.ty == Ity_F64 || e->Iex.Load.ty == Ity_I64);
      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
      HReg res  = newVRegD(env);
      addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
      return res;
   }

   /* Read from the guest state: only for 8-aligned offsets that the
      load/store offset encoding can reach. */
   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
//ZZ          case Iop_ReinterpI64asF64: {
//ZZ             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
//ZZ                return iselNeon64Expr(env, e->Iex.Unop.arg);
//ZZ             } else {
//ZZ                HReg srcHi, srcLo;
//ZZ                HReg dst = newVRegD(env);
//ZZ                iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
//ZZ                addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
//ZZ                return dst;
//ZZ             }
//ZZ          }
         case Iop_NegF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
            return dst;
         }
         case Iop_F32toF64: {
            /* Widening F32 -> F64 is exact; no rounding mode needed. */
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
            return dst;
         }
         case Iop_I32UtoF64:
         case Iop_I32StoF64: {
            /* Rounding mode is not involved here, since the
               conversion can always be done without loss of
               precision. */
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
            ARM64CvtOp cvt_op
               = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_RoundF64toInt: {
            /* arg1 is the IR rounding mode; install it in FPCR before
               emitting the rounding instruction. */
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINT, dst, src));
            return dst;
         }
         case Iop_SqrtF64: {
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_SQRT, dst, src));
            return dst;
         }
         case Iop_I64StoF64:
         case Iop_I64UtoF64: {
            /* 64-bit int -> F64 may lose precision, hence the
               explicit rounding mode in arg1. */
            ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
                                   ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Triop) {
      IRTriop*     triop = e->Iex.Triop.details;
      ARM64FpBinOp dblop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF64: dblop = ARM64fpb_DIV; break;
         case Iop_MulF64: dblop = ARM64fpb_MUL; break;
         case Iop_SubF64: dblop = ARM64fpb_SUB; break;
         case Iop_AddF64: dblop = ARM64fpb_ADD; break;
         default: break;
      }
      if (dblop != ARM64fpb_INVALID) {
         /* arg1 of the triop is the rounding mode. */
         HReg argL = iselDblExpr(env, triop->arg2);
         HReg argR = iselDblExpr(env, triop->arg3);
         HReg dst  = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
         return dst;
      }
   }

//ZZ    if (e->tag == Iex_ITE) { // VFD
//ZZ       if (ty == Ity_F64
//ZZ           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
//ZZ          HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
//ZZ          HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
//ZZ          HReg dst = newVRegD(env);
//ZZ          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
//ZZ          ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
//ZZ          addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
//ZZ          return dst;
//ZZ       }
//ZZ    }

   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (32 bit)         ---*/
/*---------------------------------------------------------*/

/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.  Values are generated into HRcFlt64
   registers despite the values themselves being Ity_F32s. */

static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselFltExpr_wrk( env, e );
# if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
# endif
   /* F32 values nonetheless live in FP64-class virtual registers. */
   vassert(hregClass(r) == HRcFlt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F32);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* This is something of a kludge.  Since a 32 bit floating point
         zero is just .. all zeroes, just create a 64 bit zero word
         and transfer it.  This avoids having to create a SfromW
         instruction for this specific case. */
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, 0));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

//ZZ    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
//ZZ       ARMAModeV* am;
//ZZ       HReg res = newVRegF(env);
//ZZ       vassert(e->Iex.Load.ty == Ity_F32);
//ZZ       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
//ZZ       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
//ZZ       return res;
//ZZ    }

   /* Read from the guest state: only for 4-aligned, in-range
      offsets. */
   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
//ZZ          case Iop_ReinterpI32asF32: {
//ZZ             HReg dst = newVRegF(env);
//ZZ             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
//ZZ             addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
//ZZ             return dst;
//ZZ          }
         case Iop_NegF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_RoundF32toInt: {
            /* arg1 is the IR rounding mode; install it in FPCR before
               emitting the rounding instruction. */
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINT, dst, src));
            return dst;
         }
         case Iop_SqrtF32: {
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_SQRT, dst, src));
            return dst;
         }
         case Iop_F64toF32: {
            /* Narrowing F64 -> F32 can round, hence the explicit
               rounding mode in arg1. */
            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD));
            return dstS;
         }
         case Iop_I32UtoF32:
         case Iop_I32StoF32:
         case Iop_I64UtoF32:
         case Iop_I64StoF32: {
            /* int -> F32 conversions may round; rounding mode is
               arg1. */
            ARM64CvtOp cvt_op = ARM64cvt_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
               case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
               case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
               case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
               default: vassert(0);
            }
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Triop) {
      IRTriop*     triop = e->Iex.Triop.details;
      ARM64FpBinOp sglop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF32: sglop = ARM64fpb_DIV; break;
         case Iop_MulF32: sglop = ARM64fpb_MUL; break;
         case Iop_SubF32: sglop = ARM64fpb_SUB; break;
         case Iop_AddF32: sglop = ARM64fpb_ADD; break;
         default: break;
      }
      if (sglop != ARM64fpb_INVALID) {
         /* arg1 of the triop is the rounding mode. */
         HReg argL = iselFltExpr(env, triop->arg2);
         HReg argR = iselFltExpr(env, triop->arg3);
         HReg dst  = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
         return dst;
      }
   }

//ZZ 
//ZZ    if (e->tag == Iex_ITE) { // VFD
//ZZ       if (ty == Ity_F32
//ZZ           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
//ZZ          ARMCondCode cc;
//ZZ          HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
//ZZ          HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
//ZZ          HReg dst = newVRegF(env);
//ZZ          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
//ZZ          cc = iselCondCode(env, e->Iex.ITE.cond);
//ZZ          addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
//ZZ          return dst;
//ZZ       }
//ZZ    }

   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }
   switch (stmt->tag) {

      /* --------- STORE --------- */
      /* little-endian write to memory */
      case Ist_Store: {
         IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
         IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
         IREndness end = stmt->Ist.Store.end;

         /* Addresses must be 64-bit and stores little-endian. */
         if (tya != Ity_I64 || end != Iend_LE)
            goto stmt_fail;

         if (tyd == Ity_I64) {
            HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
            ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
            addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I32) {
            HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
            ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
            addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I16) {
            HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
            ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
            addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd ==
Ity_I8) { 6355 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 6356 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); 6357 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am)); 6358 return; 6359 } 6360 if (tyd == Ity_V128) { 6361 HReg qD = iselV128Expr(env, stmt->Ist.Store.data); 6362 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr); 6363 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr)); 6364 return; 6365 } 6366 if (tyd == Ity_F64) { 6367 HReg dD = iselDblExpr(env, stmt->Ist.Store.data); 6368 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr); 6369 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0)); 6370 return; 6371 } 6372 if (tyd == Ity_F32) { 6373 HReg sD = iselFltExpr(env, stmt->Ist.Store.data); 6374 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr); 6375 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0)); 6376 return; 6377 } 6378 6379//ZZ if (tyd == Ity_I16) { 6380//ZZ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 6381//ZZ ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr); 6382//ZZ addInstr(env, ARMInstr_LdSt16(ARMcc_AL, 6383//ZZ False/*!isLoad*/, 6384//ZZ False/*!isSignedLoad*/, rD, am)); 6385//ZZ return; 6386//ZZ } 6387//ZZ if (tyd == Ity_I8) { 6388//ZZ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 6389//ZZ ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr); 6390//ZZ addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am)); 6391//ZZ return; 6392//ZZ } 6393//ZZ if (tyd == Ity_I64) { 6394//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 6395//ZZ HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data); 6396//ZZ ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr); 6397//ZZ addInstr(env, ARMInstr_NLdStD(False, dD, am)); 6398//ZZ } else { 6399//ZZ HReg rDhi, rDlo, rA; 6400//ZZ iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data); 6401//ZZ rA = iselIntExpr_R(env, stmt->Ist.Store.addr); 6402//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, 
False/*!load*/, rDhi, 6403//ZZ ARMAMode1_RI(rA,4))); 6404//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo, 6405//ZZ ARMAMode1_RI(rA,0))); 6406//ZZ } 6407//ZZ return; 6408//ZZ } 6409//ZZ if (tyd == Ity_F64) { 6410//ZZ HReg dD = iselDblExpr(env, stmt->Ist.Store.data); 6411//ZZ ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr); 6412//ZZ addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am)); 6413//ZZ return; 6414//ZZ } 6415//ZZ if (tyd == Ity_F32) { 6416//ZZ HReg fD = iselFltExpr(env, stmt->Ist.Store.data); 6417//ZZ ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr); 6418//ZZ addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am)); 6419//ZZ return; 6420//ZZ } 6421//ZZ if (tyd == Ity_V128) { 6422//ZZ HReg qD = iselNeonExpr(env, stmt->Ist.Store.data); 6423//ZZ ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr); 6424//ZZ addInstr(env, ARMInstr_NLdStQ(False, qD, am)); 6425//ZZ return; 6426//ZZ } 6427 6428 break; 6429 } 6430 6431//ZZ /* --------- CONDITIONAL STORE --------- */ 6432//ZZ /* conditional little-endian write to memory */ 6433//ZZ case Ist_StoreG: { 6434//ZZ IRStoreG* sg = stmt->Ist.StoreG.details; 6435//ZZ IRType tya = typeOfIRExpr(env->type_env, sg->addr); 6436//ZZ IRType tyd = typeOfIRExpr(env->type_env, sg->data); 6437//ZZ IREndness end = sg->end; 6438//ZZ 6439//ZZ if (tya != Ity_I32 || end != Iend_LE) 6440//ZZ goto stmt_fail; 6441//ZZ 6442//ZZ switch (tyd) { 6443//ZZ case Ity_I8: 6444//ZZ case Ity_I32: { 6445//ZZ HReg rD = iselIntExpr_R(env, sg->data); 6446//ZZ ARMAMode1* am = iselIntExpr_AMode1(env, sg->addr); 6447//ZZ ARMCondCode cc = iselCondCode(env, sg->guard); 6448//ZZ addInstr(env, (tyd == Ity_I32 ? 
ARMInstr_LdSt32 : ARMInstr_LdSt8U) 6449//ZZ (cc, False/*!isLoad*/, rD, am)); 6450//ZZ return; 6451//ZZ } 6452//ZZ case Ity_I16: { 6453//ZZ HReg rD = iselIntExpr_R(env, sg->data); 6454//ZZ ARMAMode2* am = iselIntExpr_AMode2(env, sg->addr); 6455//ZZ ARMCondCode cc = iselCondCode(env, sg->guard); 6456//ZZ addInstr(env, ARMInstr_LdSt16(cc, 6457//ZZ False/*!isLoad*/, 6458//ZZ False/*!isSignedLoad*/, rD, am)); 6459//ZZ return; 6460//ZZ } 6461//ZZ default: 6462//ZZ break; 6463//ZZ } 6464//ZZ break; 6465//ZZ } 6466//ZZ 6467//ZZ /* --------- CONDITIONAL LOAD --------- */ 6468//ZZ /* conditional little-endian load from memory */ 6469//ZZ case Ist_LoadG: { 6470//ZZ IRLoadG* lg = stmt->Ist.LoadG.details; 6471//ZZ IRType tya = typeOfIRExpr(env->type_env, lg->addr); 6472//ZZ IREndness end = lg->end; 6473//ZZ 6474//ZZ if (tya != Ity_I32 || end != Iend_LE) 6475//ZZ goto stmt_fail; 6476//ZZ 6477//ZZ switch (lg->cvt) { 6478//ZZ case ILGop_8Uto32: 6479//ZZ case ILGop_Ident32: { 6480//ZZ HReg rAlt = iselIntExpr_R(env, lg->alt); 6481//ZZ ARMAMode1* am = iselIntExpr_AMode1(env, lg->addr); 6482//ZZ HReg rD = lookupIRTemp(env, lg->dst); 6483//ZZ addInstr(env, mk_iMOVds_RR(rD, rAlt)); 6484//ZZ ARMCondCode cc = iselCondCode(env, lg->guard); 6485//ZZ addInstr(env, (lg->cvt == ILGop_Ident32 ? 
ARMInstr_LdSt32 6486//ZZ : ARMInstr_LdSt8U) 6487//ZZ (cc, True/*isLoad*/, rD, am)); 6488//ZZ return; 6489//ZZ } 6490//ZZ case ILGop_16Sto32: 6491//ZZ case ILGop_16Uto32: 6492//ZZ case ILGop_8Sto32: { 6493//ZZ HReg rAlt = iselIntExpr_R(env, lg->alt); 6494//ZZ ARMAMode2* am = iselIntExpr_AMode2(env, lg->addr); 6495//ZZ HReg rD = lookupIRTemp(env, lg->dst); 6496//ZZ addInstr(env, mk_iMOVds_RR(rD, rAlt)); 6497//ZZ ARMCondCode cc = iselCondCode(env, lg->guard); 6498//ZZ if (lg->cvt == ILGop_8Sto32) { 6499//ZZ addInstr(env, ARMInstr_Ld8S(cc, rD, am)); 6500//ZZ } else { 6501//ZZ vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32); 6502//ZZ Bool sx = lg->cvt == ILGop_16Sto32; 6503//ZZ addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am)); 6504//ZZ } 6505//ZZ return; 6506//ZZ } 6507//ZZ default: 6508//ZZ break; 6509//ZZ } 6510//ZZ break; 6511//ZZ } 6512 6513 /* --------- PUT --------- */ 6514 /* write guest state, fixed offset */ 6515 case Ist_Put: { 6516 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); 6517 UInt offs = (UInt)stmt->Ist.Put.offset; 6518 if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) { 6519 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); 6520 ARM64AMode* am = mk_baseblock_64bit_access_amode(offs); 6521 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am)); 6522 return; 6523 } 6524 if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) { 6525 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); 6526 ARM64AMode* am = mk_baseblock_32bit_access_amode(offs); 6527 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am)); 6528 return; 6529 } 6530 if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) { 6531 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); 6532 ARM64AMode* am = mk_baseblock_16bit_access_amode(offs); 6533 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am)); 6534 return; 6535 } 6536 if (tyd == Ity_I8 && offs < (1<<12)) { 6537 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); 6538 ARM64AMode* 
am = mk_baseblock_8bit_access_amode(offs); 6539 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am)); 6540 return; 6541 } 6542 if (tyd == Ity_V128 && offs < (1<<12)) { 6543 HReg qD = iselV128Expr(env, stmt->Ist.Put.data); 6544 HReg addr = mk_baseblock_128bit_access_addr(env, offs); 6545 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr)); 6546 return; 6547 } 6548 if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) { 6549 HReg dD = iselDblExpr(env, stmt->Ist.Put.data); 6550 HReg bbp = get_baseblock_register(); 6551 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs)); 6552 return; 6553 } 6554 if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) { 6555 HReg dD = iselFltExpr(env, stmt->Ist.Put.data); 6556 HReg bbp = get_baseblock_register(); 6557 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, dD, bbp, offs)); 6558 return; 6559 } 6560 6561//ZZ if (tyd == Ity_I64) { 6562//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 6563//ZZ HReg addr = newVRegI(env); 6564//ZZ HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data); 6565//ZZ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), 6566//ZZ stmt->Ist.Put.offset)); 6567//ZZ addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr))); 6568//ZZ } else { 6569//ZZ HReg rDhi, rDlo; 6570//ZZ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), 6571//ZZ stmt->Ist.Put.offset + 0); 6572//ZZ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), 6573//ZZ stmt->Ist.Put.offset + 4); 6574//ZZ iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data); 6575//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, 6576//ZZ rDhi, am4)); 6577//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, 6578//ZZ rDlo, am0)); 6579//ZZ } 6580//ZZ return; 6581//ZZ } 6582//ZZ if (tyd == Ity_F64) { 6583//ZZ // XXX This won't work if offset > 1020 or is not 0 % 4. 6584//ZZ // In which case we'll have to generate more longwinded code. 
6585//ZZ ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset); 6586//ZZ HReg rD = iselDblExpr(env, stmt->Ist.Put.data); 6587//ZZ addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am)); 6588//ZZ return; 6589//ZZ } 6590//ZZ if (tyd == Ity_F32) { 6591//ZZ // XXX This won't work if offset > 1020 or is not 0 % 4. 6592//ZZ // In which case we'll have to generate more longwinded code. 6593//ZZ ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset); 6594//ZZ HReg rD = iselFltExpr(env, stmt->Ist.Put.data); 6595//ZZ addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am)); 6596//ZZ return; 6597//ZZ } 6598 break; 6599 } 6600 6601 /* --------- TMP --------- */ 6602 /* assign value to temporary */ 6603 case Ist_WrTmp: { 6604 IRTemp tmp = stmt->Ist.WrTmp.tmp; 6605 IRType ty = typeOfIRTemp(env->type_env, tmp); 6606 6607 if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) { 6608 /* We could do a lot better here. But for the time being: */ 6609 HReg dst = lookupIRTemp(env, tmp); 6610 HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data); 6611 addInstr(env, ARM64Instr_MovI(dst, rD)); 6612 return; 6613 } 6614 if (ty == Ity_I1) { 6615 /* Here, we are generating a I1 value into a 64 bit register. 6616 Make sure the value in the register is only zero or one, 6617 but no other. This allows optimisation of the 6618 1Uto64(tmp:I1) case, by making it simply a copy of the 6619 register holding 'tmp'. The point being that the value in 6620 the register holding 'tmp' can only have been created 6621 here. LATER: that seems dangerous; safer to do 'tmp & 1' 6622 in that case. Also, could do this just with a single CINC 6623 insn. 
*/ 6624 /* CLONE-01 */ 6625 HReg zero = newVRegI(env); 6626 HReg one = newVRegI(env); 6627 HReg dst = lookupIRTemp(env, tmp); 6628 addInstr(env, ARM64Instr_Imm64(zero, 0)); 6629 addInstr(env, ARM64Instr_Imm64(one, 1)); 6630 ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data); 6631 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); 6632 return; 6633 } 6634 if (ty == Ity_F64) { 6635 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data); 6636 HReg dst = lookupIRTemp(env, tmp); 6637 addInstr(env, ARM64Instr_VMov(8, dst, src)); 6638 return; 6639 } 6640 if (ty == Ity_F32) { 6641 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); 6642 HReg dst = lookupIRTemp(env, tmp); 6643 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src)); 6644 return; 6645 } 6646 if (ty == Ity_V128) { 6647 HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data); 6648 HReg dst = lookupIRTemp(env, tmp); 6649 addInstr(env, ARM64Instr_VMov(16, dst, src)); 6650 return; 6651 } 6652 break; 6653 } 6654 6655 /* --------- Call to DIRTY helper --------- */ 6656 /* call complex ("dirty") helper function */ 6657 case Ist_Dirty: { 6658 IRDirty* d = stmt->Ist.Dirty.details; 6659 6660 /* Figure out the return type, if any. */ 6661 IRType retty = Ity_INVALID; 6662 if (d->tmp != IRTemp_INVALID) 6663 retty = typeOfIRTemp(env->type_env, d->tmp); 6664 6665 Bool retty_ok = False; 6666 switch (retty) { 6667 case Ity_INVALID: /* function doesn't return anything */ 6668 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: 6669 case Ity_V128: 6670 retty_ok = True; break; 6671 default: 6672 break; 6673 } 6674 if (!retty_ok) 6675 break; /* will go to stmt_fail: */ 6676 6677 /* Marshal args, do the call, and set the return value to 0x555..555 6678 if this is a conditional call that returns a value and the 6679 call is skipped. 
*/ 6680 UInt addToSp = 0; 6681 RetLoc rloc = mk_RetLoc_INVALID(); 6682 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args ); 6683 vassert(is_sane_RetLoc(rloc)); 6684 6685 /* Now figure out what to do with the returned value, if any. */ 6686 switch (retty) { 6687 case Ity_INVALID: { 6688 /* No return value. Nothing to do. */ 6689 vassert(d->tmp == IRTemp_INVALID); 6690 vassert(rloc.pri == RLPri_None); 6691 vassert(addToSp == 0); 6692 return; 6693 } 6694 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: { 6695 vassert(rloc.pri == RLPri_Int); 6696 vassert(addToSp == 0); 6697 /* The returned value is in x0. Park it in the register 6698 associated with tmp. */ 6699 HReg dst = lookupIRTemp(env, d->tmp); 6700 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) ); 6701 return; 6702 } 6703 case Ity_V128: { 6704 /* The returned value is on the stack, and *retloc tells 6705 us where. Fish it off the stack and then move the 6706 stack pointer upwards to clear it, as directed by 6707 doHelperCall. 
*/ 6708 vassert(rloc.pri == RLPri_V128SpRel); 6709 vassert(rloc.spOff < 256); // stay sane 6710 vassert(addToSp >= 16); // ditto 6711 vassert(addToSp < 256); // ditto 6712 HReg dst = lookupIRTemp(env, d->tmp); 6713 HReg tmp = newVRegI(env); // the address of the returned value 6714 addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP 6715 addInstr(env, ARM64Instr_Arith(tmp, tmp, 6716 ARM64RIA_I12((UShort)rloc.spOff, 0), 6717 True/*isAdd*/ )); 6718 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp)); 6719 addInstr(env, ARM64Instr_AddToSP(addToSp)); 6720 return; 6721 } 6722 default: 6723 /*NOTREACHED*/ 6724 vassert(0); 6725 } 6726 break; 6727 } 6728 6729 /* --------- Load Linked and Store Conditional --------- */ 6730 case Ist_LLSC: { 6731 if (stmt->Ist.LLSC.storedata == NULL) { 6732 /* LL */ 6733 IRTemp res = stmt->Ist.LLSC.result; 6734 IRType ty = typeOfIRTemp(env->type_env, res); 6735 if (ty == Ity_I64 || ty == Ity_I32 6736 || ty == Ity_I16 || ty == Ity_I8) { 6737 Int szB = 0; 6738 HReg r_dst = lookupIRTemp(env, res); 6739 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr); 6740 switch (ty) { 6741 case Ity_I8: szB = 1; break; 6742 case Ity_I16: szB = 2; break; 6743 case Ity_I32: szB = 4; break; 6744 case Ity_I64: szB = 8; break; 6745 default: vassert(0); 6746 } 6747 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr)); 6748 addInstr(env, ARM64Instr_LdrEX(szB)); 6749 addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2())); 6750 return; 6751 } 6752 goto stmt_fail; 6753 } else { 6754 /* SC */ 6755 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata); 6756 if (tyd == Ity_I64 || tyd == Ity_I32 6757 || tyd == Ity_I16 || tyd == Ity_I8) { 6758 Int szB = 0; 6759 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata); 6760 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr); 6761 switch (tyd) { 6762 case Ity_I8: szB = 1; break; 6763 case Ity_I16: szB = 2; break; 6764 case Ity_I32: szB = 4; break; 6765 case Ity_I64: szB = 8; break; 6766 default: vassert(0); 
6767 } 6768 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD)); 6769 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA)); 6770 addInstr(env, ARM64Instr_StrEX(szB)); 6771 } else { 6772 goto stmt_fail; 6773 } 6774 /* now r0 is 1 if failed, 0 if success. Change to IR 6775 conventions (0 is fail, 1 is success). Also transfer 6776 result to r_res. */ 6777 IRTemp res = stmt->Ist.LLSC.result; 6778 IRType ty = typeOfIRTemp(env->type_env, res); 6779 HReg r_res = lookupIRTemp(env, res); 6780 ARM64RIL* one = mb_mkARM64RIL_I(1); 6781 vassert(ty == Ity_I1); 6782 vassert(one); 6783 addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one, 6784 ARM64lo_XOR)); 6785 /* And be conservative -- mask off all but the lowest bit. */ 6786 addInstr(env, ARM64Instr_Logic(r_res, r_res, one, 6787 ARM64lo_AND)); 6788 return; 6789 } 6790 break; 6791 } 6792 6793 /* --------- MEM FENCE --------- */ 6794 case Ist_MBE: 6795 switch (stmt->Ist.MBE.event) { 6796 case Imbe_Fence: 6797 addInstr(env, ARM64Instr_MFence()); 6798 return; 6799//ZZ case Imbe_CancelReservation: 6800//ZZ addInstr(env, ARMInstr_CLREX()); 6801//ZZ return; 6802 default: 6803 break; 6804 } 6805 break; 6806 6807 /* --------- INSTR MARK --------- */ 6808 /* Doesn't generate any executable code ... */ 6809 case Ist_IMark: 6810 return; 6811 6812 /* --------- NO-OP --------- */ 6813 case Ist_NoOp: 6814 return; 6815 6816 /* --------- EXIT --------- */ 6817 case Ist_Exit: { 6818 if (stmt->Ist.Exit.dst->tag != Ico_U64) 6819 vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value"); 6820 6821 ARM64CondCode cc 6822 = iselCondCode(env, stmt->Ist.Exit.guard); 6823 ARM64AMode* amPC 6824 = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP); 6825 6826 /* Case: boring transfer to known address */ 6827 if (stmt->Ist.Exit.jk == Ijk_Boring 6828 /*ATC || stmt->Ist.Exit.jk == Ijk_Call */ 6829 /*ATC || stmt->Ist.Exit.jk == Ijk_Ret */ ) { 6830 if (env->chainingAllowed) { 6831 /* .. almost always true .. 
*/ 6832 /* Skip the event check at the dst if this is a forwards 6833 edge. */ 6834 Bool toFastEP 6835 = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga; 6836 if (0) vex_printf("%s", toFastEP ? "Y" : ","); 6837 addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64, 6838 amPC, cc, toFastEP)); 6839 } else { 6840 /* .. very occasionally .. */ 6841 /* We can't use chaining, so ask for an assisted transfer, 6842 as that's the only alternative that is allowable. */ 6843 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 6844 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring)); 6845 } 6846 return; 6847 } 6848 6849//ZZ /* Case: assisted transfer to arbitrary address */ 6850//ZZ switch (stmt->Ist.Exit.jk) { 6851//ZZ /* Keep this list in sync with that in iselNext below */ 6852//ZZ case Ijk_ClientReq: 6853//ZZ case Ijk_NoDecode: 6854//ZZ case Ijk_NoRedir: 6855//ZZ case Ijk_Sys_syscall: 6856//ZZ case Ijk_InvalICache: 6857//ZZ case Ijk_Yield: 6858//ZZ { 6859//ZZ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 6860//ZZ addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, 6861//ZZ stmt->Ist.Exit.jk)); 6862//ZZ return; 6863//ZZ } 6864//ZZ default: 6865//ZZ break; 6866//ZZ } 6867 6868 /* Do we ever expect to see any other kind? 
*/
         /* No: fail the selection for this statement. */
         goto stmt_fail;
      }

      default: break;
   }
   /* No case above matched this statement kind: dump it and abort. */
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

/* Generate code for the block-ending transfer described by (next,
   jk).  Writes the guest PC to the guest-state slot at offsIP and
   emits one of XDirect (patchable direct chain), XIndir (indirect
   chain) or XAssisted (run-time assisted transfer), depending on the
   jump kind, whether the destination is a known constant, and whether
   chaining is allowed.  Aborts on jump kinds it does not handle. */
static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U64);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)cdst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
                                             amPC, ARM64cc_AL,
                                             toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
                                               Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg r = iselIntExpr_R(env, next);
         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
         if (env->chainingAllowed) {
            addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
         } else {
            addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
                                               Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_Sys_syscall:
      case Ijk_InvalICache:
      case Ijk_FlushDCache:
//ZZ       case Ijk_Yield:
      {
         HReg r = iselIntExpr_R(env, next);
         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
         addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to arm64 code.
*/

HInstrArray* iselSB_ARM64 ( IRSB* bb,
                            VexArch arch_host,
                            VexArchInfo* archinfo_host,
                            VexAbiInfo* vbi/*UNUSED*/,
                            Int offs_Host_EvC_Counter,
                            Int offs_Host_EvC_FailAddr,
                            Bool chainingAllowed,
                            Bool addProfInc,
                            Addr64 max_ga )
{
   /* Sanity checks: this backend handles only ARM64, and the host
      instruction union must not have regressed in size. */
   vassert(arch_host == VexArchARM64);
   vassert(sizeof(ARM64Instr) <= 32);

   /* Build the selection environment. */
   ISelEnv* env = LibVEX_Alloc(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Output array for the selected host instructions. */
   env->code = newHInstrArray();

   /* Share the superblock's type environment. */
   env->type_env = bb->tyenv;

   /* Allocate the IRTemp -> virtual HReg maps.  These are filled in
      once, just below, and never change thereafter. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));

   /* Remaining environment state. */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = archinfo_host->hwcaps;
   env->previous_rm     = NULL;
   env->max_ga          = max_ga;

   /* Hand every IRTemp a virtual register of the matching kind;
      I128 temps get a pair (low half first, then high). */
   Int ctr = 0;
   Int k;
   for (k = 0; k < env->n_vregmap; k++) {
      HReg rLo = INVALID_HREG;
      HReg rHi = INVALID_HREG;
      switch (bb->tyenv->types[k]) {
         case Ity_I1:
         case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
            rLo = mkHReg(ctr++, HRcInt64, True);
            break;
         case Ity_I128:
            rLo = mkHReg(ctr++, HRcInt64, True);
            rHi = mkHReg(ctr++, HRcInt64, True);
            break;
         case Ity_F32: // we'll use HRcFlt64 regs for F32 too
         case Ity_F64:
            rLo = mkHReg(ctr++, HRcFlt64, True);
            break;
         case Ity_V128:
            rLo = mkHReg(ctr++, HRcVec128, True);
            break;
         default:
            ppIRType(bb->tyenv->types[k]);
            vpanic("iselBB(arm64): IRTemp type");
      }
      env->vregmap[k]   = rLo;
      env->vregmapHI[k] = rHi;
   }
   env->vreg_ctr = ctr;

   /* The generated code must begin with an event check. */
   ARM64AMode* amCounter
      = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
   ARM64AMode* amFailAddr
      = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
   addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));

   /* A profile-counter increment would follow; the counter address
      is unknown now and would be patched later via
      LibVEX_patchProfCtr.  Not yet implemented for arm64. */
   if (addProfInc) {
      vassert(0);
      //addInstr(env, ARM64Instr_ProfInc());
   }

   /* Select code for each statement, then for the terminator. */
   for (k = 0; k < bb->stmts_used; k++)
      iselStmt(env, bb->stmts[k]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* Tell the register allocator how many vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                   host_arm64_isel.c ---*/
/*---------------------------------------------------------------*/