guest_arm64_toIR.c revision 6d9b01c06e645ea0b0a3544e968961aaa7bc6a66
1/* -*- mode: C; c-basic-offset: 3; -*- */ 2 3/*--------------------------------------------------------------------*/ 4/*--- begin guest_arm64_toIR.c ---*/ 5/*--------------------------------------------------------------------*/ 6 7/* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2013-2013 OpenWorks 12 info@open-works.net 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 27 02110-1301, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30*/ 31 32//ZZ /* XXXX thumb to check: 33//ZZ that all cases where putIRegT writes r15, we generate a jump. 34//ZZ 35//ZZ All uses of newTemp assign to an IRTemp and not a UInt 36//ZZ 37//ZZ For all thumb loads and stores, including VFP ones, new-ITSTATE is 38//ZZ backed out before the memory op, and restored afterwards. This 39//ZZ needs to happen even after we go uncond. (and for sure it doesn't 40//ZZ happen for VFP loads/stores right now). 41//ZZ 42//ZZ VFP on thumb: check that we exclude all r13/r15 cases that we 43//ZZ should. 44//ZZ 45//ZZ XXXX thumb to do: improve the ITSTATE-zeroing optimisation by 46//ZZ taking into account the number of insns guarded by an IT. 
47//ZZ 48//ZZ remove the nasty hack, in the spechelper, of looking for Or32(..., 49//ZZ 0xE0) in as the first arg to armg_calculate_condition, and instead 50//ZZ use Slice44 as specified in comments in the spechelper. 51//ZZ 52//ZZ add specialisations for armg_calculate_flag_c and _v, as they 53//ZZ are moderately often needed in Thumb code. 54//ZZ 55//ZZ Correctness: ITSTATE handling in Thumb SVCs is wrong. 56//ZZ 57//ZZ Correctness (obscure): in m_transtab, when invalidating code 58//ZZ address ranges, invalidate up to 18 bytes after the end of the 59//ZZ range. This is because the ITSTATE optimisation at the top of 60//ZZ _THUMB_WRK below analyses up to 18 bytes before the start of any 61//ZZ given instruction, and so might depend on the invalidated area. 62//ZZ */ 63//ZZ 64//ZZ /* Limitations, etc 65//ZZ 66//ZZ - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD. 67//ZZ These instructions are non-restartable in the case where the 68//ZZ transfer(s) fault. 69//ZZ 70//ZZ - SWP: the restart jump back is Ijk_Boring; it should be 71//ZZ Ijk_NoRedir but that's expensive. See comments on casLE() in 72//ZZ guest_x86_toIR.c. 73//ZZ */ 74 75/* "Special" instructions. 76 77 This instruction decoder can decode four special instructions 78 which mean nothing natively (are no-ops as far as regs/mem are 79 concerned) but have meaning for supporting Valgrind. A special 80 instruction is flagged by a 16-byte preamble: 81 82 93CC0D8C 93CC358C 93CCCD8C 93CCF58C 83 (ror x12, x12, #3; ror x12, x12, #13 84 ror x12, x12, #51; ror x12, x12, #61) 85 86 Following that, one of the following 3 are allowed 87 (standard interpretation in parentheses): 88 89 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 ) 90 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR 91 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8 92 AA090129 (orr x9,x9,x9) IR injection 93 94 Any other bytes following the 16-byte preamble are illegal and 95 constitute a failure in instruction decoding. 
This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/

/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of a instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction.
*/

/* CONST: is the host bigendian?  We need to know this in order to do
   sub-register accesses to the SIMD/FP registers correctly. */
static Bool host_is_bigendian;

/* CONST: The guest address for the instruction currently being
   translated. */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

/* Print a disassembly-trace line, but only when front-end tracing
   (VEX_TRACE_FE) is enabled. */
#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

/* As DIP, but formats into 'buf' instead of printing directly. */
#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- arm insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host.
*/
static inline UInt getUIntLittleEndianly ( UChar* p )
{
   /* Assemble bytes p[3]..p[0] most-significant-first, which yields
      the little-endian interpretation of the 4 bytes at p. */
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}

/* Sign extend a N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   Long r = (Long)x;
   /* Shift the sign bit up to bit 63, then arithmetic-shift back
      down, replicating it into bits 63..n. */
   r = (r << (64-n)) >> (64-n);
   return (ULong)r;
}

//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host. */
//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
//ZZ {
//ZZ    UShort w = 0;
//ZZ    w = (w << 8) | p[1];
//ZZ    w = (w << 8) | p[0];
//ZZ    return w;
//ZZ }
//ZZ 
//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
//ZZ    vassert(sh >= 0 && sh < 32);
//ZZ    if (sh == 0)
//ZZ       return x;
//ZZ    else
//ZZ       return (x << (32-sh)) | (x >> sh);
//ZZ }
//ZZ 
//ZZ static Int popcount32 ( UInt x )
//ZZ {
//ZZ    Int res = 0, i;
//ZZ    for (i = 0; i < 32; i++) {
//ZZ       res += (x & 1);
//ZZ       x >>= 1;
//ZZ    }
//ZZ    return res;
//ZZ }
//ZZ 
//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
//ZZ {
//ZZ    UInt mask = 1 << ix;
//ZZ    x &= ~mask;
//ZZ    x |= ((b << ix) & mask);
//ZZ    return x;
//ZZ }

/* Pack individual bit arguments (each 0 or 1) into a small integer
   literal; used throughout the decoders to write bit patterns
   readably.  BITSn takes n bits, most significant first. */
#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0) \
  (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)  \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b10) << 10)  \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b11) << 11)  \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

// produces _uint[_bMax:_bMin], i.e. the inclusive bit-field extract
#define SLICE_UInt(_uint,_bMax,_bMin)  \
   (( ((UInt)(_uint)) >> (_bMin))  \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.    ---*/
/*------------------------------------------------------------*/

/* Constant-expression constructors for the various IR const types. */
static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

/* Read a temporary as an expression. */
static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

/* Thin wrappers over the IRExpr operator constructors. */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

/* Little-endian load of the given type from 'addr'. */
static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list
   held by "irbb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Write expression 'e' into temporary 'dst'. */
static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

/* Little-endian store of 'data' to 'addr'. */
static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ 
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }

/* Generate a new temporary of the given type.
*/
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   /* The temp is allocated in the type environment of the IRSB
      currently under construction. */
   return newIRTemp( irsb->tyenv, ty );
}

//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
//ZZ    IRRoundingMode. */
//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
//ZZ {
//ZZ    return mkU32(Irrm_NEAREST);
//ZZ }
//ZZ 
//ZZ /* Generate an expression for SRC rotated right by ROT. */
//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
//ZZ {
//ZZ    vassert(rot >= 0 && rot < 32);
//ZZ    if (rot == 0)
//ZZ       return mkexpr(src);
//ZZ    return
//ZZ       binop(Iop_Or32,
//ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
//ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
//ZZ }
//ZZ 
//ZZ static IRExpr* mkU128 ( ULong i )
//ZZ {
//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
//ZZ }
//ZZ 
//ZZ /* Generate a 4-aligned version of the given expression if
//ZZ    the given condition is true.  Else return it unchanged. */
//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
//ZZ {
//ZZ    if (b)
//ZZ       return binop(Iop_And32, e, mkU32(~3));
//ZZ    else
//ZZ       return e;
//ZZ }

/* Other IR construction helpers.
*/

/* Select the 32- or 64-bit (or F32/F64) variant of an IROp, given
   the operand type.  Each panics on any other type. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
case Ity_F64: return Iop_MulF64; 495 default: vpanic("mkMULF"); 496 } 497} 498 499static IROp mkDIVF ( IRType ty ) { 500 switch (ty) { 501 case Ity_F32: return Iop_DivF32; 502 case Ity_F64: return Iop_DivF64; 503 default: vpanic("mkMULF"); 504 } 505} 506 507static IROp mkNEGF ( IRType ty ) { 508 switch (ty) { 509 case Ity_F32: return Iop_NegF32; 510 case Ity_F64: return Iop_NegF64; 511 default: vpanic("mkNEGF"); 512 } 513} 514 515static IROp mkABSF ( IRType ty ) { 516 switch (ty) { 517 case Ity_F32: return Iop_AbsF32; 518 case Ity_F64: return Iop_AbsF64; 519 default: vpanic("mkNEGF"); 520 } 521} 522 523static IROp mkSQRTF ( IRType ty ) { 524 switch (ty) { 525 case Ity_F32: return Iop_SqrtF32; 526 case Ity_F64: return Iop_SqrtF64; 527 default: vpanic("mkNEGF"); 528 } 529} 530 531static IRExpr* mkU ( IRType ty, ULong imm ) { 532 switch (ty) { 533 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL)); 534 case Ity_I64: return mkU64(imm); 535 default: vpanic("mkU"); 536 } 537} 538 539/* Generate IR to create 'arg rotated right by imm', for sane values 540 of 'ty' and 'imm'. */ 541static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm ) 542{ 543 UInt w = 0; 544 if (ty == Ity_I64) { 545 w = 64; 546 } else { 547 vassert(ty == Ity_I32); 548 w = 32; 549 } 550 vassert(w != 0); 551 vassert(imm < w); 552 if (imm == 0) { 553 return arg; 554 } 555 IRTemp res = newTemp(ty); 556 assign(res, binop(mkOR(ty), 557 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)), 558 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) )); 559 return res; 560} 561 562/* Generate IR to set the returned temp to either all-zeroes or 563 all ones, as a copy of arg<imm>. 
*/
static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   /* Shift bit 'imm' up into the sign position, then arithmetically
      shift it back down across the whole word, replicating it. */
   IRTemp res = newTemp(ty);
   assign(res, binop(mkSAR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
                     mkU8(w - 1)));
   return res;
}

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8: return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}

/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8: return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.
---*/
/*------------------------------------------------------------*/

/* Byte offsets of the guest integer registers X0..X30 within the
   guest state struct. */
#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

/* Stack pointer and program counter. */
#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

/* Thunk fields for lazy condition-code (NZCV) evaluation. */
#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

/* SIMD/FP registers Q0..Q31. */
#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)

#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_FPSR     offsetof(VexGuestARM64State,guest_FPSR)
//ZZ #define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
//ZZ #define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
//ZZ #define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
//ZZ #define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
//ZZ #define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
//ZZ #define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
//ZZ #define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)

#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)


/* ---------------- Integer registers ---------------- */

static Int offsetIReg64 ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed.
   */
   /* Map register number 0..30 to its guest-state byte offset.
      31 is deliberately not handled here; callers decide whether it
      means XZR or XSP. */
   switch (iregNo) {
      case 0: return OFFB_X0;
      case 1: return OFFB_X1;
      case 2: return OFFB_X2;
      case 3: return OFFB_X3;
      case 4: return OFFB_X4;
      case 5: return OFFB_X5;
      case 6: return OFFB_X6;
      case 7: return OFFB_X7;
      case 8: return OFFB_X8;
      case 9: return OFFB_X9;
      case 10: return OFFB_X10;
      case 11: return OFFB_X11;
      case 12: return OFFB_X12;
      case 13: return OFFB_X13;
      case 14: return OFFB_X14;
      case 15: return OFFB_X15;
      case 16: return OFFB_X16;
      case 17: return OFFB_X17;
      case 18: return OFFB_X18;
      case 19: return OFFB_X19;
      case 20: return OFFB_X20;
      case 21: return OFFB_X21;
      case 22: return OFFB_X22;
      case 23: return OFFB_X23;
      case 24: return OFFB_X24;
      case 25: return OFFB_X25;
      case 26: return OFFB_X26;
      case 27: return OFFB_X27;
      case 28: return OFFB_X28;
      case 29: return OFFB_X29;
      case 30: return OFFB_X30;
      /* but not 31 */
      default: vassert(0);
   }
}

/* As offsetIReg64, except that register number 31 is valid and
   denotes the stack pointer (XSP). */
static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31 ?
          OFFB_XSP : offsetIReg64(iregNo);
}

/* Name of a 64-bit register, with 31 rendered as "xzr" (zero reg). */
static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
          "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

/* Name of a 64-bit register, with 31 rendered as "sp". */
static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}

/* Read a 64-bit register, with 31 meaning XSP. */
static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

/* Read a 64-bit register, with 31 meaning the zero register. */
static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

/* Write a 64-bit register, with 31 meaning XSP. */
static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

/* Write a 64-bit register; writes to 31 (the zero register) are
   discarded. */
static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

/* Name of a 32-bit register, with 31 rendered as "wzr". */
static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
          "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

/* Name of a 32-bit register, with 31 rendered as "wsp". */
static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   /* 32-bit view: the low half of the underlying 64-bit register. */
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

/* Read a 32-bit register, with 31 meaning the zero register. */
static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

/* Write a 32-bit register, with 31 meaning WSP; the value is
   zero-extended into the full 64-bit slot. */
static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

/* Write a 32-bit register (zero-extended); writes to 31 are
   discarded. */
static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

/* Width-polymorphic wrappers: dispatch on 'is64' to the 64- or
   32-bit variants above. */
static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}

/* Write the guest program counter. */
static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}


/* ---------------- Vector (Q) registers ---------------- */

static Int offsetQReg128 ( UInt qregNo )
{
   /* We don't care about endianness at this point.
      It only becomes
      relevant when dealing with sections of these registers.*/
   /* Map Q register number 0..31 to its guest-state byte offset. */
   switch (qregNo) {
      case 0: return OFFB_Q0;
      case 1: return OFFB_Q1;
      case 2: return OFFB_Q2;
      case 3: return OFFB_Q3;
      case 4: return OFFB_Q4;
      case 5: return OFFB_Q5;
      case 6: return OFFB_Q6;
      case 7: return OFFB_Q7;
      case 8: return OFFB_Q8;
      case 9: return OFFB_Q9;
      case 10: return OFFB_Q10;
      case 11: return OFFB_Q11;
      case 12: return OFFB_Q12;
      case 13: return OFFB_Q13;
      case 14: return OFFB_Q14;
      case 15: return OFFB_Q15;
      case 16: return OFFB_Q16;
      case 17: return OFFB_Q17;
      case 18: return OFFB_Q18;
      case 19: return OFFB_Q19;
      case 20: return OFFB_Q20;
      case 21: return OFFB_Q21;
      case 22: return OFFB_Q22;
      case 23: return OFFB_Q23;
      case 24: return OFFB_Q24;
      case 25: return OFFB_Q25;
      case 26: return OFFB_Q26;
      case 27: return OFFB_Q27;
      case 28: return OFFB_Q28;
      case 29: return OFFB_Q29;
      case 30: return OFFB_Q30;
      case 31: return OFFB_Q31;
      default: vassert(0);
   }
}

/* Write to a complete Qreg. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}

/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on.
*/
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32; //Ity_F32;
      case 8: return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}

/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(!host_is_bigendian);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8: laneSzB = 1; break;
      case Ity_I16: laneSzB = 2; break;
      case Ity_F32: case Ity_I32: laneSzB = 4; break;
      case Ity_F64: case Ity_I64: laneSzB = 8; break;
      case Ity_V128: laneSzB = 16; break;
      default: break;
   }
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   /* The lane must lie entirely within the 16-byte register. */
   vassert(maxOff < 16);
   return base + minOff;
}

/* Put to the least significant lane of a Qreg. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg.
*/
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}

/* Name the low 'laneTy'-sized part of a Qreg, using the conventional
   b/h/s/d/q prefix chosen by the lane size. */
static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
          "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
          "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
          "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
          "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
          "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1: return namesB[qregNo];
      case 2: return namesH[qregNo];
      case 4: return namesS[qregNo];
      case
8: return namesD[qregNo]; 1051 case 16: return namesQ[qregNo]; 1052 default: vassert(0); 1053 } 1054 /*NOTREACHED*/ 1055} 1056 1057static const HChar* nameQReg128 ( UInt qregNo ) 1058{ 1059 return nameQRegLO(qregNo, Ity_V128); 1060} 1061 1062/* Find the offset of the most significant half (8 bytes) of the given 1063 Qreg. This requires knowing the endianness of the host. */ 1064static Int offsetQRegHI64 ( UInt qregNo ) 1065{ 1066 return offsetQRegLane(qregNo, Ity_I64, 1); 1067} 1068 1069static IRExpr* getQRegHI64 ( UInt qregNo ) 1070{ 1071 return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64); 1072} 1073 1074static void putQRegHI64 ( UInt qregNo, IRExpr* e ) 1075{ 1076 IRType ty = typeOfIRExpr(irsb->tyenv, e); 1077 Int off = offsetQRegHI64(qregNo); 1078 switch (ty) { 1079 case Ity_I64: case Ity_F64: 1080 break; 1081 default: 1082 vassert(0); // Other cases are plain wrong 1083 } 1084 stmt(IRStmt_Put(off, e)); 1085} 1086 1087/* Put to a specified lane of a Qreg. */ 1088static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e ) 1089{ 1090 IRType laneTy = typeOfIRExpr(irsb->tyenv, e); 1091 Int off = offsetQRegLane(qregNo, laneTy, laneNo); 1092 switch (laneTy) { 1093 case Ity_F64: case Ity_I64: 1094 case Ity_I32: case Ity_F32: 1095 case Ity_I16: 1096 case Ity_I8: 1097 break; 1098 default: 1099 vassert(0); // Other cases are ATC 1100 } 1101 stmt(IRStmt_Put(off, e)); 1102} 1103 1104/* Get from a specified lane of a Qreg. 
*/ 1105static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy ) 1106{ 1107 Int off = offsetQRegLane(qregNo, laneTy, laneNo); 1108 switch (laneTy) { 1109 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: 1110 case Ity_F64: 1111 break; 1112 default: 1113 vassert(0); // Other cases are ATC 1114 } 1115 return IRExpr_Get(off, laneTy); 1116} 1117 1118 1119//ZZ /* ---------------- Misc registers ---------------- */ 1120//ZZ 1121//ZZ static void putMiscReg32 ( UInt gsoffset, 1122//ZZ IRExpr* e, /* :: Ity_I32 */ 1123//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */) 1124//ZZ { 1125//ZZ switch (gsoffset) { 1126//ZZ case OFFB_FPSCR: break; 1127//ZZ case OFFB_QFLAG32: break; 1128//ZZ case OFFB_GEFLAG0: break; 1129//ZZ case OFFB_GEFLAG1: break; 1130//ZZ case OFFB_GEFLAG2: break; 1131//ZZ case OFFB_GEFLAG3: break; 1132//ZZ default: vassert(0); /* awaiting more cases */ 1133//ZZ } 1134//ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 1135//ZZ 1136//ZZ if (guardT == IRTemp_INVALID) { 1137//ZZ /* unconditional write */ 1138//ZZ stmt(IRStmt_Put(gsoffset, e)); 1139//ZZ } else { 1140//ZZ stmt(IRStmt_Put( 1141//ZZ gsoffset, 1142//ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)), 1143//ZZ e, IRExpr_Get(gsoffset, Ity_I32) ) 1144//ZZ )); 1145//ZZ } 1146//ZZ } 1147//ZZ 1148//ZZ static IRTemp get_ITSTATE ( void ) 1149//ZZ { 1150//ZZ ASSERT_IS_THUMB; 1151//ZZ IRTemp t = newTemp(Ity_I32); 1152//ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32)); 1153//ZZ return t; 1154//ZZ } 1155//ZZ 1156//ZZ static void put_ITSTATE ( IRTemp t ) 1157//ZZ { 1158//ZZ ASSERT_IS_THUMB; 1159//ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) ); 1160//ZZ } 1161//ZZ 1162//ZZ static IRTemp get_QFLAG32 ( void ) 1163//ZZ { 1164//ZZ IRTemp t = newTemp(Ity_I32); 1165//ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32)); 1166//ZZ return t; 1167//ZZ } 1168//ZZ 1169//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT ) 1170//ZZ { 1171//ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT ); 1172//ZZ } 
1173//ZZ 1174//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program 1175//ZZ Status Register) to indicate that overflow or saturation occurred. 1176//ZZ Nb: t must be zero to denote no saturation, and any nonzero 1177//ZZ value to indicate saturation. */ 1178//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT ) 1179//ZZ { 1180//ZZ IRTemp old = get_QFLAG32(); 1181//ZZ IRTemp nyu = newTemp(Ity_I32); 1182//ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) ); 1183//ZZ put_QFLAG32(nyu, condT); 1184//ZZ } 1185 1186 1187/* ---------------- FPCR stuff ---------------- */ 1188 1189/* Generate IR to get hold of the rounding mode bits in FPCR, and 1190 convert them to IR format. Bind the final result to the 1191 returned temp. */ 1192static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void ) 1193{ 1194 /* The ARMvfp encoding for rounding mode bits is: 1195 00 to nearest 1196 01 to +infinity 1197 10 to -infinity 1198 11 to zero 1199 We need to convert that to the IR encoding: 1200 00 to nearest (the default) 1201 10 to +infinity 1202 01 to -infinity 1203 11 to zero 1204 Which can be done by swapping bits 0 and 1. 1205 The rmode bits are at 23:22 in FPSCR. 1206 */ 1207 IRTemp armEncd = newTemp(Ity_I32); 1208 IRTemp swapped = newTemp(Ity_I32); 1209 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that 1210 we don't zero out bits 24 and above, since the assignment to 1211 'swapped' will mask them out anyway. */ 1212 assign(armEncd, 1213 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22))); 1214 /* Now swap them. 
*/ 1215 assign(swapped, 1216 binop(Iop_Or32, 1217 binop(Iop_And32, 1218 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)), 1219 mkU32(2)), 1220 binop(Iop_And32, 1221 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)), 1222 mkU32(1)) 1223 )); 1224 return swapped; 1225} 1226 1227 1228/*------------------------------------------------------------*/ 1229/*--- Helpers for flag handling and conditional insns ---*/ 1230/*------------------------------------------------------------*/ 1231 1232static const HChar* nameARM64Condcode ( ARM64Condcode cond ) 1233{ 1234 switch (cond) { 1235 case ARM64CondEQ: return "eq"; 1236 case ARM64CondNE: return "ne"; 1237 case ARM64CondCS: return "cs"; // or 'hs' 1238 case ARM64CondCC: return "cc"; // or 'lo' 1239 case ARM64CondMI: return "mi"; 1240 case ARM64CondPL: return "pl"; 1241 case ARM64CondVS: return "vs"; 1242 case ARM64CondVC: return "vc"; 1243 case ARM64CondHI: return "hi"; 1244 case ARM64CondLS: return "ls"; 1245 case ARM64CondGE: return "ge"; 1246 case ARM64CondLT: return "lt"; 1247 case ARM64CondGT: return "gt"; 1248 case ARM64CondLE: return "le"; 1249 case ARM64CondAL: return "al"; 1250 case ARM64CondNV: return "nv"; 1251 default: vpanic("name_ARM64Condcode"); 1252 } 1253} 1254 1255/* and a handy shorthand for it */ 1256static const HChar* nameCC ( ARM64Condcode cond ) { 1257 return nameARM64Condcode(cond); 1258} 1259 1260 1261/* Build IR to calculate some particular condition from stored 1262 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type 1263 Ity_I64, suitable for narrowing. Although the return type is 1264 Ity_I64, the returned value is either 0 or 1. 'cond' must be 1265 :: Ity_I64 and must denote the condition to compute in 1266 bits 7:4, and be zero everywhere else. 1267*/ 1268static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond ) 1269{ 1270 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64); 1271 /* And 'cond' had better produce a value in which only bits 7:4 are 1272 nonzero. 
However, obviously we can't assert for that. */ 1273 1274 /* So what we're constructing for the first argument is 1275 "(cond << 4) | stored-operation". 1276 However, as per comments above, 'cond' must be supplied 1277 pre-shifted to this function. 1278 1279 This pairing scheme requires that the ARM64_CC_OP_ values all fit 1280 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest 1281 8 bits of the first argument. */ 1282 IRExpr** args 1283 = mkIRExprVec_4( 1284 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond), 1285 IRExpr_Get(OFFB_CC_DEP1, Ity_I64), 1286 IRExpr_Get(OFFB_CC_DEP2, Ity_I64), 1287 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) 1288 ); 1289 IRExpr* call 1290 = mkIRExprCCall( 1291 Ity_I64, 1292 0/*regparm*/, 1293 "arm64g_calculate_condition", &arm64g_calculate_condition, 1294 args 1295 ); 1296 1297 /* Exclude the requested condition, OP and NDEP from definedness 1298 checking. We're only interested in DEP1 and DEP2. */ 1299 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1300 return call; 1301} 1302 1303 1304/* Build IR to calculate some particular condition from stored 1305 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type 1306 Ity_I64, suitable for narrowing. Although the return type is 1307 Ity_I64, the returned value is either 0 or 1. 1308*/ 1309static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond ) 1310{ 1311 /* First arg is "(cond << 4) | condition". This requires that the 1312 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a 1313 (COND, OP) pair in the lowest 8 bits of the first argument. */ 1314 vassert(cond >= 0 && cond <= 15); 1315 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) ); 1316} 1317 1318 1319/* Build IR to calculate just the carry flag from stored 1320 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: 1321 Ity_I64. 
*/ 1322static IRExpr* mk_arm64g_calculate_flag_c ( void ) 1323{ 1324 IRExpr** args 1325 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64), 1326 IRExpr_Get(OFFB_CC_DEP1, Ity_I64), 1327 IRExpr_Get(OFFB_CC_DEP2, Ity_I64), 1328 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) ); 1329 IRExpr* call 1330 = mkIRExprCCall( 1331 Ity_I64, 1332 0/*regparm*/, 1333 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c, 1334 args 1335 ); 1336 /* Exclude OP and NDEP from definedness checking. We're only 1337 interested in DEP1 and DEP2. */ 1338 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1339 return call; 1340} 1341 1342 1343//ZZ /* Build IR to calculate just the overflow flag from stored 1344//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: 1345//ZZ Ity_I32. */ 1346//ZZ static IRExpr* mk_armg_calculate_flag_v ( void ) 1347//ZZ { 1348//ZZ IRExpr** args 1349//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32), 1350//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32), 1351//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32), 1352//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ); 1353//ZZ IRExpr* call 1354//ZZ = mkIRExprCCall( 1355//ZZ Ity_I32, 1356//ZZ 0/*regparm*/, 1357//ZZ "armg_calculate_flag_v", &armg_calculate_flag_v, 1358//ZZ args 1359//ZZ ); 1360//ZZ /* Exclude OP and NDEP from definedness checking. We're only 1361//ZZ interested in DEP1 and DEP2. */ 1362//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1363//ZZ return call; 1364//ZZ } 1365 1366 1367/* Build IR to calculate N Z C V in bits 31:28 of the 1368 returned word. */ 1369static IRExpr* mk_arm64g_calculate_flags_nzcv ( void ) 1370{ 1371 IRExpr** args 1372 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64), 1373 IRExpr_Get(OFFB_CC_DEP1, Ity_I64), 1374 IRExpr_Get(OFFB_CC_DEP2, Ity_I64), 1375 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) ); 1376 IRExpr* call 1377 = mkIRExprCCall( 1378 Ity_I64, 1379 0/*regparm*/, 1380 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv, 1381 args 1382 ); 1383 /* Exclude OP and NDEP from definedness checking. 
We're only 1384 interested in DEP1 and DEP2. */ 1385 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1386 return call; 1387} 1388 1389 1390/* Build IR to set the flags thunk, in the most general case. */ 1391static 1392void setFlags_D1_D2_ND ( UInt cc_op, 1393 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep ) 1394{ 1395 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I64)); 1396 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I64)); 1397 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I64)); 1398 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER); 1399 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) )); 1400 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) )); 1401 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) )); 1402 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) )); 1403} 1404 1405/* Build IR to set the flags thunk after ADD or SUB. */ 1406static 1407void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR ) 1408{ 1409 IRTemp argL64 = IRTemp_INVALID; 1410 IRTemp argR64 = IRTemp_INVALID; 1411 IRTemp z64 = newTemp(Ity_I64); 1412 if (is64) { 1413 argL64 = argL; 1414 argR64 = argR; 1415 } else { 1416 argL64 = newTemp(Ity_I64); 1417 argR64 = newTemp(Ity_I64); 1418 assign(argL64, unop(Iop_32Uto64, mkexpr(argL))); 1419 assign(argR64, unop(Iop_32Uto64, mkexpr(argR))); 1420 } 1421 assign(z64, mkU64(0)); 1422 UInt cc_op = ARM64G_CC_OP_NUMBER; 1423 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; } 1424 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; } 1425 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; } 1426 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; } 1427 else { vassert(0); } 1428 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64); 1429} 1430 1431static 1432void setFlags_ADC_SBC(Bool is64, Bool isSBC, IRTemp argL, IRTemp argR, IRTemp oldC) 1433{ 1434 IRTemp argL64 = IRTemp_INVALID; 1435 IRTemp argR64 = IRTemp_INVALID; 1436 IRTemp oldC64 = IRTemp_INVALID; 1437 if (is64) { 1438 argL64 = argL; 1439 argR64 = argR; 1440 
oldC64 = oldC; 1441 } else { 1442 argL64 = newTemp(Ity_I64); 1443 argR64 = newTemp(Ity_I64); 1444 oldC64 = newTemp(Ity_I64); 1445 assign(argL64, unop(Iop_32Uto64, mkexpr(argL))); 1446 assign(argR64, unop(Iop_32Uto64, mkexpr(argR))); 1447 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC))); 1448 } 1449 UInt cc_op = ARM64G_CC_OP_NUMBER; 1450 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; } 1451 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; } 1452 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; } 1453 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; } 1454 else { vassert(0); } 1455 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64); 1456} 1457 1458/* Build IR to set the flags thunk after ADD or SUB, if the given 1459 condition evaluates to True at run time. If not, the flags are set 1460 to the specified NZCV value. */ 1461static 1462void setFlags_ADD_SUB_conditionally ( 1463 Bool is64, Bool isSUB, 1464 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv 1465 ) 1466{ 1467 /* Generate IR as follows: 1468 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY) 1469 CC_DEP1 = ITE(cond, argL64, nzcv << 28) 1470 CC_DEP2 = ITE(cond, argR64, 0) 1471 CC_NDEP = 0 1472 */ 1473 1474 IRTemp z64 = newTemp(Ity_I64); 1475 assign(z64, mkU64(0)); 1476 1477 /* Establish the operation and operands for the True case. 
*/ 1478 IRTemp t_dep1 = IRTemp_INVALID; 1479 IRTemp t_dep2 = IRTemp_INVALID; 1480 UInt t_op = ARM64G_CC_OP_NUMBER; 1481 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; } 1482 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; } 1483 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; } 1484 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; } 1485 else { vassert(0); } 1486 /* */ 1487 if (is64) { 1488 t_dep1 = argL; 1489 t_dep2 = argR; 1490 } else { 1491 t_dep1 = newTemp(Ity_I64); 1492 t_dep2 = newTemp(Ity_I64); 1493 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL))); 1494 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR))); 1495 } 1496 1497 /* Establish the operation and operands for the False case. */ 1498 IRTemp f_dep1 = newTemp(Ity_I64); 1499 IRTemp f_dep2 = z64; 1500 UInt f_op = ARM64G_CC_OP_COPY; 1501 assign(f_dep1, mkU64(nzcv << 28)); 1502 1503 /* Final thunk values */ 1504 IRTemp dep1 = newTemp(Ity_I64); 1505 IRTemp dep2 = newTemp(Ity_I64); 1506 IRTemp op = newTemp(Ity_I64); 1507 1508 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op))); 1509 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1))); 1510 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2))); 1511 1512 /* finally .. */ 1513 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) )); 1514 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) )); 1515 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) )); 1516 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) )); 1517} 1518 1519/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. 
*/ 1520static 1521void setFlags_LOGIC ( Bool is64, IRTemp res ) 1522{ 1523 IRTemp res64 = IRTemp_INVALID; 1524 IRTemp z64 = newTemp(Ity_I64); 1525 UInt cc_op = ARM64G_CC_OP_NUMBER; 1526 if (is64) { 1527 res64 = res; 1528 cc_op = ARM64G_CC_OP_LOGIC64; 1529 } else { 1530 res64 = newTemp(Ity_I64); 1531 assign(res64, unop(Iop_32Uto64, mkexpr(res))); 1532 cc_op = ARM64G_CC_OP_LOGIC32; 1533 } 1534 assign(z64, mkU64(0)); 1535 setFlags_D1_D2_ND(cc_op, res64, z64, z64); 1536} 1537 1538/* Build IR to set the flags thunk to a given NZCV value. NZCV is 1539 located in bits 31:28 of the supplied value. */ 1540static 1541void setFlags_COPY ( IRTemp nzcv_28x0 ) 1542{ 1543 IRTemp z64 = newTemp(Ity_I64); 1544 assign(z64, mkU64(0)); 1545 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64); 1546} 1547 1548 1549//ZZ /* Minor variant of the above that sets NDEP to zero (if it 1550//ZZ sets it at all) */ 1551//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1, 1552//ZZ IRTemp t_dep2, 1553//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 1554//ZZ { 1555//ZZ IRTemp z32 = newTemp(Ity_I32); 1556//ZZ assign( z32, mkU32(0) ); 1557//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT ); 1558//ZZ } 1559//ZZ 1560//ZZ 1561//ZZ /* Minor variant of the above that sets DEP2 to zero (if it 1562//ZZ sets it at all) */ 1563//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1, 1564//ZZ IRTemp t_ndep, 1565//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 1566//ZZ { 1567//ZZ IRTemp z32 = newTemp(Ity_I32); 1568//ZZ assign( z32, mkU32(0) ); 1569//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT ); 1570//ZZ } 1571//ZZ 1572//ZZ 1573//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it 1574//ZZ sets them at all) */ 1575//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1, 1576//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 1577//ZZ { 1578//ZZ IRTemp z32 = newTemp(Ity_I32); 1579//ZZ assign( z32, mkU32(0) ); 1580//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, 
guardT ); 1581//ZZ } 1582 1583 1584/*------------------------------------------------------------*/ 1585/*--- Misc math helpers ---*/ 1586/*------------------------------------------------------------*/ 1587 1588/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */ 1589static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh ) 1590{ 1591 IRTemp maskT = newTemp(Ity_I64); 1592 IRTemp res = newTemp(Ity_I64); 1593 vassert(sh >= 1 && sh <= 63); 1594 assign(maskT, mkU64(mask)); 1595 assign( res, 1596 binop(Iop_Or64, 1597 binop(Iop_Shr64, 1598 binop(Iop_And64,mkexpr(x),mkexpr(maskT)), 1599 mkU8(sh)), 1600 binop(Iop_And64, 1601 binop(Iop_Shl64,mkexpr(x),mkU8(sh)), 1602 mkexpr(maskT)) 1603 ) 1604 ); 1605 return res; 1606} 1607 1608/* Generates byte swaps within 32-bit lanes. */ 1609static IRTemp math_UINTSWAP64 ( IRTemp src ) 1610{ 1611 IRTemp res; 1612 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8); 1613 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16); 1614 return res; 1615} 1616 1617/* Generates byte swaps within 16-bit lanes. */ 1618static IRTemp math_USHORTSWAP64 ( IRTemp src ) 1619{ 1620 IRTemp res; 1621 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8); 1622 return res; 1623} 1624 1625/* Generates a 64-bit byte swap. */ 1626static IRTemp math_BYTESWAP64 ( IRTemp src ) 1627{ 1628 IRTemp res; 1629 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8); 1630 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16); 1631 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32); 1632 return res; 1633} 1634 1635/* Generates a 64-bit bit swap. */ 1636static IRTemp math_BITSWAP64 ( IRTemp src ) 1637{ 1638 IRTemp res; 1639 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1); 1640 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2); 1641 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4); 1642 return math_BYTESWAP64(res); 1643} 1644 1645/* Duplicates the bits at the bottom of the given word to fill the 1646 whole word. 
src :: Ity_I64 is assumed to have zeroes everywhere 1647 except for the bottom bits. */ 1648static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy ) 1649{ 1650 if (srcTy == Ity_I8) { 1651 IRTemp t16 = newTemp(Ity_I64); 1652 assign(t16, binop(Iop_Or64, mkexpr(src), 1653 binop(Iop_Shl64, mkexpr(src), mkU8(8)))); 1654 IRTemp t32 = newTemp(Ity_I64); 1655 assign(t32, binop(Iop_Or64, mkexpr(t16), 1656 binop(Iop_Shl64, mkexpr(t16), mkU8(16)))); 1657 IRTemp t64 = newTemp(Ity_I64); 1658 assign(t64, binop(Iop_Or64, mkexpr(t32), 1659 binop(Iop_Shl64, mkexpr(t32), mkU8(32)))); 1660 return t64; 1661 } 1662 if (srcTy == Ity_I16) { 1663 IRTemp t32 = newTemp(Ity_I64); 1664 assign(t32, binop(Iop_Or64, mkexpr(src), 1665 binop(Iop_Shl64, mkexpr(src), mkU8(16)))); 1666 IRTemp t64 = newTemp(Ity_I64); 1667 assign(t64, binop(Iop_Or64, mkexpr(t32), 1668 binop(Iop_Shl64, mkexpr(t32), mkU8(32)))); 1669 return t64; 1670 } 1671 if (srcTy == Ity_I32) { 1672 IRTemp t64 = newTemp(Ity_I64); 1673 assign(t64, binop(Iop_Or64, mkexpr(src), 1674 binop(Iop_Shl64, mkexpr(src), mkU8(32)))); 1675 return t64; 1676 } 1677 if (srcTy == Ity_I64) { 1678 return src; 1679 } 1680 vassert(0); 1681} 1682 1683 1684/*------------------------------------------------------------*/ 1685/*--- FP comparison helpers ---*/ 1686/*------------------------------------------------------------*/ 1687 1688/* irRes :: Ity_I32 holds a floating point comparison result encoded 1689 as an IRCmpF64Result. Generate code to convert it to an 1690 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value. 1691 Assign a new temp to hold that value, and return the temp. */ 1692static 1693IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 ) 1694{ 1695 IRTemp ix = newTemp(Ity_I64); 1696 IRTemp termL = newTemp(Ity_I64); 1697 IRTemp termR = newTemp(Ity_I64); 1698 IRTemp nzcv = newTemp(Ity_I64); 1699 IRTemp irRes = newTemp(Ity_I64); 1700 1701 /* This is where the fun starts. 
We have to convert 'irRes' from 1702 an IR-convention return result (IRCmpF64Result) to an 1703 ARM-encoded (N,Z,C,V) group. The final result is in the bottom 1704 4 bits of 'nzcv'. */ 1705 /* Map compare result from IR to ARM(nzcv) */ 1706 /* 1707 FP cmp result | IR | ARM(nzcv) 1708 -------------------------------- 1709 UN 0x45 0011 1710 LT 0x01 1000 1711 GT 0x00 0010 1712 EQ 0x40 0110 1713 */ 1714 /* Now since you're probably wondering WTF .. 1715 1716 ix fishes the useful bits out of the IR value, bits 6 and 0, and 1717 places them side by side, giving a number which is 0, 1, 2 or 3. 1718 1719 termL is a sequence cooked up by GNU superopt. It converts ix 1720 into an almost correct value NZCV value (incredibly), except 1721 for the case of UN, where it produces 0100 instead of the 1722 required 0011. 1723 1724 termR is therefore a correction term, also computed from ix. It 1725 is 1 in the UN case and 0 for LT, GT and UN. Hence, to get 1726 the final correct value, we subtract termR from termL. 1727 1728 Don't take my word for it. There's a test program at the bottom 1729 of guest_arm_toIR.c, to try this out with. 
1730 */ 1731 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32))); 1732 1733 assign( 1734 ix, 1735 binop(Iop_Or64, 1736 binop(Iop_And64, 1737 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)), 1738 mkU64(3)), 1739 binop(Iop_And64, mkexpr(irRes), mkU64(1)))); 1740 1741 assign( 1742 termL, 1743 binop(Iop_Add64, 1744 binop(Iop_Shr64, 1745 binop(Iop_Sub64, 1746 binop(Iop_Shl64, 1747 binop(Iop_Xor64, mkexpr(ix), mkU64(1)), 1748 mkU8(62)), 1749 mkU64(1)), 1750 mkU8(61)), 1751 mkU64(1))); 1752 1753 assign( 1754 termR, 1755 binop(Iop_And64, 1756 binop(Iop_And64, 1757 mkexpr(ix), 1758 binop(Iop_Shr64, mkexpr(ix), mkU8(1))), 1759 mkU64(1))); 1760 1761 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR))); 1762 return nzcv; 1763} 1764 1765 1766/*------------------------------------------------------------*/ 1767/*--- Data processing (immediate) ---*/ 1768/*------------------------------------------------------------*/ 1769 1770/* Helper functions for supporting "DecodeBitMasks" */ 1771 1772static ULong dbm_ROR ( Int width, ULong x, Int rot ) 1773{ 1774 vassert(width > 0 && width <= 64); 1775 vassert(rot >= 0 && rot < width); 1776 if (rot == 0) return x; 1777 ULong res = x >> rot; 1778 res |= (x << (width - rot)); 1779 if (width < 64) 1780 res &= ((1ULL << width) - 1); 1781 return res; 1782} 1783 1784static ULong dbm_RepTo64( Int esize, ULong x ) 1785{ 1786 switch (esize) { 1787 case 64: 1788 return x; 1789 case 32: 1790 x &= 0xFFFFFFFF; x |= (x << 32); 1791 return x; 1792 case 16: 1793 x &= 0xFFFF; x |= (x << 16); x |= (x << 32); 1794 return x; 1795 case 8: 1796 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32); 1797 return x; 1798 case 4: 1799 x &= 0xF; x |= (x << 4); x |= (x << 8); 1800 x |= (x << 16); x |= (x << 32); 1801 return x; 1802 case 2: 1803 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8); 1804 x |= (x << 16); x |= (x << 32); 1805 return x; 1806 default: 1807 break; 1808 } 1809 vpanic("dbm_RepTo64"); 1810 /*NOTREACHED*/ 1811 return 0; 1812} 1813 1814static 
Int dbm_highestSetBit ( ULong x ) 1815{ 1816 Int i; 1817 for (i = 63; i >= 0; i--) { 1818 if (x & (1ULL << i)) 1819 return i; 1820 } 1821 vassert(x == 0); 1822 return -1; 1823} 1824 1825static 1826Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask, 1827 ULong immN, ULong imms, ULong immr, Bool immediate, 1828 UInt M /*32 or 64*/) 1829{ 1830 vassert(immN < (1ULL << 1)); 1831 vassert(imms < (1ULL << 6)); 1832 vassert(immr < (1ULL << 6)); 1833 vassert(immediate == False || immediate == True); 1834 vassert(M == 32 || M == 64); 1835 1836 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) ); 1837 if (len < 1) { /* printf("fail1\n"); */ return False; } 1838 vassert(len <= 6); 1839 vassert(M >= (1 << len)); 1840 1841 vassert(len >= 1 && len <= 6); 1842 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len); 1843 (1 << len) - 1; 1844 vassert(levels >= 1 && levels <= 63); 1845 1846 if (immediate && ((imms & levels) == levels)) { 1847 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */ 1848 return False; 1849 } 1850 1851 ULong S = imms & levels; 1852 ULong R = immr & levels; 1853 Int diff = S - R; 1854 diff &= 63; 1855 Int esize = 1 << len; 1856 vassert(2 <= esize && esize <= 64); 1857 1858 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the 1859 same below with d. S can be 63 in which case we have an out of 1860 range and hence undefined shift. 
*/ 1861 vassert(S >= 0 && S <= 63); 1862 vassert(esize >= (S+1)); 1863 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1) 1864 //(1ULL << (S+1)) - 1; 1865 ((1ULL << S) - 1) + (1ULL << S); 1866 1867 Int d = // diff<len-1:0> 1868 diff & ((1 << len)-1); 1869 vassert(esize >= (d+1)); 1870 vassert(d >= 0 && d <= 63); 1871 1872 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1) 1873 //(1ULL << (d+1)) - 1; 1874 ((1ULL << d) - 1) + (1ULL << d); 1875 1876 if (esize != 64) vassert(elem_s < (1ULL << esize)); 1877 if (esize != 64) vassert(elem_d < (1ULL << esize)); 1878 1879 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R)); 1880 if (tmask) *tmask = dbm_RepTo64(esize, elem_d); 1881 1882 return True; 1883} 1884 1885 1886static 1887Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres, 1888 UInt insn) 1889{ 1890# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 1891 1892 /* insn[28:23] 1893 10000x PC-rel addressing 1894 10001x Add/subtract (immediate) 1895 100100 Logical (immediate) 1896 100101 Move Wide (immediate) 1897 100110 Bitfield 1898 100111 Extract 1899 */ 1900 1901 /* ------------------ ADD/SUB{,S} imm12 ------------------ */ 1902 if (INSN(28,24) == BITS5(1,0,0,0,1)) { 1903 Bool is64 = INSN(31,31) == 1; 1904 Bool isSub = INSN(30,30) == 1; 1905 Bool setCC = INSN(29,29) == 1; 1906 UInt sh = INSN(23,22); 1907 UInt uimm12 = INSN(21,10); 1908 UInt nn = INSN(9,5); 1909 UInt dd = INSN(4,0); 1910 const HChar* nm = isSub ? "sub" : "add"; 1911 if (sh >= 2) { 1912 /* Invalid; fall through */ 1913 } else { 1914 vassert(sh <= 1); 1915 uimm12 <<= (12 * sh); 1916 if (is64) { 1917 IRTemp argL = newTemp(Ity_I64); 1918 IRTemp argR = newTemp(Ity_I64); 1919 IRTemp res = newTemp(Ity_I64); 1920 assign(argL, getIReg64orSP(nn)); 1921 assign(argR, mkU64(uimm12)); 1922 assign(res, binop(isSub ? 
Iop_Sub64 : Iop_Add64, 1923 mkexpr(argL), mkexpr(argR))); 1924 if (setCC) { 1925 putIReg64orZR(dd, mkexpr(res)); 1926 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR); 1927 DIP("%ss %s, %s, 0x%x\n", 1928 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12); 1929 } else { 1930 putIReg64orSP(dd, mkexpr(res)); 1931 DIP("%s %s, %s, 0x%x\n", 1932 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12); 1933 } 1934 } else { 1935 IRTemp argL = newTemp(Ity_I32); 1936 IRTemp argR = newTemp(Ity_I32); 1937 IRTemp res = newTemp(Ity_I32); 1938 assign(argL, getIReg32orSP(nn)); 1939 assign(argR, mkU32(uimm12)); 1940 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32, 1941 mkexpr(argL), mkexpr(argR))); 1942 if (setCC) { 1943 putIReg32orZR(dd, mkexpr(res)); 1944 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR); 1945 DIP("%ss %s, %s, 0x%x\n", 1946 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12); 1947 } else { 1948 putIReg32orSP(dd, mkexpr(res)); 1949 DIP("%s %s, %s, 0x%x\n", 1950 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12); 1951 } 1952 } 1953 return True; 1954 } 1955 } 1956 1957 /* -------------------- ADR/ADRP -------------------- */ 1958 if (INSN(28,24) == BITS5(1,0,0,0,0)) { 1959 UInt bP = INSN(31,31); 1960 UInt immLo = INSN(30,29); 1961 UInt immHi = INSN(23,5); 1962 UInt rD = INSN(4,0); 1963 ULong uimm = (immHi << 2) | immLo; 1964 ULong simm = sx_to_64(uimm, 21); 1965 ULong val; 1966 if (bP) { 1967 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12); 1968 } else { 1969 val = guest_PC_curr_instr + simm; 1970 } 1971 putIReg64orZR(rD, mkU64(val)); 1972 DIP("adr%s %s, 0x%llx\n", bP ? 
"p" : "", nameIReg64orZR(rD), val); 1973 return True; 1974 } 1975 1976 /* -------------------- LOGIC(imm) -------------------- */ 1977 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) { 1978 /* 31 30 28 22 21 15 9 4 1979 sf op 100100 N immr imms Rn Rd 1980 op=00: AND Rd|SP, Rn, #imm 1981 op=01: ORR Rd|SP, Rn, #imm 1982 op=10: EOR Rd|SP, Rn, #imm 1983 op=11: ANDS Rd|ZR, Rn, #imm 1984 */ 1985 Bool is64 = INSN(31,31) == 1; 1986 UInt op = INSN(30,29); 1987 UInt N = INSN(22,22); 1988 UInt immR = INSN(21,16); 1989 UInt immS = INSN(15,10); 1990 UInt nn = INSN(9,5); 1991 UInt dd = INSN(4,0); 1992 ULong imm = 0; 1993 Bool ok; 1994 if (N == 1 && !is64) 1995 goto after_logic_imm; /* not allowed; fall through */ 1996 ok = dbm_DecodeBitMasks(&imm, NULL, 1997 N, immS, immR, True, is64 ? 64 : 32); 1998 if (!ok) 1999 goto after_logic_imm; 2000 2001 const HChar* names[4] = { "and", "orr", "eor", "ands" }; 2002 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 }; 2003 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 }; 2004 2005 vassert(op < 4); 2006 if (is64) { 2007 IRExpr* argL = getIReg64orZR(nn); 2008 IRExpr* argR = mkU64(imm); 2009 IRTemp res = newTemp(Ity_I64); 2010 assign(res, binop(ops64[op], argL, argR)); 2011 if (op < 3) { 2012 putIReg64orSP(dd, mkexpr(res)); 2013 DIP("%s %s, %s, 0x%llx\n", names[op], 2014 nameIReg64orSP(dd), nameIReg64orZR(nn), imm); 2015 } else { 2016 putIReg64orZR(dd, mkexpr(res)); 2017 setFlags_LOGIC(True/*is64*/, res); 2018 DIP("%s %s, %s, 0x%llx\n", names[op], 2019 nameIReg64orZR(dd), nameIReg64orZR(nn), imm); 2020 } 2021 } else { 2022 IRExpr* argL = getIReg32orZR(nn); 2023 IRExpr* argR = mkU32((UInt)imm); 2024 IRTemp res = newTemp(Ity_I32); 2025 assign(res, binop(ops32[op], argL, argR)); 2026 if (op < 3) { 2027 putIReg32orSP(dd, mkexpr(res)); 2028 DIP("%s %s, %s, 0x%x\n", names[op], 2029 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm); 2030 } else { 2031 putIReg32orZR(dd, mkexpr(res)); 2032 setFlags_LOGIC(False/*!is64*/, 
res); 2033 DIP("%s %s, %s, 0x%x\n", names[op], 2034 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm); 2035 } 2036 } 2037 return True; 2038 } 2039 after_logic_imm: 2040 2041 /* -------------------- MOV{Z,N,K} -------------------- */ 2042 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) { 2043 /* 31 30 28 22 20 4 2044 | | | | | | 2045 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw)) 2046 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw)) 2047 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw)) 2048 */ 2049 Bool is64 = INSN(31,31) == 1; 2050 UInt subopc = INSN(30,29); 2051 UInt hw = INSN(22,21); 2052 UInt imm16 = INSN(20,5); 2053 UInt dd = INSN(4,0); 2054 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) { 2055 /* invalid; fall through */ 2056 } else { 2057 ULong imm64 = ((ULong)imm16) << (16 * hw); 2058 if (!is64) 2059 vassert(imm64 < 0x100000000ULL); 2060 switch (subopc) { 2061 case BITS2(1,0): // MOVZ 2062 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64)); 2063 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64); 2064 break; 2065 case BITS2(0,0): // MOVN 2066 imm64 = ~imm64; 2067 if (!is64) 2068 imm64 &= 0xFFFFFFFFULL; 2069 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64)); 2070 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64); 2071 break; 2072 case BITS2(1,1): // MOVK 2073 /* This is more complex. We are inserting a slice into 2074 the destination register, so we need to have the old 2075 value of it. 
*/ 2076 if (is64) { 2077 IRTemp old = newTemp(Ity_I64); 2078 assign(old, getIReg64orZR(dd)); 2079 ULong mask = 0xFFFFULL << (16 * hw); 2080 IRExpr* res 2081 = binop(Iop_Or64, 2082 binop(Iop_And64, mkexpr(old), mkU64(~mask)), 2083 mkU64(imm64)); 2084 putIReg64orZR(dd, res); 2085 DIP("movk %s, 0x%x, lsl %u\n", 2086 nameIReg64orZR(dd), imm16, 16*hw); 2087 } else { 2088 IRTemp old = newTemp(Ity_I32); 2089 assign(old, getIReg32orZR(dd)); 2090 vassert(hw <= 1); 2091 UInt mask = 0xFFFF << (16 * hw); 2092 IRExpr* res 2093 = binop(Iop_Or32, 2094 binop(Iop_And32, mkexpr(old), mkU32(~mask)), 2095 mkU32((UInt)imm64)); 2096 putIReg32orZR(dd, res); 2097 DIP("movk %s, 0x%x, lsl %u\n", 2098 nameIReg32orZR(dd), imm16, 16*hw); 2099 } 2100 break; 2101 default: 2102 vassert(0); 2103 } 2104 return True; 2105 } 2106 } 2107 2108 /* -------------------- {U,S,}BFM -------------------- */ 2109 /* 30 28 22 21 15 9 4 2110 2111 sf 10 100110 N immr imms nn dd 2112 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 2113 UBFM Xd, Xn, #immr, #imms when sf=1, N=1 2114 2115 sf 00 100110 N immr imms nn dd 2116 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 2117 SBFM Xd, Xn, #immr, #imms when sf=1, N=1 2118 2119 sf 01 100110 N immr imms nn dd 2120 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 2121 BFM Xd, Xn, #immr, #imms when sf=1, N=1 2122 */ 2123 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) { 2124 UInt sf = INSN(31,31); 2125 UInt opc = INSN(30,29); 2126 UInt N = INSN(22,22); 2127 UInt immR = INSN(21,16); 2128 UInt immS = INSN(15,10); 2129 UInt nn = INSN(9,5); 2130 UInt dd = INSN(4,0); 2131 Bool inZero = False; 2132 Bool extend = False; 2133 const HChar* nm = "???"; 2134 /* skip invalid combinations */ 2135 switch (opc) { 2136 case BITS2(0,0): 2137 inZero = True; extend = True; nm = "sbfm"; break; 2138 case BITS2(0,1): 2139 inZero = False; extend = False; nm = "bfm"; break; 2140 case BITS2(1,0): 2141 inZero = True; extend = False; nm = "ubfm"; break; 2142 case 
BITS2(1,1): 2143 goto after_bfm; /* invalid */ 2144 default: 2145 vassert(0); 2146 } 2147 if (sf == 1 && N != 1) goto after_bfm; 2148 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0 2149 || ((immS >> 5) & 1) != 0)) goto after_bfm; 2150 ULong wmask = 0, tmask = 0; 2151 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask, 2152 N, immS, immR, False, sf == 1 ? 64 : 32); 2153 if (!ok) goto after_bfm; /* hmmm */ 2154 2155 Bool is64 = sf == 1; 2156 IRType ty = is64 ? Ity_I64 : Ity_I32; 2157 2158 IRTemp dst = newTemp(ty); 2159 IRTemp src = newTemp(ty); 2160 IRTemp bot = newTemp(ty); 2161 IRTemp top = newTemp(ty); 2162 IRTemp res = newTemp(ty); 2163 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd)); 2164 assign(src, getIRegOrZR(is64, nn)); 2165 /* perform bitfield move on low bits */ 2166 assign(bot, binop(mkOR(ty), 2167 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)), 2168 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)), 2169 mkU(ty, wmask)))); 2170 /* determine extension bits (sign, zero or dest register) */ 2171 assign(top, mkexpr(extend ? 
mathREPLICATE(ty, src, immS) : dst)); 2172 /* combine extension bits and result bits */ 2173 assign(res, binop(mkOR(ty), 2174 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)), 2175 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask)))); 2176 putIRegOrZR(is64, dd, mkexpr(res)); 2177 DIP("%s %s, %s, immR=%u, immS=%u\n", 2178 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS); 2179 return True; 2180 } 2181 after_bfm: 2182 2183 /* ---------------------- EXTR ---------------------- */ 2184 /* 30 28 22 20 15 9 4 2185 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6 2186 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32 2187 */ 2188 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) { 2189 Bool is64 = INSN(31,31) == 1; 2190 UInt mm = INSN(20,16); 2191 UInt imm6 = INSN(15,10); 2192 UInt nn = INSN(9,5); 2193 UInt dd = INSN(4,0); 2194 Bool valid = True; 2195 if (INSN(31,31) != INSN(22,22)) 2196 valid = False; 2197 if (!is64 && imm6 >= 32) 2198 valid = False; 2199 if (!valid) goto after_extr; 2200 IRType ty = is64 ? 
Ity_I64 : Ity_I32; /* (continuation of the EXTR decode begun above) */
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
         /* Shift of zero: the result is just the low source register. */
         assign(res, mkexpr(srcLo));
      } else {
         UInt szBits = 8 * sizeofIRType(ty);
         vassert(imm6 > 0 && imm6 < szBits);
         /* Concatenate hi:lo and take bits [imm6+szBits-1 : imm6]. */
         assign(res, binop(mkOR(ty),
                           binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                           binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
  after_extr:

   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Data processing (register) instructions              ---*/
/*------------------------------------------------------------*/

/* Return the assembly name for a 2-bit shift-kind field. */
static const HChar* nameSH ( UInt sh ) {
   switch (sh) {
      case 0: return "lsl";
      case 1: return "lsr";
      case 2: return "asr";
      case 3: return "ror";
      default: vassert(0);
   }
}

/* Generate IR to get a register value, possibly shifted by an
   immediate.  Returns either a 32- or 64-bit temporary holding the
   result.  After the shift, the value can optionally be NOT-ed
   too.

   sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
   in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
   isn't allowed, but it's the job of the caller to check that.
*/
static IRTemp getShiftedIRegOrZR ( Bool is64,
                                   UInt sh_how, UInt sh_amt, UInt regNo,
                                   Bool invert )
{
   vassert(sh_how < 4);
   vassert(sh_amt < (is64 ? 64 : 32));
   IRType ty = is64 ? Ity_I64 : Ity_I32;
   /* t0 = raw register value, t1 = shifted value. */
   IRTemp t0 = newTemp(ty);
   assign(t0, getIRegOrZR(is64, regNo));
   IRTemp t1 = newTemp(ty);
   switch (sh_how) {
      case BITS2(0,0):
         assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(0,1):
         assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,0):
         assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,1):
         assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
         break;
      default:
         vassert(0);
   }
   if (invert) {
      /* Optional bitwise NOT of the shifted value (BIC/ORN/EON forms). */
      IRTemp t2 = newTemp(ty);
      assign(t2, unop(mkNOT(ty), mkexpr(t1)));
      return t2;
   } else {
      return t1;
   }
}


/* Decode one data-processing-(register) instruction, emitting IR as a
   side effect.  Returns True iff the instruction was decoded. */
static
Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
                                        UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------------- ADD/SUB(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)

      31 30 29 28    23 21 20 15   9  4
      |  |  |  |     |  |  |  |    |  |
      x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
      x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
      x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
      x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
   */
   if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
      UInt   bS    = INSN(29, 29); /* set flags? */
      UInt   sh    = INSN(23,22);
      UInt   rM    = INSN(20,16);
      UInt   imm6  = INSN(15,10);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);
      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ?
Ity_I64 : Ity_I32; /* (continuation of ADD/SUB(reg) decode from above) */
      if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
         IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
         IRTemp res  = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         /* NOTE(review): the rD != 31 guard looks redundant if
            putIRegOrZR already discards writes to ZR -- confirm
            against putIRegOrZR's definition. */
         if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
         if (bS) {
            setFlags_ADD_SUB(is64, isSUB, argL, argR);
         }
         DIP("%s%s %s, %s, %s, %s #%u\n",
             bOP ? "sub" : "add", bS ? "s" : "",
             nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
             nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         return True;
      }
   }

   /* ------------------- ADC/SBC(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op

      31 30 29 28    23 21 20 15     9  4
      |  |  |  |     |  |  |  |      |  |
      x  0  0  11010 00 0  Rm 000000 Rn Rd   ADC  Rd,Rn,Rm
      x  0  1  11010 00 0  Rm 000000 Rn Rd   ADCS Rd,Rn,Rm
      x  1  0  11010 00 0  Rm 000000 Rn Rd   SBC  Rd,Rn,Rm
      x  1  1  11010 00 0  Rm 000000 Rn Rd   SBCS Rd,Rn,Rm
   */

   if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADC, 1: SBC */
      UInt   bS    = INSN(29,29); /* set flags */
      UInt   rM    = INSN(20,16);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);

      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;

      /* Current carry flag, widened/narrowed to the operation size. */
      IRTemp oldC = newTemp(ty);
      assign(oldC,
             is64 ? mk_arm64g_calculate_flag_c()
                  : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );

      IRTemp argL = newTemp(ty);
      assign(argL, getIRegOrZR(is64, rN));
      IRTemp argR = newTemp(ty);
      assign(argR, getIRegOrZR(is64, rM));

      IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
      IRTemp res  = newTemp(ty);
      if (isSUB) {
         /* SBC: res = (Rn - Rm) - (C ^ 1), i.e. subtract the
            inverted carry (the borrow). */
         IRExpr* one = is64 ? mkU64(1) : mkU32(1);
         IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
         assign(res,
                binop(op,
                      binop(op, mkexpr(argL), mkexpr(argR)),
                      binop(xorOp, mkexpr(oldC), one)));
      } else {
         /* ADC: res = (Rn + Rm) + C. */
         assign(res,
                binop(op,
                      binop(op, mkexpr(argL), mkexpr(argR)),
                      mkexpr(oldC)));
      }

      if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));

      if (bS) {
         setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
      }

      DIP("%s%s %s, %s, %s\n",
          bOP ? "sbc" : "adc", bS ? "s" : "",
          nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
          nameIRegOrZR(is64, rM));
      return True;
   }



   /* -------------------- LOGIC(reg) -------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      N==0 => inv? is no-op (no inversion)
      N==1 => inv? is NOT
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR

      31 30 28    23 21 20 15   9  4
      |  |  |     |  |  |  |    |  |
      x  00 01010 sh N  Rm imm6 Rn Rd  AND  Rd,Rn, inv?(sh(Rm,imm6))
      x  01 01010 sh N  Rm imm6 Rn Rd  ORR  Rd,Rn, inv?(sh(Rm,imm6))
      x  10 01010 sh N  Rm imm6 Rn Rd  EOR  Rd,Rn, inv?(sh(Rm,imm6))
      x  11 01010 sh N  Rm imm6 Rn Rd  ANDS Rd,Rn, inv?(sh(Rm,imm6))
      With N=1, the names are: BIC ORN EON BICS
   */
   if (INSN(28,24) == BITS5(0,1,0,1,0)) {
      UInt   bX   = INSN(31,31);
      UInt   sh   = INSN(23,22);
      UInt   bN   = INSN(21,21);
      UInt   rM   = INSN(20,16);
      UInt   imm6 = INSN(15,10);
      UInt   rN   = INSN(9,5);
      UInt   rD   = INSN(4,0);
      Bool   is64 = bX == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      if (!is64 && imm6 > 31) {
         /* invalid; fall though */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         /* The shifted (and, for the N=1 forms, inverted) operand. */
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
         IROp   op   = Iop_INVALID;
         switch (INSN(30,29)) {
            case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
            case BITS2(0,1):                  op = mkOR(ty);  break;
            case BITS2(1,0):                  op = mkXOR(ty); break;
            default: vassert(0);
         }
         IRTemp res = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (INSN(30,29) == BITS2(1,1)) {
            /* ANDS/BICS set NZCV from the result. */
            setFlags_LOGIC(is64, res);
         }
         putIRegOrZR(is64, rD, mkexpr(res));

         static const HChar* names_op[8]
            = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
         vassert(((bN << 2) | INSN(30,29)) < 8);
         const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
         /* Special-case the printing of "MOV" */
         if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
            DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
                nameIRegOrZR(is64, rM));
         } else {
            DIP("%s %s, %s, %s, %s #%u\n", nm_op,
                nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
                nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         }
         return True;
      }
   }

   /* -------------------- {U,S}MULH -------------------- */
   /* 31       23 22 20 15     9 4
      10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
      10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
       && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
      Bool isU = INSN(23,23) == 1;
      UInt mm  = INSN(20,16);
      UInt nn  = INSN(9,5);
      UInt dd  = INSN(4,0);
      /* Take the high 64 bits of the full 128-bit product. */
      putIReg64orZR(dd, unop(Iop_128HIto64,
                             binop(isU ? Iop_MullU64 : Iop_MullS64,
                                   getIReg64orZR(nn), getIReg64orZR(mm))));
      DIP("%cmulh %s, %s, %s\n",
          isU ? 'u' : 's',
          nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
      return True;
   }

   /* -------------------- M{ADD,SUB} -------------------- */
   /* 31 30           20 15 14 9 4
      sf 00 11011 000  m  0  a n r   MADD Rd,Rn,Rm,Ra   d = a+m*n
      sf 00 11011 000  m  1  a n r   MSUB Rd,Rn,Rm,Ra   d = a-m*n
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      Bool isAdd = INSN(15,15) == 0;
      UInt aa    = INSN(14,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      if (is64) {
         putIReg64orZR(
            dd,
            binop(isAdd ? Iop_Add64 : Iop_Sub64,
                  getIReg64orZR(aa),
                  binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
      } else {
         putIReg32orZR(
            dd,
            binop(isAdd ? Iop_Add32 : Iop_Sub32,
                  getIReg32orZR(aa),
                  binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
      }
      DIP("%s %s, %s, %s, %s\n",
          isAdd ? "madd" : "msub",
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
      return True;
   }

   /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
   /* 31 30 28        20 15   11 9  4
      sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
      sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
      sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
      sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
      In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
   */
   if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
      Bool   is64 = INSN(31,31) == 1;
      UInt   b30  = INSN(30,30);
      UInt   mm   = INSN(20,16);
      UInt   cond = INSN(15,12);
      UInt   b10  = INSN(10,10);
      UInt   nn   = INSN(9,5);
      UInt   dd   = INSN(4,0);
      UInt   op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRExpr* argL = getIRegOrZR(is64, nn);
      IRExpr* argR = getIRegOrZR(is64, mm);
      /* Apply the "else" operation to Rm before selecting. */
      switch (op) {
         case BITS2(0,0):
            break;
         case BITS2(0,1):
            argR = binop(mkADD(ty), argR, mkU(ty,1));
            break;
         case BITS2(1,0):
            argR = unop(mkNOT(ty), argR);
            break;
         case BITS2(1,1):
            argR = binop(mkSUB(ty), mkU(ty,0), argR);
            break;
         default:
            vassert(0);
      }
      putIRegOrZR(
         is64, dd,
         IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                    argL, argR)
      );
      const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
      DIP("%s %s, %s, %s, %s\n", op_nm[op],
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameCC(cond));
      return True;
   }

   /* -------------- ADD/SUB(extended reg) -------------- */
   /*     28         20 15  12   9 4
      000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
      100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld

      001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
      101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld

      010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
      110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld

      011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
      111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld

      The 'm' operand is extended per opt, thusly:

         000   Xm & 0xFF           UXTB
         001   Xm & 0xFFFF         UXTH
         010   Xm & (2^32)-1       UXTW
         011   Xm                  UXTX

         100   Xm sx from bit 7    SXTB
         101   Xm sx from bit 15   SXTH
         110   Xm sx from bit 31   SXTW
         111   Xm                  SXTX

      In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
      operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
      are the identity operation on Wm.

      After extension, the value is shifted left by imm3 bits, which
      may only be in the range 0 .. 4 inclusive.
   */
   if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSub = INSN(30,30) == 1;
      Bool setCC = INSN(29,29) == 1;
      UInt mm    = INSN(20,16);
      UInt opt   = INSN(15,13);
      UInt imm3  = INSN(12,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
                                  "sxtb", "sxth", "sxtw", "sxtx" };
      /* Do almost the same thing in the 32- and 64-bit cases. */
      IRTemp xN = newTemp(Ity_I64);
      IRTemp xM = newTemp(Ity_I64);
      assign(xN, getIReg64orSP(nn));
      assign(xM, getIReg64orZR(mm));
      IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
      Int     shSX = 0;
      /* widen Xm .. */
      switch (opt) {
         case BITS3(0,0,0): // UXTB
            xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
         case BITS3(0,0,1): // UXTH
            xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
         case BITS3(0,1,0): // UXTW -- noop for the 32bit case
            if (is64) {
               xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
            }
            break;
         case BITS3(0,1,1): // UXTX -- always a noop
            break;
         case BITS3(1,0,0): // SXTB
            shSX = 56; goto sxTo64;
         case BITS3(1,0,1): // SXTH
            shSX = 48; goto sxTo64;
         case BITS3(1,1,0): // SXTW -- noop for the 32bit case
            if (is64) {
               shSX = 32; goto sxTo64;
            }
            break;
         case BITS3(1,1,1): // SXTX -- always a noop
            break;
         sxTo64:
            /* Sign-extend by shifting the field up to bit 63 and
               arithmetically shifting it back down. */
            vassert(shSX >= 32);
            xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
                        mkU8(shSX));
            break;
         default:
            vassert(0);
      }
      /* and now shift */
      IRTemp argL = xN;
      IRTemp argR = newTemp(Ity_I64);
      assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
      IRTemp res = newTemp(Ity_I64);
      assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
                        mkexpr(argL), mkexpr(argR)));
      if (is64) {
         if (setCC) {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
         } else {
            putIReg64orSP(dd, mkexpr(res));
         }
      } else {
         if (setCC) {
            IRTemp argL32 = newTemp(Ity_I32);
            IRTemp argR32 = newTemp(Ity_I32);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
            assign(argL32, unop(Iop_64to32, mkexpr(argL)));
            assign(argR32, unop(Iop_64to32, mkexpr(argR)));
            setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
         } else {
            putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
         }
      }
      DIP("%s%s %s, %s, %s %s lsl %u\n",
          isSub ? "sub" : "add", setCC ? "s" : "",
          setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
          nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
          nameExt[opt], imm3);
      return True;
   }

   /* ---------------- CCMP/CCMN(imm) ---------------- */
   /* Bizarrely, these appear in the "data processing register"
      category, even though they are operations against an
      immediate. */
   /* 31 29        20   15   11 9  3
      sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
      sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond

      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt imm5  = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL  = newTemp(ty);
      IRTemp argR  = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, mkU64(imm5));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, mkU32(imm5));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, #%u, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          imm5, nzcv, nameCC(cond));
      return True;
   }

   /* ---------------- CCMP/CCMN(reg) ---------------- */
   /* 31 29        20 15   11 9  3
      sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
      sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond
      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt mm    = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty  = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, getIReg64orZR(mm));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, getIReg32orZR(mm));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, %s, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
      return True;
   }


   /* -------------- REV/REV16/REV32/RBIT -------------- */
   /* 31 30 28       20    15   11 9 4

      1  10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
      0  10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn

      1  10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
      0  10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn

      1  10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
      0  10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn

      1  10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
      UInt b31 = INSN(31,31);
      UInt opc = INSN(11,10);

      UInt ix = 0;
      /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
      else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
      else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
      else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
      else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
      else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
      else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
      if (ix >= 1 && ix <= 7) {
         Bool   is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
         UInt   nn   = INSN(9,5);
         UInt   dd   = INSN(4,0);
         IRTemp src  = newTemp(Ity_I64);
         IRTemp dst  = IRTemp_INVALID;
         IRTemp (*math)(IRTemp) = NULL;
         switch (ix) {
            case 1: case 2: math = math_BYTESWAP64;   break;
            case 3: case 4: math = math_BITSWAP64;    break;
            case 5: case 6: math = math_USHORTSWAP64; break;
            case 7:         math = math_UINTSWAP64;   break;
            default: vassert(0);
         }
         const HChar* names[7]
           = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
         const HChar* nm = names[ix-1];
         vassert(math);
         if (ix == 6) {
            /* This has to be special cased, since the logic below doesn't
               handle it correctly. */
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd,
                          unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
         } else if (is64) {
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd, mkexpr(dst));
         } else {
            /* 32-bit case: position Wn in the top half of a 64-bit
               value so the 64-bit swap helper produces the right
               result in the low 32 bits. */
            assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
            dst = math(src);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
         }
         DIP("%s %s, %s\n", nm,
             nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
         return True;
      }
      /* else fall through */
   }

   /* -------------------- CLZ/CLS -------------------- */
   /*    30 28   24   20    15      9 4
      sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
      sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
      Bool   is64  = INSN(31,31) == 1;
      Bool   isCLS = INSN(10,10) == 1;
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp src   = newTemp(Ity_I64);
      IRTemp dst   = newTemp(Ity_I64);
      if (!isCLS) { // CLS not yet supported
         if (is64) {
            /* Iop_Clz64 on zero is avoided via the ITE (all-zero input
               yields 64 directly). */
            assign(src, getIReg64orZR(nn));
            assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
                                   mkU64(64),
                                   unop(Iop_Clz64, mkexpr(src))));
            putIReg64orZR(dd, mkexpr(dst));
         } else {
            /* 32-bit: shift Wn into the top half so the 64-bit CLZ
               counts only the 32 relevant bits. */
            assign(src, binop(Iop_Shl64,
                              unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
            assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
                                   mkU64(32),
                                   unop(Iop_Clz64, mkexpr(src))));
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
         }
         DIP("cl%c %s, %s\n",
             isCLS ? 's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
         return True;
      }
   }

   /* -------------------- LSLV/LSRV/ASRV -------------------- */
   /*    30 28        20 15   11 9 4
      sf 00 1101 0110 m  0010 00 n d   LSLV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 01 n d   LSRV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 10 n d   ASRV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) {
      Bool   is64 = INSN(31,31) == 1;
      UInt   mm   = INSN(20,16);
      UInt   op   = INSN(11,10);
      UInt   nn   = INSN(9,5);
      UInt   dd   = INSN(4,0);
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcL = newTemp(ty);
      IRTemp srcR = newTemp(Ity_I8);
      IRTemp res  = newTemp(ty);
      IROp   iop  = Iop_INVALID;
      assign(srcL, getIRegOrZR(is64, nn));
      /* Shift amount is Rm taken modulo the operand width. */
      assign(srcR,
             unop(Iop_64to8,
                  binop(Iop_And64,
                        getIReg64orZR(mm), mkU64(is64 ? 63 : 31))));
      switch (op) {
         case BITS2(0,0): iop = mkSHL(ty); break;
         case BITS2(0,1): iop = mkSHR(ty); break;
         case BITS2(1,0): iop = mkSAR(ty); break;
         default: vassert(0);
      }
      assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR)));
      putIRegOrZR(is64, dd, mkexpr(res));
      vassert(op < 3);
      const HChar* names[3] = { "lslv", "lsrv", "asrv" };
      DIP("%s %s, %s, %s\n",
          names[op], nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
      return True;
   }

   /* -------------------- SDIV/UDIV -------------------- */
   /*    30 28        20 15    10 9 4
      sf 00 1101 0110 m  00001  1 n d  SDIV Rd,Rn,Rm
      sf 00 1101 0110 m  00001  0 n d  UDIV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,11) == BITS5(0,0,0,0,1)) {
      Bool is64 = INSN(31,31) == 1;
      UInt mm   = INSN(20,16);
      Bool isS  = INSN(10,10) == 1;
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      if (isS) {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      } else {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      }
      DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
          nameIRegOrZR(is64, dd),
          nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
      return True;
   }

   /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
   /* 31 23  20 15 14 9 4
      1001 1011 101 m  0  a  n  d   UMADDL Xd,Wn,Wm,Xa
      1001 1011 001 m  0  a  n  d   SMADDL Xd,Wn,Wm,Xa
      1001 1011 101 m  1  a  n  d   UMSUBL Xd,Wn,Wm,Xa
      1001 1011 001 m  1  a  n  d   SMSUBL Xd,Wn,Wm,Xa
      with operation
         Xd = Xa +/- (Wn *u/s Wm)
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
      Bool   isU   = INSN(23,23) == 1;
      UInt   mm    = INSN(20,16);
      Bool   isAdd = INSN(15,15) == 0;
      UInt   aa    = INSN(14,10);
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp wN    = newTemp(Ity_I32);
      IRTemp wM    = newTemp(Ity_I32);
      IRTemp xA    = newTemp(Ity_I64);
      IRTemp muld  = newTemp(Ity_I64);
      IRTemp res   = newTemp(Ity_I64);
      assign(wN, getIReg32orZR(nn));
      assign(wM, getIReg32orZR(mm));
      assign(xA, getIReg64orZR(aa));
      /* Widening 32x32->64 multiply, then accumulate into Xa. */
      assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
                         mkexpr(wN), mkexpr(wM)));
      assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
                        mkexpr(xA), mkexpr(muld)));
      putIReg64orZR(dd, mkexpr(res));
      DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ?
"add" : "sub", /* (continuation of the {S,U}M{ADD,SUB}L DIP above) */
          nameIReg64orZR(dd), nameIReg32orZR(nn),
          nameIReg32orZR(mm), nameIReg64orZR(aa));
      return True;
   }
   vex_printf("ARM64 front end: data_processing_register\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Load and Store instructions                          ---*/
/*------------------------------------------------------------*/

/* Generate the EA for a "reg + reg" style amode.  This is done from
   parts of the insn, but for sanity checking sake it takes the whole
   insn.  This appears to depend on insn[15:12], with opt=insn[15:13]
   and S=insn[12]:

   The possible forms, along with their opt:S values, are:
      011:0   Xn|SP + Xm
      111:0   Xn|SP + Xm
      011:1   Xn|SP + Xm * transfer_szB
      111:1   Xn|SP + Xm * transfer_szB
      010:0   Xn|SP + 32Uto64(Wm)
      010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
      110:0   Xn|SP + 32Sto64(Wm)
      110:1   Xn|SP + 32Sto64(Wm) * transfer_szB

   Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
   the transfer size is insn[23,31,30].  For integer loads/stores,
   insn[23] is zero, hence szLg2 can be at most 3 in such cases.

   If the decoding fails, it returns IRTemp_INVALID.

   isInt is True iff this is decoding is for transfers to/from integer
   registers.  If False it is for transfers to/from vector registers.

   buf receives a printable rendering of the amode (for DIP output);
   it is cleared on entry.
*/
static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
{
   UInt    optS  = SLICE_UInt(insn, 15, 12);
   UInt    mm    = SLICE_UInt(insn, 20, 16);
   UInt    nn    = SLICE_UInt(insn, 9, 5);
   UInt    szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
                   | SLICE_UInt(insn, 31, 30); // Log2 of the size

   buf[0] = 0;

   /* Sanity checks, that this really is a load/store insn. */
   if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
      goto fail;

   if (isInt
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
      goto fail;

   if (!isInt
       && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
      goto fail;

   /* Throw out non-verified but possibly valid cases. */
   switch (szLg2) {
      case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
      case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
      case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
      case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
      case BITS3(1,0,0): // can only ever be valid for the vector case
         /* NOTE(review): both arms are 'goto fail', so 128-bit vector
            transfers are rejected unconditionally here -- presumably
            the else-arm becomes 'break' once that case is supported;
            confirm before changing. */
         if (isInt) goto fail; else goto fail;
      case BITS3(1,0,1): // these sizes are never valid
      case BITS3(1,1,0):
      case BITS3(1,1,1): goto fail;

      default: vassert(0);
   }

   IRExpr* rhs = NULL;
   switch (optS) {
      case BITS4(1,1,1,0): goto fail; //ATC
      case BITS4(0,1,1,0):
         rhs = getIReg64orZR(mm);
         vex_sprintf(buf, "[%s, %s]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm));
         break;
      case BITS4(1,1,1,1): goto fail; //ATC
      case BITS4(0,1,1,1):
         rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s lsl %u]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
         break;
      case BITS4(0,1,0,0):
         rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s uxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(0,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
3063 case BITS4(1,1,0,0): 3064 rhs = unop(Iop_32Sto64, getIReg32orZR(mm)); 3065 vex_sprintf(buf, "[%s, %s sxtx]", 3066 nameIReg64orZR(nn), nameIReg32orZR(mm)); 3067 break; 3068 case BITS4(1,1,0,1): 3069 rhs = binop(Iop_Shl64, 3070 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2)); 3071 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]", 3072 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2); 3073 break; 3074 default: 3075 /* The rest appear to be genuinely invalid */ 3076 goto fail; 3077 } 3078 3079 vassert(rhs); 3080 IRTemp res = newTemp(Ity_I64); 3081 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs)); 3082 return res; 3083 3084 fail: 3085 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS); 3086 return IRTemp_INVALID; 3087} 3088 3089 3090/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest 3091 bits of DATAE :: Ity_I64. */ 3092static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE ) 3093{ 3094 IRExpr* addrE = mkexpr(addr); 3095 switch (szB) { 3096 case 8: 3097 storeLE(addrE, dataE); 3098 break; 3099 case 4: 3100 storeLE(addrE, unop(Iop_64to32, dataE)); 3101 break; 3102 case 2: 3103 storeLE(addrE, unop(Iop_64to16, dataE)); 3104 break; 3105 case 1: 3106 storeLE(addrE, unop(Iop_64to8, dataE)); 3107 break; 3108 default: 3109 vassert(0); 3110 } 3111} 3112 3113 3114/* Generate an 8/16/32/64 bit unsigned widening load from ADDR, 3115 placing the result in an Ity_I64 temporary. 
*/
static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
{
   /* SZB is the load size in bytes (1, 2, 4 or 8); anything else
      asserts.  Loads of fewer than 8 bytes are zero-extended to 64
      bits in the returned temporary. */
   IRTemp  res   = newTemp(Ity_I64);
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         assign(res, loadLE(Ity_I64,addrE));
         break;
      case 4:
         assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
         break;
      case 2:
         assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
         break;
      case 1:
         assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
         break;
      default:
         vassert(0);
   }
   return res;
}


/* Decode one instruction from the load/store group.  Returns True if
   the instruction was decoded (IR has been emitted), False if it was
   not recognised by any of the cases below. */
static
Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------ LDR,STR (immediate, uimm12) ----------- */
   /* uimm12 is scaled by the transfer size

      31 29  26    21   9  4
      |  |   |     |    |  |
      11 111 00100 imm12 nn tt    STR  Xt, [Xn|SP, #imm12 * 8]
      11 111 00101 imm12 nn tt    LDR  Xt, [Xn|SP, #imm12 * 8]

      10 111 00100 imm12 nn tt    STR  Wt, [Xn|SP, #imm12 * 4]
      10 111 00101 imm12 nn tt    LDR  Wt, [Xn|SP, #imm12 * 4]

      01 111 00100 imm12 nn tt    STRH Wt, [Xn|SP, #imm12 * 2]
      01 111 00101 imm12 nn tt    LDRH Wt, [Xn|SP, #imm12 * 2]

      00 111 00100 imm12 nn tt    STRB Wt, [Xn|SP, #imm12 * 1]
      00 111 00101 imm12 nn tt    LDRB Wt, [Xn|SP, #imm12 * 1]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
      UInt   szLg2 = INSN(31,30);
      UInt   szB   = 1 << szLg2;
      Bool   isLD  = INSN(22,22) == 1;
      UInt   offs  = INSN(21,10) * szB;   // uimm12, scaled by transfer size
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      IRTemp ta    = newTemp(Ity_I64);
      assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
      if (nn == 31) { /* FIXME generate stack alignment check */ }
      vassert(szLg2 < 4);
      if (isLD) {
         putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
      } else {
         gen_narrowing_store(szB, ta, getIReg64orZR(tt));
      }
      const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
const HChar* st_name[4] = { "strb", "strh", "str", "str" }; 3180 DIP("%s %s, [%s, #%u]\n", 3181 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt), 3182 nameIReg64orSP(nn), offs); 3183 return True; 3184 } 3185 3186 /* ------------ LDUR,STUR (immediate, simm9) ----------- */ 3187 /* 3188 31 29 26 20 11 9 4 3189 | | | | | | | 3190 (at-Rn-then-Rn=EA) | | | 3191 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9 3192 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9 3193 3194 (at-EA-then-Rn=EA) 3195 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]! 3196 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]! 3197 3198 (at-EA) 3199 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9] 3200 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9] 3201 3202 simm9 is unscaled. 3203 3204 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the 3205 load case this is because would create two competing values for 3206 Rt. In the store case the reason is unclear, but the spec 3207 disallows it anyway. 3208 3209 Stores are narrowing, loads are unsigned widening. sz encodes 3210 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8. 3211 */ 3212 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1)) 3213 == BITS9(1,1,1, 0,0,0,0,0, 0)) { 3214 UInt szLg2 = INSN(31,30); 3215 UInt szB = 1 << szLg2; 3216 Bool isLoad = INSN(22,22) == 1; 3217 UInt imm9 = INSN(20,12); 3218 UInt nn = INSN(9,5); 3219 UInt tt = INSN(4,0); 3220 Bool wBack = INSN(10,10) == 1; 3221 UInt how = INSN(11,10); 3222 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) { 3223 /* undecodable; fall through */ 3224 } else { 3225 if (nn == 31) { /* FIXME generate stack alignment check */ } 3226 3227 // Compute the transfer address TA and the writeback address WA. 
3228 IRTemp tRN = newTemp(Ity_I64); 3229 assign(tRN, getIReg64orSP(nn)); 3230 IRTemp tEA = newTemp(Ity_I64); 3231 Long simm9 = (Long)sx_to_64(imm9, 9); 3232 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 3233 3234 IRTemp tTA = newTemp(Ity_I64); 3235 IRTemp tWA = newTemp(Ity_I64); 3236 switch (how) { 3237 case BITS2(0,1): 3238 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break; 3239 case BITS2(1,1): 3240 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break; 3241 case BITS2(0,0): 3242 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break; 3243 default: 3244 vassert(0); /* NOTREACHED */ 3245 } 3246 3247 /* Normally rN would be updated after the transfer. However, in 3248 the special case typifed by 3249 str x30, [sp,#-16]! 3250 it is necessary to update SP before the transfer, (1) 3251 because Memcheck will otherwise complain about a write 3252 below the stack pointer, and (2) because the segfault 3253 stack extension mechanism will otherwise extend the stack 3254 only down to SP before the instruction, which might not be 3255 far enough, if the -16 bit takes the actual access 3256 address to the next page. 3257 */ 3258 Bool earlyWBack 3259 = wBack && simm9 < 0 && szB == 8 3260 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn; 3261 3262 if (wBack && earlyWBack) 3263 putIReg64orSP(nn, mkexpr(tEA)); 3264 3265 if (isLoad) { 3266 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA))); 3267 } else { 3268 gen_narrowing_store(szB, tTA, getIReg64orZR(tt)); 3269 } 3270 3271 if (wBack && !earlyWBack) 3272 putIReg64orSP(nn, mkexpr(tEA)); 3273 3274 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" }; 3275 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" }; 3276 const HChar* fmt_str = NULL; 3277 switch (how) { 3278 case BITS2(0,1): 3279 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n"; 3280 break; 3281 case BITS2(1,1): 3282 fmt_str = "%s %s, [%s, #%lld]! 
(at-EA-then-Rn=EA)\n"; 3283 break; 3284 case BITS2(0,0): 3285 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n"; 3286 break; 3287 default: 3288 vassert(0); 3289 } 3290 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2], 3291 nameIRegOrZR(szB == 8, tt), 3292 nameIReg64orSP(nn), simm9); 3293 return True; 3294 } 3295 } 3296 3297 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */ 3298 /* L==1 => mm==LD 3299 L==0 => mm==ST 3300 x==0 => 32 bit transfers, and zero extended loads 3301 x==1 => 64 bit transfers 3302 simm7 is scaled by the (single-register) transfer size 3303 3304 (at-Rn-then-Rn=EA) 3305 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm 3306 3307 (at-EA-then-Rn=EA) 3308 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]! 3309 3310 (at-EA) 3311 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm] 3312 */ 3313 3314 UInt insn_30_23 = INSN(30,23); 3315 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1) 3316 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1) 3317 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) { 3318 UInt bL = INSN(22,22); 3319 UInt bX = INSN(31,31); 3320 UInt bWBack = INSN(23,23); 3321 UInt rT1 = INSN(4,0); 3322 UInt rN = INSN(9,5); 3323 UInt rT2 = INSN(14,10); 3324 Long simm7 = (Long)sx_to_64(INSN(21,15), 7); 3325 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31) 3326 || (bL && rT1 == rT2)) { 3327 /* undecodable; fall through */ 3328 } else { 3329 if (rN == 31) { /* FIXME generate stack alignment check */ } 3330 3331 // Compute the transfer address TA and the writeback address WA. 3332 IRTemp tRN = newTemp(Ity_I64); 3333 assign(tRN, getIReg64orSP(rN)); 3334 IRTemp tEA = newTemp(Ity_I64); 3335 simm7 = (bX ? 
8 : 4) * simm7; 3336 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7))); 3337 3338 IRTemp tTA = newTemp(Ity_I64); 3339 IRTemp tWA = newTemp(Ity_I64); 3340 switch (INSN(24,23)) { 3341 case BITS2(0,1): 3342 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break; 3343 case BITS2(1,1): 3344 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break; 3345 case BITS2(1,0): 3346 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break; 3347 default: 3348 vassert(0); /* NOTREACHED */ 3349 } 3350 3351 /* Normally rN would be updated after the transfer. However, in 3352 the special case typifed by 3353 stp x29, x30, [sp,#-112]! 3354 it is necessary to update SP before the transfer, (1) 3355 because Memcheck will otherwise complain about a write 3356 below the stack pointer, and (2) because the segfault 3357 stack extension mechanism will otherwise extend the stack 3358 only down to SP before the instruction, which might not be 3359 far enough, if the -112 bit takes the actual access 3360 address to the next page. 
3361 */ 3362 Bool earlyWBack 3363 = bWBack && simm7 < 0 3364 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0; 3365 3366 if (bWBack && earlyWBack) 3367 putIReg64orSP(rN, mkexpr(tEA)); 3368 3369 /**/ if (bL == 1 && bX == 1) { 3370 // 64 bit load 3371 putIReg64orZR(rT1, loadLE(Ity_I64, 3372 binop(Iop_Add64,mkexpr(tTA),mkU64(0)))); 3373 putIReg64orZR(rT2, loadLE(Ity_I64, 3374 binop(Iop_Add64,mkexpr(tTA),mkU64(8)))); 3375 } else if (bL == 1 && bX == 0) { 3376 // 32 bit load 3377 putIReg32orZR(rT1, loadLE(Ity_I32, 3378 binop(Iop_Add64,mkexpr(tTA),mkU64(0)))); 3379 putIReg32orZR(rT2, loadLE(Ity_I32, 3380 binop(Iop_Add64,mkexpr(tTA),mkU64(4)))); 3381 } else if (bL == 0 && bX == 1) { 3382 // 64 bit store 3383 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)), 3384 getIReg64orZR(rT1)); 3385 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)), 3386 getIReg64orZR(rT2)); 3387 } else { 3388 vassert(bL == 0 && bX == 0); 3389 // 32 bit store 3390 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)), 3391 getIReg32orZR(rT1)); 3392 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)), 3393 getIReg32orZR(rT2)); 3394 } 3395 3396 if (bWBack && !earlyWBack) 3397 putIReg64orSP(rN, mkexpr(tEA)); 3398 3399 const HChar* fmt_str = NULL; 3400 switch (INSN(24,23)) { 3401 case BITS2(0,1): 3402 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n"; 3403 break; 3404 case BITS2(1,1): 3405 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n"; 3406 break; 3407 case BITS2(1,0): 3408 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n"; 3409 break; 3410 default: 3411 vassert(0); 3412 } 3413 DIP(fmt_str, bL == 0 ? 
"st" : "ld", 3414 nameIRegOrZR(bX == 1, rT1), 3415 nameIRegOrZR(bX == 1, rT2), 3416 nameIReg64orSP(rN), simm7); 3417 return True; 3418 } 3419 } 3420 3421 /* ---------------- LDR (literal, int reg) ---------------- */ 3422 /* 31 29 23 4 3423 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)] 3424 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)] 3425 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)] 3426 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)] 3427 Just handles the first two cases for now. 3428 */ 3429 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) { 3430 UInt imm19 = INSN(23,5); 3431 UInt rT = INSN(4,0); 3432 UInt bX = INSN(30,30); 3433 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21); 3434 if (bX) { 3435 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea))); 3436 } else { 3437 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea))); 3438 } 3439 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea); 3440 return True; 3441 } 3442 3443 /* -------------- {LD,ST}R (integer register) --------------- */ 3444 /* 31 29 20 15 12 11 9 4 3445 | | | | | | | | 3446 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}] 3447 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}] 3448 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}] 3449 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}] 3450 3451 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}] 3452 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}] 3453 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}] 3454 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}] 3455 */ 3456 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0) 3457 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { 3458 HChar dis_buf[64]; 3459 UInt szLg2 = INSN(31,30); 3460 Bool isLD = INSN(22,22) == 1; 3461 UInt tt = INSN(4,0); 3462 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/); 3463 if (ea != IRTemp_INVALID) { 
3464 switch (szLg2) { 3465 case 3: /* 64 bit */ 3466 if (isLD) { 3467 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea))); 3468 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf); 3469 } else { 3470 storeLE(mkexpr(ea), getIReg64orZR(tt)); 3471 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf); 3472 } 3473 break; 3474 case 2: /* 32 bit */ 3475 if (isLD) { 3476 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea))); 3477 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf); 3478 } else { 3479 storeLE(mkexpr(ea), getIReg32orZR(tt)); 3480 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf); 3481 } 3482 break; 3483 case 1: /* 16 bit */ 3484 if (isLD) { 3485 putIReg64orZR(tt, unop(Iop_16Uto64, 3486 loadLE(Ity_I16, mkexpr(ea)))); 3487 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf); 3488 } else { 3489 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt))); 3490 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf); 3491 } 3492 break; 3493 case 0: /* 8 bit */ 3494 if (isLD) { 3495 putIReg64orZR(tt, unop(Iop_8Uto64, 3496 loadLE(Ity_I8, mkexpr(ea)))); 3497 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf); 3498 } else { 3499 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt))); 3500 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf); 3501 } 3502 break; 3503 default: 3504 vassert(0); 3505 } 3506 return True; 3507 } 3508 } 3509 3510 /* -------------- LDRS{B,H,W} (uimm12) -------------- */ 3511 /* 31 29 26 23 21 9 4 3512 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4] 3513 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2] 3514 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1] 3515 where 3516 Rt is Wt when x==1, Xt when x==0 3517 */ 3518 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) { 3519 /* Further checks on bits 31:30 and 22 */ 3520 Bool valid = False; 3521 switch ((INSN(31,30) << 1) | INSN(22,22)) { 3522 case BITS3(1,0,0): 3523 case BITS3(0,1,0): case BITS3(0,1,1): 3524 case BITS3(0,0,0): case BITS3(0,0,1): 3525 valid = True; 3526 break; 3527 } 3528 if (valid) { 3529 UInt szLg2 = 
INSN(31,30); 3530 UInt bitX = INSN(22,22); 3531 UInt imm12 = INSN(21,10); 3532 UInt nn = INSN(9,5); 3533 UInt tt = INSN(4,0); 3534 UInt szB = 1 << szLg2; 3535 IRExpr* ea = binop(Iop_Add64, 3536 getIReg64orSP(nn), mkU64(imm12 * szB)); 3537 switch (szB) { 3538 case 4: 3539 vassert(bitX == 0); 3540 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea))); 3541 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt), 3542 nameIReg64orSP(nn), imm12 * szB); 3543 break; 3544 case 2: 3545 if (bitX == 1) { 3546 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea))); 3547 } else { 3548 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea))); 3549 } 3550 DIP("ldrsh %s, [%s, #%u]\n", 3551 nameIRegOrZR(bitX == 0, tt), 3552 nameIReg64orSP(nn), imm12 * szB); 3553 break; 3554 case 1: 3555 if (bitX == 1) { 3556 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea))); 3557 } else { 3558 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea))); 3559 } 3560 DIP("ldrsb %s, [%s, #%u]\n", 3561 nameIRegOrZR(bitX == 0, tt), 3562 nameIReg64orSP(nn), imm12 * szB); 3563 break; 3564 default: 3565 vassert(0); 3566 } 3567 return True; 3568 } 3569 /* else fall through */ 3570 } 3571 3572 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */ 3573 /* (at-Rn-then-Rn=EA) 3574 31 29 23 21 20 11 9 4 3575 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9 3576 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9 3577 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9 3578 3579 (at-EA-then-Rn=EA) 3580 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]! 3581 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]! 3582 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]! 
3583 where 3584 Rt is Wt when x==1, Xt when x==0 3585 transfer-at-Rn when [11]==0, at EA when [11]==1 3586 */ 3587 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1) 3588 && INSN(21,21) == 0 && INSN(10,10) == 1) { 3589 /* Further checks on bits 31:30 and 22 */ 3590 Bool valid = False; 3591 switch ((INSN(31,30) << 1) | INSN(22,22)) { 3592 case BITS3(1,0,0): // LDRSW Xt 3593 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt 3594 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt 3595 valid = True; 3596 break; 3597 } 3598 if (valid) { 3599 UInt szLg2 = INSN(31,30); 3600 UInt imm9 = INSN(20,12); 3601 Bool atRN = INSN(11,11) == 0; 3602 UInt nn = INSN(9,5); 3603 UInt tt = INSN(4,0); 3604 IRTemp tRN = newTemp(Ity_I64); 3605 IRTemp tEA = newTemp(Ity_I64); 3606 IRTemp tTA = IRTemp_INVALID; 3607 ULong simm9 = sx_to_64(imm9, 9); 3608 Bool is64 = INSN(22,22) == 0; 3609 assign(tRN, getIReg64orSP(nn)); 3610 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 3611 tTA = atRN ? tRN : tEA; 3612 HChar ch = '?'; 3613 /* There are 5 cases: 3614 byte load, SX to 64 3615 byte load, SX to 32, ZX to 64 3616 halfword load, SX to 64 3617 halfword load, SX to 32, ZX to 64 3618 word load, SX to 64 3619 The ifs below handle them in the listed order. 3620 */ 3621 if (szLg2 == 0) { 3622 ch = 'b'; 3623 if (is64) { 3624 putIReg64orZR(tt, unop(Iop_8Sto64, 3625 loadLE(Ity_I8, mkexpr(tTA)))); 3626 } else { 3627 putIReg32orZR(tt, unop(Iop_8Sto32, 3628 loadLE(Ity_I8, mkexpr(tTA)))); 3629 } 3630 } 3631 else if (szLg2 == 1) { 3632 ch = 'h'; 3633 if (is64) { 3634 putIReg64orZR(tt, unop(Iop_16Sto64, 3635 loadLE(Ity_I16, mkexpr(tTA)))); 3636 } else { 3637 putIReg32orZR(tt, unop(Iop_16Sto32, 3638 loadLE(Ity_I16, mkexpr(tTA)))); 3639 } 3640 } 3641 else if (szLg2 == 2 && is64) { 3642 ch = 'w'; 3643 putIReg64orZR(tt, unop(Iop_32Sto64, 3644 loadLE(Ity_I32, mkexpr(tTA)))); 3645 } 3646 else { 3647 vassert(0); 3648 } 3649 putIReg64orSP(nn, mkexpr(tEA)); 3650 DIP(atRN ? 
"ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!", 3651 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9); 3652 return True; 3653 } 3654 /* else fall through */ 3655 } 3656 3657 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */ 3658 /* 31 29 23 21 20 11 9 4 3659 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9] 3660 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9] 3661 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9] 3662 where 3663 Rt is Wt when x==1, Xt when x==0 3664 */ 3665 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1) 3666 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) { 3667 /* Further checks on bits 31:30 and 22 */ 3668 Bool valid = False; 3669 switch ((INSN(31,30) << 1) | INSN(22,22)) { 3670 case BITS3(1,0,0): // LDURSW Xt 3671 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt 3672 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt 3673 valid = True; 3674 break; 3675 } 3676 if (valid) { 3677 UInt szLg2 = INSN(31,30); 3678 UInt imm9 = INSN(20,12); 3679 UInt nn = INSN(9,5); 3680 UInt tt = INSN(4,0); 3681 IRTemp tRN = newTemp(Ity_I64); 3682 IRTemp tEA = newTemp(Ity_I64); 3683 ULong simm9 = sx_to_64(imm9, 9); 3684 Bool is64 = INSN(22,22) == 0; 3685 assign(tRN, getIReg64orSP(nn)); 3686 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 3687 HChar ch = '?'; 3688 /* There are 5 cases: 3689 byte load, SX to 64 3690 byte load, SX to 32, ZX to 64 3691 halfword load, SX to 64 3692 halfword load, SX to 32, ZX to 64 3693 word load, SX to 64 3694 The ifs below handle them in the listed order. 
3695 */ 3696 if (szLg2 == 0) { 3697 ch = 'b'; 3698 if (is64) { 3699 putIReg64orZR(tt, unop(Iop_8Sto64, 3700 loadLE(Ity_I8, mkexpr(tEA)))); 3701 } else { 3702 putIReg32orZR(tt, unop(Iop_8Sto32, 3703 loadLE(Ity_I8, mkexpr(tEA)))); 3704 } 3705 } 3706 else if (szLg2 == 1) { 3707 ch = 'h'; 3708 if (is64) { 3709 putIReg64orZR(tt, unop(Iop_16Sto64, 3710 loadLE(Ity_I16, mkexpr(tEA)))); 3711 } else { 3712 putIReg32orZR(tt, unop(Iop_16Sto32, 3713 loadLE(Ity_I16, mkexpr(tEA)))); 3714 } 3715 } 3716 else if (szLg2 == 2 && is64) { 3717 ch = 'w'; 3718 putIReg64orZR(tt, unop(Iop_32Sto64, 3719 loadLE(Ity_I32, mkexpr(tEA)))); 3720 } 3721 else { 3722 vassert(0); 3723 } 3724 DIP("ldurs%c %s, [%s, #%lld]", 3725 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9); 3726 return True; 3727 } 3728 /* else fall through */ 3729 } 3730 3731 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */ 3732 /* L==1 => mm==LD 3733 L==0 => mm==ST 3734 sz==00 => 32 bit (S) transfers 3735 sz==01 => 64 bit (D) transfers 3736 sz==10 => 128 bit (Q) transfers 3737 sz==11 isn't allowed 3738 simm7 is scaled by the (single-register) transfer size 3739 3740 31 29 22 21 14 9 4 3741 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm 3742 (at-Rn-then-Rn=EA) 3743 3744 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]! 
3745 (at-EA-then-Rn=EA) 3746 3747 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm] 3748 (at-EA) 3749 */ 3750 3751 UInt insn_29_23 = INSN(29,23); 3752 if (insn_29_23 == BITS7(1,0,1,1,0,0,1) 3753 || insn_29_23 == BITS7(1,0,1,1,0,1,1) 3754 || insn_29_23 == BITS7(1,0,1,1,0,1,0)) { 3755 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units 3756 Bool isLD = INSN(22,22) == 1; 3757 Bool wBack = INSN(23,23) == 1; 3758 Long simm7 = (Long)sx_to_64(INSN(21,15), 7); 3759 UInt tt2 = INSN(14,10); 3760 UInt nn = INSN(9,5); 3761 UInt tt1 = INSN(4,0); 3762 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) { 3763 /* undecodable; fall through */ 3764 } else { 3765 if (nn == 31) { /* FIXME generate stack alignment check */ } 3766 3767 // Compute the transfer address TA and the writeback address WA. 3768 UInt szB = 4 << szSlg2; /* szB is the per-register size */ 3769 IRTemp tRN = newTemp(Ity_I64); 3770 assign(tRN, getIReg64orSP(nn)); 3771 IRTemp tEA = newTemp(Ity_I64); 3772 simm7 = szB * simm7; 3773 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7))); 3774 3775 IRTemp tTA = newTemp(Ity_I64); 3776 IRTemp tWA = newTemp(Ity_I64); 3777 switch (INSN(24,23)) { 3778 case BITS2(0,1): 3779 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break; 3780 case BITS2(1,1): 3781 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break; 3782 case BITS2(1,0): 3783 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break; 3784 default: 3785 vassert(0); /* NOTREACHED */ 3786 } 3787 3788 IRType ty = Ity_INVALID; 3789 switch (szB) { 3790 case 4: ty = Ity_F32; break; 3791 case 8: ty = Ity_F64; break; 3792 case 16: ty = Ity_V128; break; 3793 default: vassert(0); 3794 } 3795 3796 /* Normally rN would be updated after the transfer. However, in 3797 the special cases typifed by 3798 stp q0, q1, [sp,#-512]! 3799 stp d0, d1, [sp,#-512]! 3800 stp s0, s1, [sp,#-512]! 
3801 it is necessary to update SP before the transfer, (1) 3802 because Memcheck will otherwise complain about a write 3803 below the stack pointer, and (2) because the segfault 3804 stack extension mechanism will otherwise extend the stack 3805 only down to SP before the instruction, which might not be 3806 far enough, if the -512 bit takes the actual access 3807 address to the next page. 3808 */ 3809 Bool earlyWBack 3810 = wBack && simm7 < 0 3811 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD; 3812 3813 if (wBack && earlyWBack) 3814 putIReg64orSP(nn, mkexpr(tEA)); 3815 3816 if (isLD) { 3817 if (szB < 16) { 3818 putQReg128(tt1, mkV128(0x0000)); 3819 } 3820 putQRegLO(tt1, 3821 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0)))); 3822 if (szB < 16) { 3823 putQReg128(tt2, mkV128(0x0000)); 3824 } 3825 putQRegLO(tt2, 3826 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB)))); 3827 } else { 3828 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)), 3829 getQRegLO(tt1, ty)); 3830 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)), 3831 getQRegLO(tt2, ty)); 3832 } 3833 3834 if (wBack && !earlyWBack) 3835 putIReg64orSP(nn, mkexpr(tEA)); 3836 3837 const HChar* fmt_str = NULL; 3838 switch (INSN(24,23)) { 3839 case BITS2(0,1): 3840 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n"; 3841 break; 3842 case BITS2(1,1): 3843 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n"; 3844 break; 3845 case BITS2(1,0): 3846 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n"; 3847 break; 3848 default: 3849 vassert(0); 3850 } 3851 DIP(fmt_str, isLD ? 
"ld" : "st", 3852 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty), 3853 nameIReg64orSP(nn), simm7); 3854 return True; 3855 } 3856 } 3857 3858 /* -------------- {LD,ST}R (vector register) --------------- */ 3859 /* 31 29 23 20 15 12 11 9 4 3860 | | | | | | | | | 3861 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}] 3862 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}] 3863 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}] 3864 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}] 3865 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}] 3866 3867 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}] 3868 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}] 3869 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}] 3870 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}] 3871 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}] 3872 */ 3873 if (INSN(29,24) == BITS6(1,1,1,1,0,0) 3874 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { 3875 HChar dis_buf[64]; 3876 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 3877 Bool isLD = INSN(22,22) == 1; 3878 UInt tt = INSN(4,0); 3879 if (szLg2 >= 4) goto after_LDR_STR_vector_register; 3880 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/); 3881 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register; 3882 switch (szLg2) { 3883 case 0: /* 8 bit */ 3884 if (isLD) { 3885 putQReg128(tt, mkV128(0x0000)); 3886 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea))); 3887 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf); 3888 } else { 3889 vassert(0); //ATC 3890 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8)); 3891 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf); 3892 } 3893 break; 3894 case 1: 3895 if (isLD) { 3896 putQReg128(tt, mkV128(0x0000)); 3897 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea))); 3898 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf); 3899 } else { 3900 vassert(0); //ATC 3901 
storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16)); 3902 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf); 3903 } 3904 break; 3905 case 2: /* 32 bit */ 3906 if (isLD) { 3907 putQReg128(tt, mkV128(0x0000)); 3908 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea))); 3909 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf); 3910 } else { 3911 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32)); 3912 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf); 3913 } 3914 break; 3915 case 3: /* 64 bit */ 3916 if (isLD) { 3917 putQReg128(tt, mkV128(0x0000)); 3918 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea))); 3919 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf); 3920 } else { 3921 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64)); 3922 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf); 3923 } 3924 break; 3925 case 4: return False; //ATC 3926 default: vassert(0); 3927 } 3928 return True; 3929 } 3930 after_LDR_STR_vector_register: 3931 3932 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */ 3933 /* 31 29 22 20 15 12 11 9 4 3934 | | | | | | | | | 3935 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}] 3936 3937 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}] 3938 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}] 3939 3940 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}] 3941 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}] 3942 */ 3943 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1) 3944 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { 3945 HChar dis_buf[64]; 3946 UInt szLg2 = INSN(31,30); 3947 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64 3948 UInt tt = INSN(4,0); 3949 if (szLg2 == 3) goto after_LDRS_integer_register; 3950 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/); 3951 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register; 3952 /* Enumerate the 5 variants explicitly. 
*/ 3953 if (szLg2 == 2/*32 bit*/ && sxTo64) { 3954 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea)))); 3955 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf); 3956 return True; 3957 } 3958 else 3959 if (szLg2 == 1/*16 bit*/) { 3960 if (sxTo64) { 3961 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea)))); 3962 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf); 3963 } else { 3964 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea)))); 3965 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf); 3966 } 3967 return True; 3968 } 3969 else 3970 if (szLg2 == 0/*8 bit*/) { 3971 if (sxTo64) { 3972 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea)))); 3973 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf); 3974 } else { 3975 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea)))); 3976 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf); 3977 } 3978 return True; 3979 } 3980 /* else it's an invalid combination */ 3981 } 3982 after_LDRS_integer_register: 3983 3984 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */ 3985 /* This is the Unsigned offset variant only. The Post-Index and 3986 Pre-Index variants are below. 
3987 3988 31 29 23 21 9 4 3989 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1] 3990 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2] 3991 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4] 3992 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8] 3993 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16] 3994 3995 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1] 3996 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2] 3997 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4] 3998 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8] 3999 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16] 4000 */ 4001 if (INSN(29,24) == BITS6(1,1,1,1,0,1) 4002 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) { 4003 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 4004 Bool isLD = INSN(22,22) == 1; 4005 UInt pimm12 = INSN(21,10) << szLg2; 4006 UInt nn = INSN(9,5); 4007 UInt tt = INSN(4,0); 4008 IRTemp tEA = newTemp(Ity_I64); 4009 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); 4010 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12))); 4011 if (isLD) { 4012 if (szLg2 < 4) { 4013 putQReg128(tt, mkV128(0x0000)); 4014 } 4015 putQRegLO(tt, loadLE(ty, mkexpr(tEA))); 4016 } else { 4017 storeLE(mkexpr(tEA), getQRegLO(tt, ty)); 4018 } 4019 DIP("%s %s, [%s, #%u]\n", 4020 isLD ? "ldr" : "str", 4021 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12); 4022 return True; 4023 } 4024 4025 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */ 4026 /* These are the Post-Index and Pre-Index variants. 4027 4028 31 29 23 20 11 9 4 4029 (at-Rn-then-Rn=EA) 4030 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm 4031 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm 4032 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm 4033 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm 4034 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm 4035 4036 (at-EA-then-Rn=EA) 4037 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]! 4038 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]! 
4039 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]! 4040 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]! 4041 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]! 4042 4043 Stores are the same except with bit 22 set to 0. 4044 */ 4045 if (INSN(29,24) == BITS6(1,1,1,1,0,0) 4046 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4 4047 && INSN(21,21) == 0 && INSN(10,10) == 1) { 4048 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 4049 Bool isLD = INSN(22,22) == 1; 4050 UInt imm9 = INSN(20,12); 4051 Bool atRN = INSN(11,11) == 0; 4052 UInt nn = INSN(9,5); 4053 UInt tt = INSN(4,0); 4054 IRTemp tRN = newTemp(Ity_I64); 4055 IRTemp tEA = newTemp(Ity_I64); 4056 IRTemp tTA = IRTemp_INVALID; 4057 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); 4058 ULong simm9 = sx_to_64(imm9, 9); 4059 assign(tRN, getIReg64orSP(nn)); 4060 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 4061 tTA = atRN ? tRN : tEA; 4062 if (isLD) { 4063 if (szLg2 < 4) { 4064 putQReg128(tt, mkV128(0x0000)); 4065 } 4066 putQRegLO(tt, loadLE(ty, mkexpr(tTA))); 4067 } else { 4068 storeLE(mkexpr(tTA), getQRegLO(tt, ty)); 4069 } 4070 putIReg64orSP(nn, mkexpr(tEA)); 4071 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n", 4072 isLD ? 
"ldr" : "str", 4073 nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9); 4074 return True; 4075 } 4076 4077 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */ 4078 /* 31 29 23 20 11 9 4 4079 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm] 4080 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm] 4081 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm] 4082 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm] 4083 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm] 4084 4085 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm] 4086 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm] 4087 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm] 4088 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm] 4089 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm] 4090 */ 4091 if (INSN(29,24) == BITS6(1,1,1,1,0,0) 4092 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4 4093 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) { 4094 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 4095 Bool isLD = INSN(22,22) == 1; 4096 UInt imm9 = INSN(20,12); 4097 UInt nn = INSN(9,5); 4098 UInt tt = INSN(4,0); 4099 ULong simm9 = sx_to_64(imm9, 9); 4100 IRTemp tEA = newTemp(Ity_I64); 4101 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); 4102 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9))); 4103 if (isLD) { 4104 if (szLg2 < 4) { 4105 putQReg128(tt, mkV128(0x0000)); 4106 } 4107 putQRegLO(tt, loadLE(ty, mkexpr(tEA))); 4108 } else { 4109 storeLE(mkexpr(tEA), getQRegLO(tt, ty)); 4110 } 4111 DIP("%s %s, [%s, #%lld]\n", 4112 isLD ? 
"ldur" : "stur", 4113 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9); 4114 return True; 4115 } 4116 4117 /* ---------------- LDR (literal, SIMD&FP) ---------------- */ 4118 /* 31 29 23 4 4119 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)] 4120 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)] 4121 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)] 4122 */ 4123 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) { 4124 UInt szB = 4 << INSN(31,30); 4125 UInt imm19 = INSN(23,5); 4126 UInt tt = INSN(4,0); 4127 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21); 4128 IRType ty = preferredVectorSubTypeFromSize(szB); 4129 putQReg128(tt, mkV128(0x0000)); 4130 putQRegLO(tt, loadLE(ty, mkU64(ea))); 4131 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea); 4132 return True; 4133 } 4134 4135 /* ---------- LD1/ST1 (single structure, no offset) ---------- */ 4136 /* 31 23 15 4137 0Q00 1101 0L00 0000 xx0S sz N T 4138 ---- 4139 opcode 4140 1011 1111 1011 1111 0010 00 0 0 <- mask 4141 0000 1101 0000 0000 0000 00 0 0 <- result 4142 4143 FIXME does this assume that the host is little endian? 
4144 */ 4145 4146 if ((insn & 0xBFBF2000) == 0x0D000000) { 4147 Bool isLD = INSN(22,22) == 1; 4148 UInt rN = INSN(9,5); 4149 UInt vT = INSN(4,0); 4150 UInt q = INSN(30, 30); 4151 UInt xx = INSN(15, 14); 4152 UInt opcode = INSN(15, 13); 4153 UInt s = INSN(12, 12); 4154 UInt sz = INSN(11, 10); 4155 4156 UInt index = (q << 3) | (s << 2) | sz; 4157 const HChar* name = ""; 4158 Bool valid = False; 4159 IRType laneTy = Ity_I8; 4160 4161 if (opcode == 0x0) { // 8 bit variant 4162 name = "b"; 4163 valid = True; 4164 } else if (opcode == 0x2 && (sz & 1) == 0) { // 16 bit variant 4165 name = "h"; 4166 laneTy = Ity_I16; 4167 index >>= 1; 4168 valid = True; 4169 } else if (opcode == 0x4 && sz == 0x0) { // 32 bit variant 4170 name = "s"; 4171 laneTy = Ity_I32; 4172 index >>= 2; 4173 valid = True; 4174 } else if (opcode == 0x4 && sz == 0x1 && s == 0) { // 64 bit variant 4175 name = "d"; 4176 laneTy = Ity_I64; 4177 index >>= 3; 4178 valid = True; 4179 } 4180 4181 if (valid) { 4182 IRTemp tEA = newTemp(Ity_I64); 4183 assign(tEA, getIReg64orSP(rN)); 4184 if (rN == 31) { /* FIXME generate stack alignment check */ } 4185 if (isLD) { 4186 putQRegLane(vT, index, loadLE(laneTy, mkexpr(tEA))); 4187 } else { 4188 storeLE(mkexpr(tEA), getQRegLane(vT, index, laneTy)); 4189 } 4190 4191 DIP("%s {v%u.%s}[%d], [%s]\n", isLD ? 
"ld1" : "st1", 4192 vT, name, index, nameIReg64orSP(rN)); 4193 return True; 4194 } 4195 4196 } 4197 4198 4199 /* ---------- LD1/ST1 (multiple structure, no offset, one register variant) ---------- */ 4200 /* 31 23 4201 0100 1100 0100 0000 0111 11 N T LD1 {vT.2d}, [Xn|SP] 4202 0100 1100 0000 0000 0111 11 N T ST1 {vT.2d}, [Xn|SP] 4203 0100 1100 0100 0000 0111 10 N T LD1 {vT.4s}, [Xn|SP] 4204 0100 1100 0000 0000 0111 10 N T ST1 {vT.4s}, [Xn|SP] 4205 0100 1100 0100 0000 0111 01 N T LD1 {vT.8h}, [Xn|SP] 4206 0100 1100 0000 0000 0111 01 N T ST1 {vT.8h}, [Xn|SP] 4207 0100 1100 0100 0000 0111 00 N T LD1 {vT.16b}, [Xn|SP] 4208 0100 1100 0000 0000 0111 00 N T ST1 {vT.16b}, [Xn|SP] 4209 FIXME does this assume that the host is little endian? 4210 */ 4211 if ( (insn & 0xFFFFF000) == 0x4C407000 // LD1 cases 4212 || (insn & 0xFFFFF000) == 0x4C007000 // ST1 cases 4213 ) { 4214 Bool isLD = INSN(22,22) == 1; 4215 UInt rN = INSN(9,5); 4216 UInt vT = INSN(4,0); 4217 IRTemp tEA = newTemp(Ity_I64); 4218 const HChar* names[4] = { "2d", "4s", "8h", "16b" }; 4219 const HChar* name = names[INSN(11,10)]; 4220 assign(tEA, getIReg64orSP(rN)); 4221 if (rN == 31) { /* FIXME generate stack alignment check */ } 4222 if (isLD) { 4223 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA))); 4224 } else { 4225 storeLE(mkexpr(tEA), getQReg128(vT)); 4226 } 4227 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1", 4228 vT, name, nameIReg64orSP(rN)); 4229 return True; 4230 } 4231 4232 /* 31 23 4233 0000 1100 0100 0000 0111 11 N T LD1 {vT.1d}, [Xn|SP] 4234 0000 1100 0000 0000 0111 11 N T ST1 {vT.1d}, [Xn|SP] 4235 0000 1100 0100 0000 0111 10 N T LD1 {vT.2s}, [Xn|SP] 4236 0000 1100 0000 0000 0111 10 N T ST1 {vT.2s}, [Xn|SP] 4237 0000 1100 0100 0000 0111 01 N T LD1 {vT.4h}, [Xn|SP] 4238 0000 1100 0000 0000 0111 01 N T ST1 {vT.4h}, [Xn|SP] 4239 0000 1100 0100 0000 0111 00 N T LD1 {vT.8b}, [Xn|SP] 4240 0000 1100 0000 0000 0111 00 N T ST1 {vT.8b}, [Xn|SP] 4241 FIXME does this assume that the host is little endian? 
4242 */ 4243 if ( (insn & 0xFFFFF000) == 0x0C407000 // LD1 cases 4244 || (insn & 0xFFFFF000) == 0x0C007000 // ST1 cases 4245 ) { 4246 Bool isLD = INSN(22,22) == 1; 4247 UInt rN = INSN(9,5); 4248 UInt vT = INSN(4,0); 4249 IRTemp tEA = newTemp(Ity_I64); 4250 const HChar* names[4] = { "1d", "2s", "4h", "8b" }; 4251 const HChar* name = names[INSN(11,10)]; 4252 assign(tEA, getIReg64orSP(rN)); 4253 if (rN == 31) { /* FIXME generate stack alignment check */ } 4254 if (isLD) { 4255 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA))); 4256 putQRegLane(vT, 1, mkU64(0)); 4257 } else { 4258 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64)); 4259 } 4260 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1", 4261 vT, name, nameIReg64orSP(rN)); 4262 return True; 4263 } 4264 4265 /* ---------- LD1/ST1 (multiple structure, post-index, one register variant) ---------- */ 4266 /* 31 23 4267 0100 1100 1001 1111 0111 11 N T ST1 {vT.2d}, [xN|SP], #16 4268 0100 1100 1101 1111 0111 11 N T LD1 {vT.2d}, [xN|SP], #16 4269 0100 1100 1001 1111 0111 10 N T ST1 {vT.4s}, [xN|SP], #16 4270 0100 1100 1101 1111 0111 10 N T LD1 {vT.4s}, [xN|SP], #16 4271 0100 1100 1001 1111 0111 01 N T ST1 {vT.8h}, [xN|SP], #16 4272 0100 1100 1101 1111 0111 01 N T LD1 {vT.8h}, [xN|SP], #16 4273 0100 1100 1001 1111 0111 00 N T ST1 {vT.16b}, [xN|SP], #16 4274 0100 1100 1101 1111 0111 00 N T LD1 {vT.16b}, [xN|SP], #16 4275 Note that #16 is implied and cannot be any other value. 4276 FIXME does this assume that the host is little endian? 
4277 */ 4278 if ( (insn & 0xFFFFF000) == 0x4CDF7000 // LD1 cases 4279 || (insn & 0xFFFFF000) == 0x4C9F7000 // ST1 cases 4280 ) { 4281 Bool isLD = INSN(22,22) == 1; 4282 UInt rN = INSN(9,5); 4283 UInt vT = INSN(4,0); 4284 IRTemp tEA = newTemp(Ity_I64); 4285 const HChar* names[4] = { "2d", "4s", "8h", "16b" }; 4286 const HChar* name = names[INSN(11,10)]; 4287 assign(tEA, getIReg64orSP(rN)); 4288 if (rN == 31) { /* FIXME generate stack alignment check */ } 4289 if (isLD) { 4290 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA))); 4291 } else { 4292 storeLE(mkexpr(tEA), getQReg128(vT)); 4293 } 4294 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16))); 4295 DIP("%s {v%u.%s}, [%s], #16\n", isLD ? "ld1" : "st1", 4296 vT, name, nameIReg64orSP(rN)); 4297 return True; 4298 } 4299 4300 /* 31 23 4301 0000 1100 1001 1111 0111 11 N T ST1 {vT.1d}, [xN|SP], #8 4302 0000 1100 1101 1111 0111 11 N T LD1 {vT.1d}, [xN|SP], #8 4303 0000 1100 1001 1111 0111 10 N T ST1 {vT.2s}, [xN|SP], #8 4304 0000 1100 1101 1111 0111 10 N T LD1 {vT.2s}, [xN|SP], #8 4305 0000 1100 1001 1111 0111 01 N T ST1 {vT.4h}, [xN|SP], #8 4306 0000 1100 1101 1111 0111 01 N T LD1 {vT.4h}, [xN|SP], #8 4307 0000 1100 1001 1111 0111 00 N T ST1 {vT.8b}, [xN|SP], #8 4308 0000 1100 1101 1111 0111 00 N T LD1 {vT.8b}, [xN|SP], #8 4309 Note that #8 is implied and cannot be any other value. 4310 FIXME does this assume that the host is little endian? 
4311 */ 4312 if ( (insn & 0xFFFFF000) == 0x0CDF7000 // LD1 cases 4313 || (insn & 0xFFFFF000) == 0x0C9F7000 // ST1 cases 4314 ) { 4315 Bool isLD = INSN(22,22) == 1; 4316 UInt rN = INSN(9,5); 4317 UInt vT = INSN(4,0); 4318 IRTemp tEA = newTemp(Ity_I64); 4319 const HChar* names[4] = { "1d", "2s", "4h", "8b" }; 4320 const HChar* name = names[INSN(11,10)]; 4321 assign(tEA, getIReg64orSP(rN)); 4322 if (rN == 31) { /* FIXME generate stack alignment check */ } 4323 if (isLD) { 4324 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA))); 4325 putQRegLane(vT, 1, mkU64(0)); 4326 } else { 4327 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64)); 4328 } 4329 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8))); 4330 DIP("%s {v%u.%s}, [%s], #8\n", isLD ? "ld1" : "st1", 4331 vT, name, nameIReg64orSP(rN)); 4332 return True; 4333 } 4334 4335 /* ---------- LD2/ST2 (multiple structures, post index) ---------- */ 4336 /* Only a very few cases. */ 4337 /* 31 23 11 9 4 4338 0100 1100 1101 1111 1000 11 n t LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32 4339 0100 1100 1001 1111 1000 11 n t ST2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32 4340 0100 1100 1101 1111 1000 10 n t LD2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32 4341 0100 1100 1001 1111 1000 10 n t ST2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32 4342 */ 4343 if ( (insn & 0xFFFFFC00) == 0x4CDF8C00 // LD2 .2d 4344 || (insn & 0xFFFFFC00) == 0x4C9F8C00 // ST2 .2d 4345 || (insn & 0xFFFFFC00) == 0x4CDF8800 // LD2 .4s 4346 || (insn & 0xFFFFFC00) == 0x4C9F8800 // ST2 .4s 4347 ) { 4348 Bool isLD = INSN(22,22) == 1; 4349 UInt rN = INSN(9,5); 4350 UInt vT = INSN(4,0); 4351 IRTemp tEA = newTemp(Ity_I64); 4352 UInt sz = INSN(11,10); 4353 const HChar* name = "??"; 4354 assign(tEA, getIReg64orSP(rN)); 4355 if (rN == 31) { /* FIXME generate stack alignment check */ } 4356 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0)); 4357 IRExpr* tEA_8 = binop(Iop_Add64, mkexpr(tEA), mkU64(8)); 4358 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16)); 4359 IRExpr* tEA_24 = 
binop(Iop_Add64, mkexpr(tEA), mkU64(24)); 4360 if (sz == BITS2(1,1)) { 4361 name = "2d"; 4362 if (isLD) { 4363 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0)); 4364 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_16)); 4365 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8)); 4366 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_24)); 4367 } else { 4368 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I64)); 4369 storeLE(tEA_16, getQRegLane((vT+0) % 32, 1, Ity_I64)); 4370 storeLE(tEA_8, getQRegLane((vT+1) % 32, 0, Ity_I64)); 4371 storeLE(tEA_24, getQRegLane((vT+1) % 32, 1, Ity_I64)); 4372 } 4373 } 4374 else if (sz == BITS2(1,0)) { 4375 /* Uh, this is ugly. TODO: better. */ 4376 name = "4s"; 4377 IRExpr* tEA_4 = binop(Iop_Add64, mkexpr(tEA), mkU64(4)); 4378 IRExpr* tEA_12 = binop(Iop_Add64, mkexpr(tEA), mkU64(12)); 4379 IRExpr* tEA_20 = binop(Iop_Add64, mkexpr(tEA), mkU64(20)); 4380 IRExpr* tEA_28 = binop(Iop_Add64, mkexpr(tEA), mkU64(28)); 4381 if (isLD) { 4382 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I32, tEA_0)); 4383 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I32, tEA_8)); 4384 putQRegLane((vT+0) % 32, 2, loadLE(Ity_I32, tEA_16)); 4385 putQRegLane((vT+0) % 32, 3, loadLE(Ity_I32, tEA_24)); 4386 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I32, tEA_4)); 4387 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I32, tEA_12)); 4388 putQRegLane((vT+1) % 32, 2, loadLE(Ity_I32, tEA_20)); 4389 putQRegLane((vT+1) % 32, 3, loadLE(Ity_I32, tEA_28)); 4390 } else { 4391 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I32)); 4392 storeLE(tEA_8, getQRegLane((vT+0) % 32, 1, Ity_I32)); 4393 storeLE(tEA_16, getQRegLane((vT+0) % 32, 2, Ity_I32)); 4394 storeLE(tEA_24, getQRegLane((vT+0) % 32, 3, Ity_I32)); 4395 storeLE(tEA_4, getQRegLane((vT+1) % 32, 0, Ity_I32)); 4396 storeLE(tEA_12, getQRegLane((vT+1) % 32, 1, Ity_I32)); 4397 storeLE(tEA_20, getQRegLane((vT+1) % 32, 2, Ity_I32)); 4398 storeLE(tEA_28, getQRegLane((vT+1) % 32, 3, Ity_I32)); 4399 } 4400 } 4401 else { 4402 vassert(0); // Can't happen. 
4403 } 4404 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32))); 4405 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld2" : "st2", 4406 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN)); 4407 return True; 4408 } 4409 4410 /* ---------- LD1/ST1 (multiple structures, no offset) ---------- */ 4411 /* Only a very few cases. */ 4412 /* 31 23 4413 0100 1100 0100 0000 1010 00 n t LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP] 4414 0100 1100 0000 0000 1010 00 n t ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP] 4415 */ 4416 if ( (insn & 0xFFFFFC00) == 0x4C40A000 // LD1 4417 || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1 4418 ) { 4419 Bool isLD = INSN(22,22) == 1; 4420 UInt rN = INSN(9,5); 4421 UInt vT = INSN(4,0); 4422 IRTemp tEA = newTemp(Ity_I64); 4423 const HChar* name = "16b"; 4424 assign(tEA, getIReg64orSP(rN)); 4425 if (rN == 31) { /* FIXME generate stack alignment check */ } 4426 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0)); 4427 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16)); 4428 if (isLD) { 4429 putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0)); 4430 putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16)); 4431 } else { 4432 storeLE(tEA_0, getQReg128((vT+0) % 32)); 4433 storeLE(tEA_16, getQReg128((vT+1) % 32)); 4434 } 4435 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? 
"ld1" : "st1", 4436 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN)); 4437 return True; 4438 } 4439 4440 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */ 4441 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */ 4442 /* 31 29 23 20 14 9 4 4443 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP] 4444 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP] 4445 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP] 4446 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP] 4447 */ 4448 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0) 4449 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0) 4450 && INSN(14,10) == BITS5(1,1,1,1,1)) { 4451 UInt szBlg2 = INSN(31,30); 4452 Bool isLD = INSN(22,22) == 1; 4453 Bool isAcqOrRel = INSN(15,15) == 1; 4454 UInt ss = INSN(20,16); 4455 UInt nn = INSN(9,5); 4456 UInt tt = INSN(4,0); 4457 4458 vassert(szBlg2 < 4); 4459 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */ 4460 IRType ty = integerIRTypeOfSize(szB); 4461 const HChar* suffix[4] = { "rb", "rh", "r", "r" }; 4462 4463 IRTemp ea = newTemp(Ity_I64); 4464 assign(ea, getIReg64orSP(nn)); 4465 /* FIXME generate check that ea is szB-aligned */ 4466 4467 if (isLD && ss == BITS5(1,1,1,1,1)) { 4468 IRTemp res = newTemp(ty); 4469 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/)); 4470 putIReg64orZR(tt, widenUto64(ty, mkexpr(res))); 4471 if (isAcqOrRel) { 4472 stmt(IRStmt_MBE(Imbe_Fence)); 4473 } 4474 DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2], 4475 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 4476 return True; 4477 } 4478 if (!isLD) { 4479 if (isAcqOrRel) { 4480 stmt(IRStmt_MBE(Imbe_Fence)); 4481 } 4482 IRTemp res = newTemp(Ity_I1); 4483 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt)); 4484 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data)); 4485 /* IR semantics: res is 1 if store succeeds, 0 if it fails. 4486 Need to set rS to 1 on failure, 0 on success. 
*/ 4487 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)), 4488 mkU64(1))); 4489 DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2], 4490 nameIRegOrZR(False, ss), 4491 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 4492 return True; 4493 } 4494 /* else fall through */ 4495 } 4496 4497 /* ------------------ LDA{R,RH,RB} ------------------ */ 4498 /* ------------------ STL{R,RH,RB} ------------------ */ 4499 /* 31 29 23 20 14 9 4 4500 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP] 4501 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP] 4502 */ 4503 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1) 4504 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) { 4505 UInt szBlg2 = INSN(31,30); 4506 Bool isLD = INSN(22,22) == 1; 4507 UInt nn = INSN(9,5); 4508 UInt tt = INSN(4,0); 4509 4510 vassert(szBlg2 < 4); 4511 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */ 4512 IRType ty = integerIRTypeOfSize(szB); 4513 const HChar* suffix[4] = { "rb", "rh", "r", "r" }; 4514 4515 IRTemp ea = newTemp(Ity_I64); 4516 assign(ea, getIReg64orSP(nn)); 4517 /* FIXME generate check that ea is szB-aligned */ 4518 4519 if (isLD) { 4520 IRTemp res = newTemp(ty); 4521 assign(res, loadLE(ty, mkexpr(ea))); 4522 putIReg64orZR(tt, widenUto64(ty, mkexpr(res))); 4523 stmt(IRStmt_MBE(Imbe_Fence)); 4524 DIP("lda%s %s, [%s]\n", suffix[szBlg2], 4525 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 4526 } else { 4527 stmt(IRStmt_MBE(Imbe_Fence)); 4528 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt)); 4529 storeLE(mkexpr(ea), data); 4530 DIP("stl%s %s, [%s]\n", suffix[szBlg2], 4531 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 4532 } 4533 return True; 4534 } 4535 4536 /* ------------------ PRFM (immediate) ------------------ */ 4537 /* 31 29 21 9 4 4538 11 11100110 imm12 n t PRFM <option>, [Xn|SP{, #pimm}] 4539 */ 4540 4541 if (INSN(31, 22) == BITS10(1,1,1,1,1,0,0,1,1,0)) { 4542 /* TODO: decode */ 4543 DIP("prfm ??? 
(imm)");
      return True;
   }

   vex_printf("ARM64 front end: load_store\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Control flow and misc instructions                   ---*/
/*------------------------------------------------------------*/

/* Decode a branch / system / miscellaneous instruction in |insn|,
   emitting IR as a side effect.  On success, updates |dres| (the
   stop/continue disposition and jump kind) and returns True; returns
   False if the instruction is not recognised here.  |archinfo| is
   consulted for cache line geometry (CTR_EL0, IC IVAU, DC CVAU). */
static
Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
                          VexArchInfo* archinfo)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ---------------------- B cond ----------------------- */
   /* 31        24    4 3
      0101010 0 imm19 0 cond */
   if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
      UInt  cond   = INSN(3,0);
      ULong uimm64 = INSN(23,5) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 21);
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
      /* Conditional side exit to the branch target; fall through to
         the next instruction otherwise. */
      stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- B{L} uncond -------------------- */
   if (INSN(30,26) == BITS5(0,0,1,0,1)) {
      /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
         100101 imm26  BL (PC + sxTo64(imm26 << 2))
      */
      UInt  bLink  = INSN(31,31);
      ULong uimm64 = INSN(25,0) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 28);
      if (bLink) {
         /* BL: save the return address in X30 (LR). */
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
      }
      putPC(mkU64(guest_PC_curr_instr + simm64));
      dres->whatNext = Dis_StopHere;
      dres->jk_StopHere = Ijk_Call;
      DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* --------------------- B{L} reg --------------------- */
   /* 31      24 22 20    15     9  4
      1101011 00 10 11111 000000 nn 00000  RET  Rn
      1101011 00 01 11111 000000 nn 00000  CALL Rn
      1101011 00 00 11111 000000 nn 00000  JMP  Rn
   */
   if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
       && INSN(20,16) == BITS5(1,1,1,1,1)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)
       && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt branch_type = INSN(22,21);
      UInt nn          = INSN(9,5);
      if (branch_type == BITS2(1,0) /* RET */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext = Dis_StopHere;
         dres->jk_StopHere = Ijk_Ret;
         DIP("ret %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,1) /* CALL */) {
         /* Capture the target BEFORE updating X30, in case nn == 30. */
         IRTemp dst = newTemp(Ity_I64);
         assign(dst, getIReg64orZR(nn));
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
         putPC(mkexpr(dst));
         dres->whatNext = Dis_StopHere;
         dres->jk_StopHere = Ijk_Call;
         DIP("blr %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,0) /* JMP */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext = Dis_StopHere;
         dres->jk_StopHere = Ijk_Boring;
         DIP("jmp %s\n", nameIReg64orZR(nn));
         return True;
      }
   }

   /* -------------------- CB{N}Z -------------------- */
   /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
      sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
      Bool    is64   = INSN(31,31) == 1;
      Bool    bIfZ   = INSN(24,24) == 0;
      ULong   uimm64 = INSN(23,5) << 2;
      UInt    rT     = INSN(4,0);
      Long    simm64 = (Long)sx_to_64(uimm64, 21);
      IRExpr* cond   = NULL;
      if (is64) {
         cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                      getIReg64orZR(rT), mkU64(0));
      } else {
         cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
                      getIReg32orZR(rT), mkU32(0));
      }
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("cb%sz %s, 0x%llx\n",
          bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- TB{N}Z -------------------- */
   /* 31 30      24 23  18  5 4
      b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
      b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
      UInt  b5     = INSN(31,31);
      Bool  bIfZ   = INSN(24,24) == 0;
      UInt  b40    = INSN(23,19);
      UInt  imm14  = INSN(18,5);
      UInt  tt     = INSN(4,0);
      /* Bit number to test is the 6-bit concatenation b5:b40. */
      UInt  bitNo  = (b5 << 5) | b40;
      ULong uimm64 = imm14 << 2;
      Long  simm64 = sx_to_64(uimm64, 16);
      /* cond := ((Xt >> bitNo) & 1) ==/!= 0 */
      IRExpr* cond
         = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                 binop(Iop_And64,
                       binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
                       mkU64(1)),
                 mkU64(0));
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("tb%sz %s, #%u, 0x%llx\n",
          bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- SVC -------------------- */
   /* 11010100 000 imm16 000 01
      Don't bother with anything except the imm16==0 case.
   */
   if (INSN(31,0) == 0xD4000001) {
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Sys_syscall;
      DIP("svc #0\n");
      return True;
   }

   /* ------------------ M{SR,RS} ------------------ */
   /* Only handles the case where the system register is TPIDR_EL0.
      0xD51BD0 010 Rt   MSR tpidr_el0, rT
      0xD53BD0 010 Rt   MRS rT, tpidr_el0
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
         DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
      } else {
         putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
         DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* Cases for FPCR
      0xD51B44 000 Rt  MSR fpcr, rT
      0xD53B44 000 Rt  MRS rT, fpcr
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
         DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
      } else {
         putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
         DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* Cases for FPSR
      0xD51B44 001 Rt  MSR fpsr, rT
      0xD53B44 001 Rt  MRS rT, fpsr
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         stmt( IRStmt_Put( OFFB_FPSR, getIReg32orZR(tt)) );
         DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
      } else {
         putIReg32orZR(tt, IRExpr_Get(OFFB_FPSR, Ity_I32));
         DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* Cases for NZCV
      D51B42 000 Rt  MSR nzcv, rT
      D53B42 000 Rt  MRS rT, nzcv
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         /* Keep only bits 31..28 (N,Z,C,V) of the source register. */
         IRTemp t = newTemp(Ity_I64);
         assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
         setFlags_COPY(t);
         /* NOTE(review): operand order in this DIP looks swapped --
            the instruction syntax is "msr nzcv, rT"; confirm. */
         DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
      } else {
         IRTemp res = newTemp(Ity_I64);
         assign(res, mk_arm64g_calculate_flags_nzcv());
         putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
         DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* Cases for DCZID_EL0
      Don't support arbitrary reads and writes to this register.  Just
      return the value 16, which indicates that the DC ZVA instruction
      is not permitted, so we don't have to emulate it.
      D5 3B 00 111 Rt  MRS rT, dczid_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
      UInt tt = INSN(4,0);
      putIReg64orZR(tt, mkU64(1<<4));
      DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
      return True;
   }
   /* Cases for CTR_EL0
      We just handle reads, and make up a value from the D and I line
      sizes in the VexArchInfo we are given, and patch in the following
      fields that the Foundation model gives ("natively"):
      CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
      D5 3B 00 001 Rt  MRS rT, ctr_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
      UInt tt = INSN(4,0);
      /* Need to generate a value from dMinLine_lg2_szB and
         iMinLine_lg2_szB.  The value in the register is in 32-bit
         units, so need to subtract 2 from the values in the
         VexArchInfo.  We can assume that the values here are valid --
         disInstr_ARM64 checks them -- so there's no need to deal with
         out-of-range cases. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17
              && archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      UInt val
         = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
                      | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
      putIReg64orZR(tt, mkU64(val));
      DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ IC_IVAU ------------------ */
   /* D5 0B 75 001 Rt  ic ivau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
      /* We will always be provided with a valid iMinLine value. */
      vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the invalidation range, request exit-and-invalidate, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_InvalICache;
      DIP("ic ivau, %s\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ DC_CVAU ------------------ */
   /* D5 0B 7B 001 Rt  dc cvau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
      /* Exactly the same scheme as for IC IVAU, except we observe the
         dMinLine size, and request an Ijk_FlushDCache instead of
         Ijk_InvalICache. */
      /* We will always be provided with a valid dMinLine value. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the flush range, request exit-and-flush, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_FlushDCache;
      DIP("dc cvau, %s\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ ISB, DMB, DSB ------------------ */
   /* All of these are conservatively mapped to a full IR fence. */
   if (INSN(31,0) == 0xD5033FDF) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("isb\n");
      return True;
   }
   if (INSN(31,0) == 0xD5033BBF) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("dmb ish\n");
      return True;
   }
   if (INSN(31,0) == 0xD5033ABF) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("dmb ishst\n");
      return True;
   }
   if (INSN(31,0) == 0xD5033B9F) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("dsb ish\n");
      return True;
   }

   /* -------------------- NOP -------------------- */
   if (INSN(31,0) == 0xD503201F) {
      DIP("nop\n");
      return True;
   }

   //fail:
   vex_printf("ARM64 front end: branch_etc\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- SIMD and FP instructions                             ---*/
/*------------------------------------------------------------*/

/* begin FIXME -- rm temp scaffolding */
static IRExpr* mk_CatEvenLanes64x2 ( IRTemp, IRTemp );
static IRExpr*
mk_CatOddLanes64x2  ( IRTemp, IRTemp );

static IRExpr* mk_CatEvenLanes32x4 ( IRTemp, IRTemp );
static IRExpr* mk_CatOddLanes32x4  ( IRTemp, IRTemp );
static IRExpr* mk_InterleaveLO32x4 ( IRTemp, IRTemp );
static IRExpr* mk_InterleaveHI32x4 ( IRTemp, IRTemp );

static IRExpr* mk_CatEvenLanes16x8 ( IRTemp, IRTemp );
static IRExpr* mk_CatOddLanes16x8  ( IRTemp, IRTemp );
static IRExpr* mk_InterleaveLO16x8 ( IRTemp, IRTemp );
static IRExpr* mk_InterleaveHI16x8 ( IRTemp, IRTemp );

static IRExpr* mk_CatEvenLanes8x16 ( IRTemp, IRTemp );
static IRExpr* mk_CatOddLanes8x16  ( IRTemp, IRTemp );
static IRExpr* mk_InterleaveLO8x16 ( IRTemp, IRTemp );
static IRExpr* mk_InterleaveHI8x16 ( IRTemp, IRTemp );
/* end FIXME -- rm temp scaffolding */

/* Generate N copies of |bit| in the bottom of a ULong.  N must be in
   1 .. 63; N == 64 is deliberately excluded since 1ULL << 64 would be
   undefined behaviour. */
static ULong Replicate ( ULong bit, Int N )
{
   vassert(bit <= 1 && N >= 1 && N < 64);
   if (bit == 0) {
      return 0;
   } else {
      /* Careful.  This won't work for N == 64. */
      return (1ULL << N) - 1;
   }
}

/* Duplicate a 32-bit value into both halves of a 64-bit value. */
static ULong Replicate32x2 ( ULong bits32 )
{
   vassert(0 == (bits32 & ~0xFFFFFFFFULL));
   return (bits32 << 32) | bits32;
}

/* Duplicate a 16-bit value into all four 16-bit lanes of a 64-bit
   value. */
static ULong Replicate16x4 ( ULong bits16 )
{
   vassert(0 == (bits16 & ~0xFFFFULL));
   return Replicate32x2((bits16 << 16) | bits16);
}

/* Duplicate an 8-bit value into all eight 8-bit lanes of a 64-bit
   value. */
static ULong Replicate8x8 ( ULong bits8 )
{
   vassert(0 == (bits8 & ~0xFFULL));
   return Replicate16x4((bits8 << 8) | bits8);
}

/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
   |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
   is 64.  In the former case, the upper 32 bits of the returned value
   are guaranteed to be zero. */
static ULong VFPExpandImm ( ULong imm8, Int N )
{
   vassert(imm8 <= 0xFF);
   vassert(N == 32 || N == 64);
   Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
   Int F = N - E - 1;
   ULong imm8_6 = (imm8 >> 6) & 1;
   /* sign: 1 bit */
   /* exp:  E bits */
   /* frac: F bits */
   ULong sign = (imm8 >> 7) & 1;
   ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
   ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
   vassert(sign < (1ULL << 1));
   vassert(exp  < (1ULL << E));
   vassert(frac < (1ULL << F));
   vassert(1 + E + F == N);
   ULong res = (sign << (E+F)) | (exp << F) | frac;
   return res;
}

/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64    = 0;
   Bool  testimm8 = False;

   /* Dispatch on cmode<3:1>; cmode<0> distinguishes sub-cases where
      needed.  |testimm8| marks the encodings for which imm8 == 0 is
      reserved (and hence must be rejected below). */
   switch (cmode >> 1) {
      case 0:
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4:
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5:
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6:
         /* "Shifted ones" forms: low bits filled with 1s. */
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         if ((cmode & 1) == 0 && op == 1) {
            /* Each bit of imm8 expands to a whole byte of 1s or 0s. */
            imm64 = 0;   imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         if ((cmode & 1) == 1 && op == 0) {
            /* 32-bit FP immediate, replicated to both halves. */
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
        vassert(0);
   }

   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}


/* Help a bit for decoding laneage for vector operations that can be
   of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
   and SZ bits, typically for vector floating point. 
*/ 5088static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF, 5089 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper, 5090 /*OUT*/const HChar** arrSpec, 5091 Bool bitQ, Bool bitSZ ) 5092{ 5093 vassert(bitQ == True || bitQ == False); 5094 vassert(bitSZ == True || bitSZ == False); 5095 if (bitQ && bitSZ) { // 2x64 5096 if (tyI) *tyI = Ity_I64; 5097 if (tyF) *tyF = Ity_F64; 5098 if (nLanes) *nLanes = 2; 5099 if (zeroUpper) *zeroUpper = False; 5100 if (arrSpec) *arrSpec = "2d"; 5101 return True; 5102 } 5103 if (bitQ && !bitSZ) { // 4x32 5104 if (tyI) *tyI = Ity_I32; 5105 if (tyF) *tyF = Ity_F32; 5106 if (nLanes) *nLanes = 4; 5107 if (zeroUpper) *zeroUpper = False; 5108 if (arrSpec) *arrSpec = "4s"; 5109 return True; 5110 } 5111 if (!bitQ && !bitSZ) { // 2x32 5112 if (tyI) *tyI = Ity_I32; 5113 if (tyF) *tyF = Ity_F32; 5114 if (nLanes) *nLanes = 2; 5115 if (zeroUpper) *zeroUpper = True; 5116 if (arrSpec) *arrSpec = "2s"; 5117 return True; 5118 } 5119 // Else impliedly 1x64, which isn't allowed. 5120 return False; 5121} 5122 5123/* Helper for decoding laneage for simple vector operations, 5124 eg integer add. */ 5125static Bool getLaneInfo_SIMPLE ( /*OUT*/Bool* zeroUpper, 5126 /*OUT*/const HChar** arrSpec, 5127 Bool bitQ, UInt szBlg2 ) 5128{ 5129 vassert(bitQ == True || bitQ == False); 5130 vassert(szBlg2 < 4); 5131 Bool zu = False; 5132 const HChar* as = NULL; 5133 switch ((szBlg2 << 1) | (bitQ ? 
1 : 0)) { 5134 case 0: zu = True; as = "8b"; break; 5135 case 1: zu = False; as = "16b"; break; 5136 case 2: zu = True; as = "4h"; break; 5137 case 3: zu = False; as = "8h"; break; 5138 case 4: zu = True; as = "2s"; break; 5139 case 5: zu = False; as = "4s"; break; 5140 case 6: return False; // impliedly 1x64 5141 case 7: zu = False; as = "2d"; break; 5142 default: vassert(0); 5143 } 5144 vassert(as); 5145 if (arrSpec) *arrSpec = as; 5146 if (zeroUpper) *zeroUpper = zu; 5147 return True; 5148} 5149 5150 5151/* Helper for decoding laneage for shift-style vector operations 5152 that involve an immediate shift amount. */ 5153static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2, 5154 UInt immh, UInt immb ) 5155{ 5156 vassert(immh < (1<<4)); 5157 vassert(immb < (1<<3)); 5158 UInt immhb = (immh << 3) | immb; 5159 if (immh & 8) { 5160 if (shift) *shift = 128 - immhb; 5161 if (szBlg2) *szBlg2 = 3; 5162 return True; 5163 } 5164 if (immh & 4) { 5165 if (shift) *shift = 64 - immhb; 5166 if (szBlg2) *szBlg2 = 2; 5167 return True; 5168 } 5169 if (immh & 2) { 5170 if (shift) *shift = 32 - immhb; 5171 if (szBlg2) *szBlg2 = 1; 5172 return True; 5173 } 5174 if (immh & 1) { 5175 if (shift) *shift = 16 - immhb; 5176 if (szBlg2) *szBlg2 = 0; 5177 return True; 5178 } 5179 return False; 5180} 5181 5182 5183/* Generate IR to fold all lanes of the V128 value in 'src' as 5184 characterised by the operator 'op', and return the result in the 5185 bottom bits of a V128, with all other bits set to zero. */ 5186static IRTemp math_MINMAXV ( IRTemp src, IROp op ) 5187{ 5188 /* The basic idea is to use repeated applications of Iop_CatEven* 5189 and Iop_CatOdd* operators to 'src' so as to clone each lane into 5190 a complete vector. Then fold all those vectors with 'op' and 5191 zero out all but the least significant lane. 
*/
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTemp(Ity_V128);
         IRTemp x32103210 = newTemp(Ity_V128);
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTemp(Ity_V128);
         IRTemp x54545454 = newTemp(Ity_V128);
         IRTemp x32323232 = newTemp(Ity_V128);
         IRTemp x10101010 = newTemp(Ity_V128);
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTemp(Ity_V128);
         IRTemp x66666666 = newTemp(Ity_V128);
         IRTemp x55555555 = newTemp(Ity_V128);
         IRTemp x44444444 = newTemp(Ity_V128);
         IRTemp x33333333 = newTemp(Ity_V128);
         IRTemp x22222222 = newTemp(Ity_V128);
         IRTemp x11111111 = newTemp(Ity_V128);
         IRTemp x00000000 = newTemp(Ity_V128);
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here. */
         IRTemp xAllF = newTemp(Ity_V128);
         IRTemp xAllE = newTemp(Ity_V128);
         IRTemp xAllD = newTemp(Ity_V128);
         IRTemp xAllC = newTemp(Ity_V128);
         IRTemp xAllB = newTemp(Ity_V128);
         IRTemp xAllA = newTemp(Ity_V128);
         IRTemp xAll9 = newTemp(Ity_V128);
         IRTemp xAll8 = newTemp(Ity_V128);
         IRTemp xAll7 = newTemp(Ity_V128);
         IRTemp xAll6 = newTemp(Ity_V128);
         IRTemp xAll5 = newTemp(Ity_V128);
         IRTemp xAll4 = newTemp(Ity_V128);
         IRTemp xAll3 = newTemp(Ity_V128);
         IRTemp xAll2 = newTemp(Ity_V128);
         IRTemp xAll1 = newTemp(Ity_V128);
         IRTemp xAll0 = newTemp(Ity_V128);
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         /* Each xAll* vector now has one source lane cloned into all
            16 positions.  Fold them pairwise with |op| until a single
            vector remains. */
         IRTemp maxFE = newTemp(Ity_V128);
         IRTemp maxDC = newTemp(Ity_V128);
         IRTemp maxBA = newTemp(Ity_V128);
         IRTemp max98 = newTemp(Ity_V128);
         IRTemp max76 = newTemp(Ity_V128);
         IRTemp max54 = newTemp(Ity_V128);
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTemp(Ity_V128);
         IRTemp maxBA98 = newTemp(Ity_V128);
         IRTemp max7654 = newTemp(Ity_V128);
         IRTemp max3210 = newTemp(Ity_V128);
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTemp(Ity_V128);
         IRTemp max76543210 = newTemp(Ity_V128);
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTemp(Ity_V128);
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         /* All lanes now hold the folded value; keep only lane 0. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: {
         /* Same scheme as the 8x16 case, but with one fewer level of
            lane-cloning and folding. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTemp(Ity_V128);
         IRTemp x32103210 = newTemp(Ity_V128);
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTemp(Ity_V128);
         IRTemp x54545454 = newTemp(Ity_V128);
         IRTemp x32323232 = newTemp(Ity_V128);
         IRTemp x10101010 = newTemp(Ity_V128);
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTemp(Ity_V128);
         IRTemp x66666666 = newTemp(Ity_V128);
         IRTemp x55555555 = newTemp(Ity_V128);
         IRTemp x44444444 = newTemp(Ity_V128);
         IRTemp x33333333 = newTemp(Ity_V128);
         IRTemp x22222222 = newTemp(Ity_V128);
         IRTemp x11111111 = newTemp(Ity_V128);
         IRTemp x00000000 = newTemp(Ity_V128);
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         IRTemp max76 = newTemp(Ity_V128);
         IRTemp max54 = newTemp(Ity_V128);
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTemp(Ity_V128);
         IRTemp max3210 = newTemp(Ity_V128);
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTemp(Ity_V128);
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: {
         /* Same scheme again, for 4 lanes of 32 bits. */
         IRTemp x3210 = src;
         IRTemp x3232 = newTemp(Ity_V128);
         IRTemp x1010 = newTemp(Ity_V128);
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         IRTemp x3333 = newTemp(Ity_V128);
         IRTemp x2222 = newTemp(Ity_V128);
         IRTemp x1111 = newTemp(Ity_V128);
         IRTemp x0000 = newTemp(Ity_V128);
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTemp(Ity_V128);
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      default:
         vassert(0);
   }
}


/* Generate IR for TBL and TBX.  This deals with the 128 bit case
   only. */
static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
                             IRTemp oor_values )
{
   /* |len| is the number of table registers in use, minus 1. */
   vassert(len >= 0 && len <= 3);

   /* Generate some useful constants as concisely as possible. 
*/
   IRTemp half15 = newTemp(Ity_I64);
   assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
   IRTemp half16 = newTemp(Ity_I64);
   assign(half16, mkU64(0x1010101010101010ULL));

   /* A zero vector */
   IRTemp allZero = newTemp(Ity_V128);
   assign(allZero, mkV128(0x0000));
   /* A vector containing 15 in each 8-bit lane */
   IRTemp all15 = newTemp(Ity_V128);
   assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
   /* A vector containing 16 in each 8-bit lane */
   IRTemp all16 = newTemp(Ity_V128);
   assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
   /* A vector containing 32 in each 8-bit lane */
   IRTemp all32 = newTemp(Ity_V128);
   assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
   /* A vector containing 48 in each 8-bit lane */
   IRTemp all48 = newTemp(Ity_V128);
   assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
   /* A vector containing 64 in each 8-bit lane */
   IRTemp all64 = newTemp(Ity_V128);
   assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));

   /* Group the 16/32/48/64 vectors so as to be indexable. */
   IRTemp allXX[4] = { all16, all32, all48, all64 };

   /* Compute the result for each table vector, with zeroes in places
      where the index values are out of range, and OR them into the
      running vector. */
   IRTemp running_result = newTemp(Ity_V128);
   assign(running_result, mkV128(0));

   UInt tabent;
   for (tabent = 0; tabent <= len; tabent++) {
      vassert(tabent >= 0 && tabent < 4);
      /* Rebase the indices so that table vector |tabent| is selected
         by index values 0 .. 15: subtract 16 * tabent from each
         lane. */
      IRTemp bias = newTemp(Ity_V128);
      assign(bias,
             mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
      IRTemp biased_indices = newTemp(Ity_V128);
      assign(biased_indices,
             binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
      /* Lanes whose (unsigned) rebased index is < 16 are valid for
         this table vector. */
      IRTemp valid_mask = newTemp(Ity_V128);
      assign(valid_mask,
             binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
      /* Mask indices to 0 .. 15 so Iop_Perm8x16 never sees an
         out-of-range selector; junk results are zeroed below. */
      IRTemp safe_biased_indices = newTemp(Ity_V128);
      assign(safe_biased_indices,
             binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
      IRTemp results_or_junk = newTemp(Ity_V128);
      assign(results_or_junk,
             binop(Iop_Perm8x16, mkexpr(tab[tabent]),
                                 mkexpr(safe_biased_indices)));
      IRTemp results_or_zero = newTemp(Ity_V128);
      assign(results_or_zero,
             binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
      /* And OR that into the running result. */
      IRTemp tmp = newTemp(Ity_V128);
      assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
                        mkexpr(running_result)));
      running_result = tmp;
   }

   /* So now running_result holds the overall result where the indices
      are in range, and zero in out-of-range lanes.  Now we need to
      compute an overall validity mask and use this to copy in the
      lanes in the oor_values for out of range indices.  This is
      unnecessary for TBL but will get folded out by iropt, so we lean
      on that and generate the same code for TBL and TBX here. 
*/ 5451 IRTemp overall_valid_mask = newTemp(Ity_V128); 5452 assign(overall_valid_mask, 5453 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src))); 5454 IRTemp result = newTemp(Ity_V128); 5455 assign(result, 5456 binop(Iop_OrV128, 5457 mkexpr(running_result), 5458 binop(Iop_AndV128, 5459 mkexpr(oor_values), 5460 unop(Iop_NotV128, mkexpr(overall_valid_mask))))); 5461 return result; 5462} 5463 5464 5465static 5466Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn) 5467{ 5468# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 5469 5470 /* ---------------- FMOV (general) ---------------- */ 5471 /* case 30 23 20 18 15 9 4 5472 (1) 0 00 11110 00 1 00 111 000000 n d FMOV Sd, Wn 5473 (2) 1 00 11110 01 1 00 111 000000 n d FMOV Dd, Xn 5474 (3) 1 00 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn 5475 5476 (4) 0 00 11110 00 1 00 110 000000 n d FMOV Wd, Sn 5477 (5) 1 00 11110 01 1 00 110 000000 n d FMOV Xd, Dn 5478 (6) 1 00 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1] 5479 */ 5480 if (INSN(30,24) == BITS7(0,0,1,1,1,1,0) 5481 && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,0,0,0)) { 5482 UInt sf = INSN(31,31); 5483 UInt ty = INSN(23,22); // type 5484 UInt rm = INSN(20,19); // rmode 5485 UInt op = INSN(18,16); // opcode 5486 UInt nn = INSN(9,5); 5487 UInt dd = INSN(4,0); 5488 UInt ix = 0; // case 5489 if (sf == 0) { 5490 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1)) 5491 ix = 1; 5492 else 5493 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0)) 5494 ix = 4; 5495 } else { 5496 vassert(sf == 1); 5497 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1)) 5498 ix = 2; 5499 else 5500 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0)) 5501 ix = 5; 5502 else 5503 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1)) 5504 ix = 3; 5505 else 5506 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0)) 5507 ix = 6; 5508 } 5509 if (ix > 0) { 5510 switch (ix) { 5511 case 1: 5512 putQReg128(dd, 
mkV128(0)); 5513 putQRegLO(dd, getIReg32orZR(nn)); 5514 DIP("fmov s%u, w%u\n", dd, nn); 5515 break; 5516 case 2: 5517 putQReg128(dd, mkV128(0)); 5518 putQRegLO(dd, getIReg64orZR(nn)); 5519 DIP("fmov d%u, x%u\n", dd, nn); 5520 break; 5521 case 3: 5522 putQRegHI64(dd, getIReg64orZR(nn)); 5523 DIP("fmov v%u.d[1], x%u\n", dd, nn); 5524 break; 5525 case 4: 5526 putIReg32orZR(dd, getQRegLO(nn, Ity_I32)); 5527 DIP("fmov w%u, s%u\n", dd, nn); 5528 break; 5529 case 5: 5530 putIReg64orZR(dd, getQRegLO(nn, Ity_I64)); 5531 DIP("fmov x%u, d%u\n", dd, nn); 5532 break; 5533 case 6: 5534 putIReg64orZR(dd, getQRegHI64(nn)); 5535 DIP("fmov x%u, v%u.d[1]\n", dd, nn); 5536 break; 5537 default: 5538 vassert(0); 5539 } 5540 return True; 5541 } 5542 /* undecodable; fall through */ 5543 } 5544 5545 /* -------------- FMOV (scalar, immediate) -------------- */ 5546 /* 31 28 23 20 12 9 4 5547 000 11110 00 1 imm8 100 00000 d FMOV Sd, #imm 5548 000 11110 01 1 imm8 100 00000 d FMOV Dd, #imm 5549 */ 5550 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) 5551 && INSN(21,21) == 1 && INSN(12,5) == BITS8(1,0,0,0,0,0,0,0)) { 5552 Bool isD = INSN(22,22) == 1; 5553 UInt imm8 = INSN(20,13); 5554 UInt dd = INSN(4,0); 5555 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32); 5556 if (!isD) { 5557 vassert(0 == (imm & 0xFFFFFFFF00000000ULL)); 5558 } 5559 putQReg128(dd, mkV128(0)); 5560 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL)); 5561 DIP("fmov %s, #0x%llx\n", 5562 nameQRegLO(dd, isD ? 
Ity_F64 : Ity_F32), imm); 5563 return True; 5564 } 5565 5566 /* -------------- {FMOV,MOVI} (vector, immediate) -------------- */ 5567 /* 31 28 18 15 11 9 4 5568 0q op 01111 00000 abc cmode 01 defgh d MOV Dd, #imm (q=0) 5569 MOV Vd.2d #imm (q=1) 5570 Allowable op:cmode 5571 FMOV = 1:1111 5572 MOVI = 0:xx00, 1:0x00, 1:10x0, 1:110x, x:1110 5573 */ 5574 if (INSN(31,31) == 0 5575 && INSN(28,19) == BITS10(0,1,1,1,1,0,0,0,0,0) 5576 && INSN(11,10) == BITS2(0,1)) { 5577 UInt bitQ = INSN(30,30); 5578 UInt bitOP = INSN(29,29); 5579 UInt cmode = INSN(15,12); 5580 UInt imm8 = (INSN(18,16) << 5) | INSN(9,5); 5581 UInt dd = INSN(4,0); 5582 ULong imm64lo = 0; 5583 UInt op_cmode = (bitOP << 4) | cmode; 5584 Bool ok = False; 5585 switch (op_cmode) { 5586 case BITS5(1,1,1,1,1): // 1:1111 5587 case BITS5(0,0,0,0,0): case BITS5(0,0,1,0,0): 5588 case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:xx00 5589 case BITS5(1,0,0,0,0): case BITS5(1,0,1,0,0): // 1:0x00 5590 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0 5591 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x 5592 case BITS5(1,1,1,1,0): case BITS5(0,1,1,1,0): // x:1110 5593 ok = True; break; 5594 default: 5595 break; 5596 } 5597 if (ok) { 5598 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, imm8); 5599 } 5600 if (ok) { 5601 ULong imm64hi = (bitQ == 0 && bitOP == 0) ? 
0 : imm64lo; 5602 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo))); 5603 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo); 5604 return True; 5605 } 5606 /* else fall through */ 5607 } 5608 5609 /* -------------- {S,U}CVTF (scalar, integer) -------------- */ 5610 /* 31 28 23 21 20 18 15 9 4 ix 5611 000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn 0 5612 000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn 1 5613 100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn 2 5614 100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn 3 5615 5616 000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn 4 5617 000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn 5 5618 100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn 6 5619 100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn 7 5620 5621 These are signed/unsigned conversion from integer registers to 5622 FP registers, all 4 32/64-bit combinations, rounded per FPCR. 5623 */ 5624 if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,17) == BITS5(1,0,0,0,1) 5625 && INSN(15,10) == BITS6(0,0,0,0,0,0)) { 5626 Bool isI64 = INSN(31,31) == 1; 5627 Bool isF64 = INSN(22,22) == 1; 5628 Bool isU = INSN(16,16) == 1; 5629 UInt nn = INSN(9,5); 5630 UInt dd = INSN(4,0); 5631 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0); 5632 const IROp ops[8] 5633 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64, 5634 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 }; 5635 IRExpr* src = getIRegOrZR(isI64, nn); 5636 IRExpr* res = (isF64 && !isI64) 5637 ? unop(ops[ix], src) 5638 : binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src); 5639 putQReg128(dd, mkV128(0)); 5640 putQRegLO(dd, res); 5641 DIP("%ccvtf %s, %s\n", 5642 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? 
Ity_F64 : Ity_F32), 5643 nameIRegOrZR(isI64, nn)); 5644 return True; 5645 } 5646 5647 /* ------------ F{ADD,SUB,MUL,DIV,NMUL} (scalar) ------------ */ 5648 /* 31 23 20 15 11 9 4 5649 ---------------- 0000 ------ FMUL -------- 5650 000 11110 001 m 0001 10 n d FDIV Sd,Sn,Sm 5651 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm 5652 ---------------- 0010 ------ FADD -------- 5653 ---------------- 0011 ------ FSUB -------- 5654 ---------------- 1000 ------ FNMUL -------- 5655 */ 5656 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) 5657 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { 5658 Bool isD = INSN(22,22) == 1; 5659 UInt mm = INSN(20,16); 5660 UInt op = INSN(15,12); 5661 UInt nn = INSN(9,5); 5662 UInt dd = INSN(4,0); 5663 IROp iop = Iop_INVALID; 5664 IRType ty = isD ? Ity_F64 : Ity_F32; 5665 Bool neg = False; 5666 const HChar* nm = "???"; 5667 switch (op) { 5668 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ty); break; 5669 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ty); break; 5670 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ty); break; 5671 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ty); break; 5672 case BITS4(1,0,0,0): nm = "fnmul"; iop = mkMULF(ty); 5673 neg = True; break; 5674 default: return False; 5675 } 5676 vassert(iop != Iop_INVALID); 5677 IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()), 5678 getQRegLO(nn, ty), getQRegLO(mm, ty)); 5679 IRTemp res = newTemp(ty); 5680 assign(res, neg ? 
unop(mkNEGF(ty),resE) : resE); 5681 putQReg128(dd, mkV128(0)); 5682 putQRegLO(dd, mkexpr(res)); 5683 DIP("%s %s, %s, %s\n", 5684 nm, nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty)); 5685 return True; 5686 } 5687 5688 /* ------------ F{MOV,ABS,NEG,SQRT} D/D or S/S ------------ */ 5689 /* 31 23 21 16 14 9 4 5690 000 11110 00 10000 00 10000 n d FMOV Sd, Sn 5691 000 11110 01 10000 00 10000 n d FMOV Dd, Dn 5692 ------------------ 01 --------- FABS ------ 5693 ------------------ 10 --------- FNEG ------ 5694 ------------------ 11 --------- FSQRT ----- 5695 */ 5696 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) 5697 && INSN(21,17) == BITS5(1,0,0,0,0) 5698 && INSN(14,10) == BITS5(1,0,0,0,0)) { 5699 Bool isD = INSN(22,22) == 1; 5700 UInt opc = INSN(16,15); 5701 UInt nn = INSN(9,5); 5702 UInt dd = INSN(4,0); 5703 IRType ty = isD ? Ity_F64 : Ity_F32; 5704 IRTemp res = newTemp(ty); 5705 if (opc == BITS2(0,0)) { 5706 assign(res, getQRegLO(nn, ty)); 5707 putQReg128(dd, mkV128(0x0000)); 5708 putQRegLO(dd, mkexpr(res)); 5709 DIP("fmov %s, %s\n", 5710 nameQRegLO(dd, ty), nameQRegLO(nn, ty)); 5711 return True; 5712 } 5713 if (opc == BITS2(1,0) || opc == BITS2(0,1)) { 5714 Bool isAbs = opc == BITS2(0,1); 5715 IROp op = isAbs ? mkABSF(ty) : mkNEGF(ty); 5716 assign(res, unop(op, getQRegLO(nn, ty))); 5717 putQReg128(dd, mkV128(0x0000)); 5718 putQRegLO(dd, mkexpr(res)); 5719 DIP("%s %s, %s\n", isAbs ? 
"fabs" : "fneg", 5720 nameQRegLO(dd, ty), nameQRegLO(nn, ty)); 5721 return True; 5722 } 5723 if (opc == BITS2(1,1)) { 5724 assign(res, 5725 binop(mkSQRTF(ty), 5726 mkexpr(mk_get_IR_rounding_mode()), getQRegLO(nn, ty))); 5727 putQReg128(dd, mkV128(0x0000)); 5728 putQRegLO(dd, mkexpr(res)); 5729 DIP("fsqrt %s, %s\n", nameQRegLO(dd, ty), nameQRegLO(nn, ty)); 5730 return True; 5731 } 5732 /* else fall through; other cases are ATC */ 5733 } 5734 5735 /* ---------------- F{ABS,NEG} (vector) ---------------- */ 5736 /* 31 28 22 21 16 9 4 5737 0q0 01110 1 sz 10000 01111 10 n d FABS Vd.T, Vn.T 5738 0q1 01110 1 sz 10000 01111 10 n d FNEG Vd.T, Vn.T 5739 */ 5740 if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,1) 5741 && INSN(21,17) == BITS5(1,0,0,0,0) 5742 && INSN(16,10) == BITS7(0,1,1,1,1,1,0)) { 5743 UInt bitQ = INSN(30,30); 5744 UInt bitSZ = INSN(22,22); 5745 Bool isFNEG = INSN(29,29) == 1; 5746 UInt nn = INSN(9,5); 5747 UInt dd = INSN(4,0); 5748 const HChar* ar = "??"; 5749 IRType tyF = Ity_INVALID; 5750 Bool zeroHI = False; 5751 Bool ok = getLaneInfo_Q_SZ(NULL, &tyF, NULL, &zeroHI, &ar, 5752 (Bool)bitQ, (Bool)bitSZ); 5753 if (ok) { 5754 vassert(tyF == Ity_F64 || tyF == Ity_F32); 5755 IROp op = (tyF == Ity_F64) ? (isFNEG ? Iop_Neg64Fx2 : Iop_Abs64Fx2) 5756 : (isFNEG ? Iop_Neg32Fx4 : Iop_Abs32Fx4); 5757 IRTemp res = newTemp(Ity_V128); 5758 assign(res, unop(op, getQReg128(nn))); 5759 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res)) 5760 : mkexpr(res)); 5761 DIP("%s %s.%s, %s.%s\n", isFNEG ? 
"fneg" : "fabs", 5762 nameQReg128(dd), ar, nameQReg128(nn), ar); 5763 return True; 5764 } 5765 /* else fall through */ 5766 } 5767 5768 /* -------------------- FCMP,FCMPE -------------------- */ 5769 /* 31 23 20 15 9 4 5770 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm 5771 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0 5772 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm 5773 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0 5774 5775 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm 5776 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0 5777 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm 5778 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0 5779 5780 FCMPE generates Invalid Operation exn if either arg is any kind 5781 of NaN. FCMP generates Invalid Operation exn if either arg is a 5782 signalling NaN. We ignore this detail here and produce the same 5783 IR for both. 5784 */ 5785 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) && INSN(21,21) == 1 5786 && INSN(15,10) == BITS6(0,0,1,0,0,0) && INSN(2,0) == BITS3(0,0,0)) { 5787 Bool isD = INSN(22,22) == 1; 5788 UInt mm = INSN(20,16); 5789 UInt nn = INSN(9,5); 5790 Bool isCMPE = INSN(4,4) == 1; 5791 Bool cmpZero = INSN(3,3) == 1; 5792 IRType ty = isD ? Ity_F64 : Ity_F32; 5793 Bool valid = True; 5794 if (cmpZero && mm != 0) valid = False; 5795 if (valid) { 5796 IRTemp argL = newTemp(ty); 5797 IRTemp argR = newTemp(ty); 5798 IRTemp irRes = newTemp(Ity_I32); 5799 assign(argL, getQRegLO(nn, ty)); 5800 assign(argR, 5801 cmpZero 5802 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0))) 5803 : getQRegLO(mm, ty)); 5804 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32, 5805 mkexpr(argL), mkexpr(argR))); 5806 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes); 5807 IRTemp nzcv_28x0 = newTemp(Ity_I64); 5808 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28))); 5809 setFlags_COPY(nzcv_28x0); 5810 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ty), 5811 cmpZero ? 
"#0.0" : nameQRegLO(mm, ty)); 5812 return True; 5813 } 5814 } 5815 5816 /* -------------------- F{N}M{ADD,SUB} -------------------- */ 5817 /* 31 22 20 15 14 9 4 ix 5818 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa 5819 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa 5820 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa 5821 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa 5822 where Fx=Dx when sz=1, Fx=Sx when sz=0 5823 5824 -----SPEC------ ----IMPL---- 5825 fmadd a + n * m a + n * m 5826 fmsub a + (-n) * m a - n * m 5827 fnmadd (-a) + (-n) * m -(a + n * m) 5828 fnmsub (-a) + n * m -(a - n * m) 5829 */ 5830 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,1,0)) { 5831 Bool isD = INSN(22,22) == 1; 5832 UInt mm = INSN(20,16); 5833 UInt aa = INSN(14,10); 5834 UInt nn = INSN(9,5); 5835 UInt dd = INSN(4,0); 5836 UInt ix = (INSN(21,21) << 1) | INSN(15,15); 5837 IRType ty = isD ? Ity_F64 : Ity_F32; 5838 IROp opADD = mkADDF(ty); 5839 IROp opSUB = mkSUBF(ty); 5840 IROp opMUL = mkMULF(ty); 5841 IROp opNEG = mkNEGF(ty); 5842 IRTemp res = newTemp(ty); 5843 IRExpr* eA = getQRegLO(aa, ty); 5844 IRExpr* eN = getQRegLO(nn, ty); 5845 IRExpr* eM = getQRegLO(mm, ty); 5846 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode()); 5847 IRExpr* eNxM = triop(opMUL, rm, eN, eM); 5848 switch (ix) { 5849 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break; 5850 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break; 5851 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break; 5852 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break; 5853 default: vassert(0); 5854 } 5855 putQReg128(dd, mkV128(0x0000)); 5856 putQRegLO(dd, mkexpr(res)); 5857 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" }; 5858 DIP("%s %s, %s, %s, %s\n", 5859 names[ix], nameQRegLO(dd, ty), nameQRegLO(nn, ty), 5860 nameQRegLO(mm, ty), nameQRegLO(aa, ty)); 5861 return True; 5862 } 5863 5864 /* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */ 5865 /* 30 23 20 18 15 9 4 5866 sf 00 11110 0x 1 
00 000 000000 n d FCVTNS Rd, Fn (round to 5867 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest) 5868 ---------------- 01 -------------- FCVTP-------- (round to +inf) 5869 ---------------- 10 -------------- FCVTM-------- (round to -inf) 5870 ---------------- 11 -------------- FCVTZ-------- (round to zero) 5871 5872 Rd is Xd when sf==1, Wd when sf==0 5873 Fn is Dn when x==1, Sn when x==0 5874 20:19 carry the rounding mode, using the same encoding as FPCR 5875 */ 5876 if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,21) == 1 5877 && INSN(18,17) == BITS2(0,0) && INSN(15,10) == BITS6(0,0,0,0,0,0)) { 5878 Bool isI64 = INSN(31,31) == 1; 5879 Bool isF64 = INSN(22,22) == 1; 5880 UInt rm = INSN(20,19); 5881 Bool isU = INSN(16,16) == 1; 5882 UInt nn = INSN(9,5); 5883 UInt dd = INSN(4,0); 5884 /* Decide on the IR rounding mode to use. */ 5885 IRRoundingMode irrm = 8; /*impossible*/ 5886 HChar ch = '?'; 5887 switch (rm) { 5888 case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break; 5889 case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break; 5890 case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break; 5891 case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break; 5892 default: vassert(0); 5893 } 5894 vassert(irrm != 8); 5895 /* Decide on the conversion primop, based on the source size, 5896 dest size and signedness (8 possibilities). Case coding: 5897 F32 ->s I32 0 5898 F32 ->u I32 1 5899 F32 ->s I64 2 5900 F32 ->u I64 3 5901 F64 ->s I32 4 5902 F64 ->u I32 5 5903 F64 ->s I64 6 5904 F64 ->u I64 7 5905 */ 5906 UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0); 5907 vassert(ix < 8); 5908 const IROp ops[8] 5909 = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U, 5910 Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U }; 5911 IROp op = ops[ix]; 5912 // A bit of ATCery: bounce all cases we haven't seen an example of. 
5913 if (/* F32toI32S */ 5914 (op == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */ 5915 || (op == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */ 5916 || (op == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */ 5917 /* F32toI32U */ 5918 || (op == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */ 5919 || (op == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */ 5920 /* F32toI64S */ 5921 || (op == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */ 5922 /* F32toI64U */ 5923 || (op == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */ 5924 || (op == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */ 5925 /* F64toI32S */ 5926 || (op == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */ 5927 || (op == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */ 5928 || (op == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */ 5929 /* F64toI32U */ 5930 || (op == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */ 5931 || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */ 5932 || (op == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */ 5933 /* F64toI64S */ 5934 || (op == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */ 5935 || (op == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */ 5936 || (op == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */ 5937 /* F64toI64U */ 5938 || (op == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */ 5939 || (op == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */ 5940 ) { 5941 /* validated */ 5942 } else { 5943 return False; 5944 } 5945 IRType srcTy = isF64 ? Ity_F64 : Ity_F32; 5946 IRType dstTy = isI64 ? Ity_I64 : Ity_I32; 5947 IRTemp src = newTemp(srcTy); 5948 IRTemp dst = newTemp(dstTy); 5949 assign(src, getQRegLO(nn, srcTy)); 5950 assign(dst, binop(op, mkU32(irrm), mkexpr(src))); 5951 putIRegOrZR(isI64, dd, mkexpr(dst)); 5952 DIP("fcvt%c%c %s, %s\n", ch, isU ? 
'u' : 's', 5953 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy)); 5954 return True; 5955 } 5956 5957 /* -------- FCVTAS (KLUDGED) (scalar, integer) -------- */ 5958 /* 30 23 20 18 15 9 4 5959 1 00 11110 0x 1 00 100 000000 n d FCVTAS Xd, Fn 5960 0 00 11110 0x 1 00 100 000000 n d FCVTAS Wd, Fn 5961 Fn is Dn when x==1, Sn when x==0 5962 */ 5963 if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) 5964 && INSN(21,16) == BITS6(1,0,0,1,0,0) 5965 && INSN(15,10) == BITS6(0,0,0,0,0,0)) { 5966 Bool isI64 = INSN(31,31) == 1; 5967 Bool isF64 = INSN(22,22) == 1; 5968 UInt nn = INSN(9,5); 5969 UInt dd = INSN(4,0); 5970 /* Decide on the IR rounding mode to use. */ 5971 /* KLUDGE: should be Irrm_NEAREST_TIE_AWAY_0 */ 5972 IRRoundingMode irrm = Irrm_NEAREST; 5973 /* Decide on the conversion primop. */ 5974 IROp op = isI64 ? (isF64 ? Iop_F64toI64S : Iop_F32toI64S) 5975 : (isF64 ? Iop_F64toI32S : Iop_F32toI32S); 5976 IRType srcTy = isF64 ? Ity_F64 : Ity_F32; 5977 IRType dstTy = isI64 ? Ity_I64 : Ity_I32; 5978 IRTemp src = newTemp(srcTy); 5979 IRTemp dst = newTemp(dstTy); 5980 assign(src, getQRegLO(nn, srcTy)); 5981 assign(dst, binop(op, mkU32(irrm), mkexpr(src))); 5982 putIRegOrZR(isI64, dd, mkexpr(dst)); 5983 DIP("fcvtas %s, %s (KLUDGED)\n", 5984 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy)); 5985 return True; 5986 } 5987 5988 /* ---------------- FRINT{I,M,P,Z} (scalar) ---------------- */ 5989 /* 31 23 21 17 14 9 4 5990 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR) 5991 rm 5992 x==0 => S-registers, x==1 => D-registers 5993 rm (17:15) encodings: 5994 111 per FPCR (FRINTI) 5995 001 +inf (FRINTP) 5996 010 -inf (FRINTM) 5997 011 zero (FRINTZ) 5998 000 tieeven 5999 100 tieaway (FRINTA) -- !! FIXME KLUDGED !! 
6000 110 per FPCR + "exact = TRUE" 6001 101 unallocated 6002 */ 6003 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) 6004 && INSN(21,18) == BITS4(1,0,0,1) && INSN(14,10) == BITS5(1,0,0,0,0)) { 6005 Bool isD = INSN(22,22) == 1; 6006 UInt rm = INSN(17,15); 6007 UInt nn = INSN(9,5); 6008 UInt dd = INSN(4,0); 6009 IRType ty = isD ? Ity_F64 : Ity_F32; 6010 IRExpr* irrmE = NULL; 6011 UChar ch = '?'; 6012 switch (rm) { 6013 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break; 6014 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break; 6015 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break; 6016 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0 6017 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break; 6018 default: break; 6019 } 6020 if (irrmE) { 6021 IRTemp src = newTemp(ty); 6022 IRTemp dst = newTemp(ty); 6023 assign(src, getQRegLO(nn, ty)); 6024 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt, 6025 irrmE, mkexpr(src))); 6026 putQReg128(dd, mkV128(0x0000)); 6027 putQRegLO(dd, mkexpr(dst)); 6028 DIP("frint%c %s, %s\n", 6029 ch, nameQRegLO(dd, ty), nameQRegLO(nn, ty)); 6030 return True; 6031 } 6032 /* else unhandled rounding mode case -- fall through */ 6033 } 6034 6035 /* ------------------ FCVT (scalar) ------------------ */ 6036 /* 31 23 21 16 14 9 4 6037 000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp) 6038 --------- 11 ----- 01 --------- FCVT Dd, Hn (unimp) 6039 --------- 00 ----- 11 --------- FCVT Hd, Sn (unimp) 6040 --------- 00 ----- 01 --------- FCVT Dd, Sn 6041 --------- 01 ----- 11 --------- FCVT Hd, Dn (unimp) 6042 --------- 01 ----- 00 --------- FCVT Sd, Dn 6043 Rounding, when dst is smaller than src, is per the FPCR. 
6044 */ 6045 if (INSN(31,24) == BITS8(0,0,0,1,1,1,1,0) 6046 && INSN(21,17) == BITS5(1,0,0,0,1) 6047 && INSN(14,10) == BITS5(1,0,0,0,0)) { 6048 UInt b2322 = INSN(23,22); 6049 UInt b1615 = INSN(16,15); 6050 UInt nn = INSN(9,5); 6051 UInt dd = INSN(4,0); 6052 if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) { 6053 /* Convert S to D */ 6054 IRTemp res = newTemp(Ity_F64); 6055 assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32))); 6056 putQReg128(dd, mkV128(0x0000)); 6057 putQRegLO(dd, mkexpr(res)); 6058 DIP("fcvt %s, %s\n", 6059 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32)); 6060 return True; 6061 } 6062 if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) { 6063 /* Convert D to S */ 6064 IRTemp res = newTemp(Ity_F32); 6065 assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()), 6066 getQRegLO(nn, Ity_F64))); 6067 putQReg128(dd, mkV128(0x0000)); 6068 putQRegLO(dd, mkexpr(res)); 6069 DIP("fcvt %s, %s\n", 6070 nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64)); 6071 return True; 6072 } 6073 /* else unhandled */ 6074 } 6075 6076 /* ------------------ FABD (scalar) ------------------ */ 6077 /* 31 23 20 15 9 4 6078 011 11110 111 m 110101 n d FABD Dd, Dn, Dm 6079 011 11110 101 m 110101 n d FABD Sd, Sn, Sm 6080 */ 6081 if (INSN(31,23) == BITS9(0,1,1,1,1,1,1,0,1) && INSN(21,21) == 1 6082 && INSN(15,10) == BITS6(1,1,0,1,0,1)) { 6083 Bool isD = INSN(22,22) == 1; 6084 UInt mm = INSN(20,16); 6085 UInt nn = INSN(9,5); 6086 UInt dd = INSN(4,0); 6087 IRType ty = isD ? 
Ity_F64 : Ity_F32; 6088 IRTemp res = newTemp(ty); 6089 assign(res, unop(mkABSF(ty), 6090 triop(mkSUBF(ty), 6091 mkexpr(mk_get_IR_rounding_mode()), 6092 getQRegLO(nn,ty), getQRegLO(mm,ty)))); 6093 putQReg128(dd, mkV128(0x0000)); 6094 putQRegLO(dd, mkexpr(res)); 6095 DIP("fabd %s, %s, %s\n", 6096 nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty)); 6097 return True; 6098 } 6099 6100 /* -------------- {S,U}CVTF (vector, integer) -------------- */ 6101 /* 31 28 22 21 15 9 4 6102 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn 6103 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn 6104 with laneage: 6105 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D 6106 */ 6107 if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,0) 6108 && INSN(21,16) == BITS6(1,0,0,0,0,1) 6109 && INSN(15,10) == BITS6(1,1,0,1,1,0)) { 6110 Bool isQ = INSN(30,30) == 1; 6111 Bool isU = INSN(29,29) == 1; 6112 Bool isF64 = INSN(22,22) == 1; 6113 UInt nn = INSN(9,5); 6114 UInt dd = INSN(4,0); 6115 if (isQ || !isF64) { 6116 IRType tyF = Ity_INVALID, tyI = Ity_INVALID; 6117 UInt nLanes = 0; 6118 Bool zeroHI = False; 6119 const HChar* arrSpec = NULL; 6120 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec, 6121 isQ, isF64 ); 6122 IROp op = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32) 6123 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32); 6124 IRTemp rm = mk_get_IR_rounding_mode(); 6125 UInt i; 6126 vassert(ok); /* the 'if' above should ensure this */ 6127 for (i = 0; i < nLanes; i++) { 6128 putQRegLane(dd, i, 6129 binop(op, mkexpr(rm), getQRegLane(nn, i, tyI))); 6130 } 6131 if (zeroHI) { 6132 putQRegLane(dd, 1, mkU64(0)); 6133 } 6134 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 
'u' : 's', 6135 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec); 6136 return True; 6137 } 6138 /* else fall through */ 6139 } 6140 6141 /* ---------- F{ADD,SUB,MUL,DIV,MLA,MLS} (vector) ---------- */ 6142 /* 31 28 22 21 20 15 9 4 case 6143 0q0 01110 0 sz 1 m 110101 n d FADD Vd,Vn,Vm 1 6144 0q0 01110 1 sz 1 m 110101 n d FSUB Vd,Vn,Vm 2 6145 0q1 01110 0 sz 1 m 110111 n d FMUL Vd,Vn,Vm 3 6146 0q1 01110 0 sz 1 m 111111 n d FDIV Vd,Vn,Vm 4 6147 0q0 01110 0 sz 1 m 110011 n d FMLA Vd,Vn,Vm 5 6148 0q0 01110 1 sz 1 m 110011 n d FMLS Vd,Vn,Vm 6 6149 0q1 01110 1 sz 1 m 110101 n d FABD Vd,Vn,Vm 7 6150 */ 6151 if (INSN(31,31) == 0 6152 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) { 6153 Bool isQ = INSN(30,30) == 1; 6154 UInt b29 = INSN(29,29); 6155 UInt b23 = INSN(23,23); 6156 Bool isF64 = INSN(22,22) == 1; 6157 UInt mm = INSN(20,16); 6158 UInt b1510 = INSN(15,10); 6159 UInt nn = INSN(9,5); 6160 UInt dd = INSN(4,0); 6161 UInt ix = 0; 6162 /**/ if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,1,0,1)) ix = 1; 6163 else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 2; 6164 else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,0,1,1,1)) ix = 3; 6165 else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,1,1,1,1)) ix = 4; 6166 else if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,0,1,1)) ix = 5; 6167 else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,0,1,1)) ix = 6; 6168 else if (b29 == 1 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 7; 6169 IRType laneTy = Ity_INVALID; 6170 Bool zeroHI = False; 6171 const HChar* arr = "??"; 6172 Bool ok 6173 = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64); 6174 /* Skip MLA/MLS for the time being */ 6175 if (ok && ix >= 1 && ix <= 4) { 6176 const IROp ops64[4] 6177 = { Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2 }; 6178 const IROp ops32[4] 6179 = { Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4 }; 6180 const HChar* names[4] 6181 = { "fadd", "fsub", "fmul", "fdiv" }; 6182 IROp op = 
laneTy==Ity_F64 ? ops64[ix-1] : ops32[ix-1]; 6183 IRTemp rm = mk_get_IR_rounding_mode(); 6184 IRTemp t1 = newTemp(Ity_V128); 6185 IRTemp t2 = newTemp(Ity_V128); 6186 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm))); 6187 assign(t2, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t1)) 6188 : mkexpr(t1)); 6189 putQReg128(dd, mkexpr(t2)); 6190 DIP("%s %s.%s, %s.%s, %s.%s\n", names[ix-1], 6191 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 6192 return True; 6193 } 6194 if (ok && ix >= 5 && ix <= 6) { 6195 IROp opADD = laneTy==Ity_F64 ? Iop_Add64Fx2 : Iop_Add32Fx4; 6196 IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4; 6197 IROp opMUL = laneTy==Ity_F64 ? Iop_Mul64Fx2 : Iop_Mul32Fx4; 6198 IRTemp rm = mk_get_IR_rounding_mode(); 6199 IRTemp t1 = newTemp(Ity_V128); 6200 IRTemp t2 = newTemp(Ity_V128); 6201 // FIXME: double rounding; use FMA primops instead 6202 assign(t1, triop(opMUL, 6203 mkexpr(rm), getQReg128(nn), getQReg128(mm))); 6204 assign(t2, triop(ix == 5 ? opADD : opSUB, 6205 mkexpr(rm), getQReg128(dd), mkexpr(t1))); 6206 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2)) 6207 : mkexpr(t2)); 6208 DIP("%s %s.%s, %s.%s, %s.%s\n", ix == 5 ? "fmla" : "fmls", 6209 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 6210 return True; 6211 } 6212 if (ok && ix == 7) { 6213 IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4; 6214 IROp opABS = laneTy==Ity_F64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4; 6215 IRTemp rm = mk_get_IR_rounding_mode(); 6216 IRTemp t1 = newTemp(Ity_V128); 6217 IRTemp t2 = newTemp(Ity_V128); 6218 // FIXME: use Abd primop instead? 6219 assign(t1, triop(opSUB, 6220 mkexpr(rm), getQReg128(nn), getQReg128(mm))); 6221 assign(t2, unop(opABS, mkexpr(t1))); 6222 putQReg128(dd, zeroHI ? 
unop(Iop_ZeroHI64ofV128, mkexpr(t2)) 6223 : mkexpr(t2)); 6224 DIP("fabd %s.%s, %s.%s, %s.%s\n", 6225 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 6226 return True; 6227 } 6228 } 6229 6230 /* ------------ FCM{EQ,GE,GT}, FAC{GE,GT} (vector) ------------ */ 6231 /* 31 28 22 20 15 9 4 case 6232 0q1 01110 0 sz 1 m 111011 n d FACGE Vd, Vn, Vm 6233 0q1 01110 1 sz 1 m 111011 n d FACGT Vd, Vn, Vm 6234 0q0 01110 0 sz 1 m 111001 n d FCMEQ Vd, Vn, Vm 6235 0q1 01110 0 sz 1 m 111001 n d FCMGE Vd, Vn, Vm 6236 0q1 01110 1 sz 1 m 111001 n d FCMGT Vd, Vn, Vm 6237 */ 6238 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1 6239 && INSN(15,12) == BITS4(1,1,1,0) && INSN(10,10) == 1) { 6240 Bool isQ = INSN(30,30) == 1; 6241 UInt U = INSN(29,29); 6242 UInt E = INSN(23,23); 6243 Bool isF64 = INSN(22,22) == 1; 6244 UInt ac = INSN(11,11); 6245 UInt mm = INSN(20,16); 6246 UInt nn = INSN(9,5); 6247 UInt dd = INSN(4,0); 6248 /* */ 6249 UInt EUac = (E << 2) | (U << 1) | ac; 6250 IROp opABS = Iop_INVALID; 6251 IROp opCMP = Iop_INVALID; 6252 IRType laneTy = Ity_INVALID; 6253 Bool zeroHI = False; 6254 Bool swap = True; 6255 const HChar* arr = "??"; 6256 const HChar* nm = "??"; 6257 Bool ok 6258 = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64); 6259 if (ok) { 6260 vassert((isF64 && laneTy == Ity_F64) || (!isF64 && laneTy == Ity_F32)); 6261 switch (EUac) { 6262 case BITS3(0,0,0): 6263 nm = "fcmeq"; 6264 opCMP = isF64 ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4; 6265 swap = False; 6266 break; 6267 case BITS3(0,1,0): 6268 nm = "fcmge"; 6269 opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4; 6270 break; 6271 case BITS3(0,1,1): 6272 nm = "facge"; 6273 opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4; 6274 opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4; 6275 break; 6276 case BITS3(1,1,0): 6277 nm = "fcmgt"; 6278 opCMP = isF64 ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4; 6279 break; 6280 case BITS3(1,1,1): 6281 nm = "fcagt"; 6282 opCMP = isF64 ? 
Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4; 6283 opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4; 6284 break; 6285 default: 6286 break; 6287 } 6288 } 6289 if (opCMP != Iop_INVALID) { 6290 IRExpr* argN = getQReg128(nn); 6291 IRExpr* argM = getQReg128(mm); 6292 if (opABS != Iop_INVALID) { 6293 argN = unop(opABS, argN); 6294 argM = unop(opABS, argM); 6295 } 6296 IRExpr* res = swap ? binop(opCMP, argM, argN) 6297 : binop(opCMP, argN, argM); 6298 if (zeroHI) { 6299 res = unop(Iop_ZeroHI64ofV128, res); 6300 } 6301 putQReg128(dd, res); 6302 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 6303 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 6304 return True; 6305 } 6306 /* else fall through */ 6307 } 6308 6309 /* -------------------- FCVTN -------------------- */ 6310 /* 31 28 23 20 15 9 4 6311 0q0 01110 0s1 00001 011010 n d FCVTN Vd, Vn 6312 where case q:s of 00: 16Fx4(lo) <- 32Fx4 6313 01: 32Fx2(lo) <- 64Fx2 6314 10: 16Fx4(hi) <- 32Fx4 6315 11: 32Fx2(hi) <- 64Fx2 6316 Only deals with the 32Fx2 <- 64Fx2 version (s==1) 6317 */ 6318 if (INSN(31,31) == 0 && INSN(29,23) == BITS7(0,0,1,1,1,0,0) 6319 && INSN(21,10) == BITS12(1,0,0,0,0,1,0,1,1,0,1,0)) { 6320 UInt bQ = INSN(30,30); 6321 UInt bS = INSN(22,22); 6322 UInt nn = INSN(9,5); 6323 UInt dd = INSN(4,0); 6324 if (bS == 1) { 6325 IRTemp rm = mk_get_IR_rounding_mode(); 6326 IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64); 6327 IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64); 6328 putQRegLane(dd, 2 * bQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo)); 6329 putQRegLane(dd, 2 * bQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi)); 6330 if (bQ == 0) { 6331 putQRegLane(dd, 1, mkU64(0)); 6332 } 6333 DIP("fcvtn%s %s.%s, %s.2d\n", bQ ? "2" : "", 6334 nameQReg128(dd), bQ ? 
"4s" : "2s", nameQReg128(nn)); 6335 return True; 6336 } 6337 /* else fall through */ 6338 } 6339 6340 /* ---------------- ADD/SUB (vector) ---------------- */ 6341 /* 31 28 23 21 20 15 9 4 6342 0q0 01110 size 1 m 100001 n d ADD Vd.T, Vn.T, Vm.T 6343 0q1 01110 size 1 m 100001 n d SUB Vd.T, Vn.T, Vm.T 6344 */ 6345 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) 6346 && INSN(21,21) == 1 && INSN(15,10) == BITS6(1,0,0,0,0,1)) { 6347 Bool isQ = INSN(30,30) == 1; 6348 UInt szBlg2 = INSN(23,22); 6349 Bool isSUB = INSN(29,29) == 1; 6350 UInt mm = INSN(20,16); 6351 UInt nn = INSN(9,5); 6352 UInt dd = INSN(4,0); 6353 Bool zeroHI = False; 6354 const HChar* arrSpec = ""; 6355 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 ); 6356 if (ok) { 6357 const IROp opsADD[4] 6358 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 }; 6359 const IROp opsSUB[4] 6360 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 }; 6361 vassert(szBlg2 < 4); 6362 IROp op = isSUB ? opsSUB[szBlg2] : opsADD[szBlg2]; 6363 IRTemp t = newTemp(Ity_V128); 6364 assign(t, binop(op, getQReg128(nn), getQReg128(mm))); 6365 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t)) 6366 : mkexpr(t)); 6367 const HChar* nm = isSUB ? "sub" : "add"; 6368 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 6369 nameQReg128(dd), arrSpec, 6370 nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec); 6371 return True; 6372 } 6373 /* else fall through */ 6374 } 6375 6376 /* ---------------- ADD/SUB (scalar) ---------------- */ 6377 /* 31 28 23 21 20 15 9 4 6378 010 11110 11 1 m 100001 n d ADD Dd, Dn, Dm 6379 011 11110 11 1 m 100001 n d SUB Dd, Dn, Dm 6380 */ 6381 if (INSN(31,30) == BITS2(0,1) && INSN(28,21) == BITS8(1,1,1,1,0,1,1,1) 6382 && INSN(15,10) == BITS6(1,0,0,0,0,1)) { 6383 Bool isSUB = INSN(29,29) == 1; 6384 UInt mm = INSN(20,16); 6385 UInt nn = INSN(9,5); 6386 UInt dd = INSN(4,0); 6387 IRTemp res = newTemp(Ity_I64); 6388 assign(res, binop(isSUB ? 
Iop_Sub64 : Iop_Add64, 6389 getQRegLane(nn, 0, Ity_I64), 6390 getQRegLane(mm, 0, Ity_I64))); 6391 putQRegLane(dd, 0, mkexpr(res)); 6392 putQRegLane(dd, 1, mkU64(0)); 6393 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add", 6394 nameQRegLO(dd, Ity_I64), 6395 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64)); 6396 return True; 6397 } 6398 6399 /* ------------ MUL/PMUL/MLA/MLS (vector) ------------ */ 6400 /* 31 28 23 21 20 15 9 4 6401 0q0 01110 size 1 m 100111 n d MUL Vd.T, Vn.T, Vm.T B/H/S only 6402 0q1 01110 size 1 m 100111 n d PMUL Vd.T, Vn.T, Vm.T B only 6403 0q0 01110 size 1 m 100101 n d MLA Vd.T, Vn.T, Vm.T B/H/S only 6404 0q1 01110 size 1 m 100101 n d MLS Vd.T, Vn.T, Vm.T B/H/S only 6405 */ 6406 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) 6407 && INSN(21,21) == 1 6408 && (INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(1,0,0,1,0,1)) { 6409 Bool isQ = INSN(30,30) == 1; 6410 UInt szBlg2 = INSN(23,22); 6411 UInt bit29 = INSN(29,29); 6412 UInt mm = INSN(20,16); 6413 UInt nn = INSN(9,5); 6414 UInt dd = INSN(4,0); 6415 Bool isMLAS = INSN(11,11) == 0; 6416 const IROp opsADD[4] 6417 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_INVALID }; 6418 const IROp opsSUB[4] 6419 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_INVALID }; 6420 const IROp opsMUL[4] 6421 = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID }; 6422 const IROp opsPMUL[4] 6423 = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID }; 6424 /* Set opMUL and, if necessary, opACC. A result value of 6425 Iop_INVALID for opMUL indicates that the instruction is 6426 invalid. */ 6427 Bool zeroHI = False; 6428 const HChar* arrSpec = ""; 6429 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 ); 6430 vassert(szBlg2 < 4); 6431 IROp opACC = Iop_INVALID; 6432 IROp opMUL = Iop_INVALID; 6433 if (ok) { 6434 opMUL = (bit29 == 1 && !isMLAS) ? opsPMUL[szBlg2] 6435 : opsMUL[szBlg2]; 6436 opACC = isMLAS ? (bit29 == 1 ? 
opsSUB[szBlg2] : opsADD[szBlg2]) 6437 : Iop_INVALID; 6438 } 6439 if (ok && opMUL != Iop_INVALID) { 6440 IRTemp t1 = newTemp(Ity_V128); 6441 assign(t1, binop(opMUL, getQReg128(nn), getQReg128(mm))); 6442 IRTemp t2 = newTemp(Ity_V128); 6443 assign(t2, opACC == Iop_INVALID 6444 ? mkexpr(t1) 6445 : binop(opACC, getQReg128(dd), mkexpr(t1))); 6446 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2)) 6447 : mkexpr(t2)); 6448 const HChar* nm = isMLAS ? (bit29 == 1 ? "mls" : "mla") 6449 : (bit29 == 1 ? "pmul" : "mul"); 6450 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 6451 nameQReg128(dd), arrSpec, 6452 nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec); 6453 return True; 6454 } 6455 /* else fall through */ 6456 } 6457 6458 /* ---------------- {S,U}{MIN,MAX} (vector) ---------------- */ 6459 /* 31 28 23 21 20 15 9 4 6460 0q0 01110 size 1 m 011011 n d SMIN Vd.T, Vn.T, Vm.T 6461 0q1 01110 size 1 m 011011 n d UMIN Vd.T, Vn.T, Vm.T 6462 0q0 01110 size 1 m 011001 n d SMAX Vd.T, Vn.T, Vm.T 6463 0q1 01110 size 1 m 011001 n d UMAX Vd.T, Vn.T, Vm.T 6464 */ 6465 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) 6466 && INSN(21,21) == 1 6467 && ((INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(0,1,1,0,0,1))) { 6468 Bool isQ = INSN(30,30) == 1; 6469 Bool isU = INSN(29,29) == 1; 6470 UInt szBlg2 = INSN(23,22); 6471 Bool isMAX = INSN(11,11) == 0; 6472 UInt mm = INSN(20,16); 6473 UInt nn = INSN(9,5); 6474 UInt dd = INSN(4,0); 6475 Bool zeroHI = False; 6476 const HChar* arrSpec = ""; 6477 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 ); 6478 if (ok) { 6479 const IROp opMINS[4] 6480 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 }; 6481 const IROp opMINU[4] 6482 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 }; 6483 const IROp opMAXS[4] 6484 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 }; 6485 const IROp opMAXU[4] 6486 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 }; 6487 vassert(szBlg2 < 4); 6488 IROp op = isMAX ? (isU ? 
opMAXU[szBlg2] : opMAXS[szBlg2]) 6489 : (isU ? opMINU[szBlg2] : opMINS[szBlg2]); 6490 IRTemp t = newTemp(Ity_V128); 6491 assign(t, binop(op, getQReg128(nn), getQReg128(mm))); 6492 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t)) 6493 : mkexpr(t)); 6494 const HChar* nm = isMAX ? (isU ? "umax" : "smax") 6495 : (isU ? "umin" : "smin"); 6496 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 6497 nameQReg128(dd), arrSpec, 6498 nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec); 6499 return True; 6500 } 6501 /* else fall through */ 6502 } 6503 6504 /* -------------------- {S,U}{MIN,MAX}V -------------------- */ 6505 /* 31 28 23 21 16 15 9 4 6506 0q0 01110 size 11000 1 101010 n d SMINV Vd, Vn.T 6507 0q1 01110 size 11000 1 101010 n d UMINV Vd, Vn.T 6508 0q0 01110 size 11000 0 101010 n d SMAXV Vd, Vn.T 6509 0q1 01110 size 11000 0 101010 n d UMAXV Vd, Vn.T 6510 */ 6511 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) 6512 && INSN(21,17) == BITS5(1,1,0,0,0) 6513 && INSN(15,10) == BITS6(1,0,1,0,1,0)) { 6514 Bool isQ = INSN(30,30) == 1; 6515 Bool isU = INSN(29,29) == 1; 6516 UInt szBlg2 = INSN(23,22); 6517 Bool isMAX = INSN(16,16) == 0; 6518 UInt nn = INSN(9,5); 6519 UInt dd = INSN(4,0); 6520 Bool zeroHI = False; 6521 const HChar* arrSpec = ""; 6522 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2); 6523 if (ok) { 6524 if (szBlg2 == 3) ok = False; 6525 if (szBlg2 == 2 && !isQ) ok = False; 6526 } 6527 if (ok) { 6528 const IROp opMINS[3] 6529 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 }; 6530 const IROp opMINU[3] 6531 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 }; 6532 const IROp opMAXS[3] 6533 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 }; 6534 const IROp opMAXU[3] 6535 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 }; 6536 vassert(szBlg2 < 3); 6537 IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2]) 6538 : (isU ? 
opMINU[szBlg2] : opMINS[szBlg2]); 6539 IRTemp tN1 = newTemp(Ity_V128); 6540 assign(tN1, getQReg128(nn)); 6541 /* If Q == 0, we're just folding lanes in the lower half of 6542 the value. In which case, copy the lower half of the 6543 source into the upper half, so we can then treat it the 6544 same as the full width case. */ 6545 IRTemp tN2 = newTemp(Ity_V128); 6546 assign(tN2, zeroHI ? mk_CatEvenLanes64x2(tN1,tN1) : mkexpr(tN1)); 6547 IRTemp res = math_MINMAXV(tN2, op); 6548 if (res == IRTemp_INVALID) 6549 return False; /* means math_MINMAXV 6550 doesn't handle this case yet */ 6551 putQReg128(dd, mkexpr(res)); 6552 const HChar* nm = isMAX ? (isU ? "umaxv" : "smaxv") 6553 : (isU ? "uminv" : "sminv"); 6554 const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 }; 6555 IRType laneTy = tys[szBlg2]; 6556 DIP("%s %s, %s.%s\n", nm, 6557 nameQRegLO(dd, laneTy), nameQReg128(nn), arrSpec); 6558 return True; 6559 } 6560 /* else fall through */ 6561 } 6562 6563 /* ------------ {AND,BIC,ORR,ORN} (vector) ------------ */ 6564 /* 31 28 23 20 15 9 4 6565 0q0 01110 001 m 000111 n d AND Vd.T, Vn.T, Vm.T 6566 0q0 01110 011 m 000111 n d BIC Vd.T, Vn.T, Vm.T 6567 0q0 01110 101 m 000111 n d ORR Vd.T, Vn.T, Vm.T 6568 0q0 01110 111 m 000111 n d ORN Vd.T, Vn.T, Vm.T 6569 T is 16b when q==1, 8b when q==0 6570 */ 6571 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0) 6572 && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) { 6573 Bool isQ = INSN(30,30) == 1; 6574 Bool isORR = INSN(23,23) == 1; 6575 Bool invert = INSN(22,22) == 1; 6576 UInt mm = INSN(20,16); 6577 UInt nn = INSN(9,5); 6578 UInt dd = INSN(4,0); 6579 IRTemp res = newTemp(Ity_V128); 6580 assign(res, binop(isORR ? Iop_OrV128 : Iop_AndV128, 6581 getQReg128(nn), 6582 invert ? unop(Iop_NotV128, getQReg128(mm)) 6583 : getQReg128(mm))); 6584 putQReg128(dd, isQ ? mkexpr(res) 6585 : unop(Iop_ZeroHI64ofV128, mkexpr(res))); 6586 const HChar* names[4] = { "and", "bic", "orr", "orn" }; 6587 const HChar* ar = isQ ? 
"16b" : "8b"; 6588 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)], 6589 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar); 6590 return True; 6591 } 6592 6593 /* ---------- CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector) ---------- */ 6594 /* 31 28 23 21 15 9 4 ix 6595 0q1 01110 size 1 m 100011 n d CMEQ Vd.T, Vn.T, Vm.T (1) == 6596 0q0 01110 size 1 m 100011 n d CMTST Vd.T, Vn.T, Vm.T (2) &, != 0 6597 6598 0q1 01110 size 1 m 001101 n d CMHI Vd.T, Vn.T, Vm.T (3) >u 6599 0q0 01110 size 1 m 001101 n d CMGT Vd.T, Vn.T, Vm.T (4) >s 6600 6601 0q1 01110 size 1 m 001111 n d CMHS Vd.T, Vn.T, Vm.T (5) >=u 6602 0q0 01110 size 1 m 001111 n d CMGE Vd.T, Vn.T, Vm.T (6) >=s 6603 6604 0q1 01110 size 100000 100010 n d CMGE Vd.T, Vn.T, #0 (7) >=s 0 6605 0q0 01110 size 100000 100010 n d CMGT Vd.T, Vn.T, #0 (8) >s 0 6606 6607 0q1 01110 size 100000 100110 n d CMLE Vd.T, Vn.T, #0 (9) <=s 0 6608 0q0 01110 size 100000 100110 n d CMEQ Vd.T, Vn.T, #0 (10) == 0 6609 6610 0q0 01110 size 100000 101010 n d CMLT Vd.T, Vn.T, #0 (11) <s 0 6611 */ 6612 if (INSN(31,31) == 0 6613 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) { 6614 Bool isQ = INSN(30,30) == 1; 6615 UInt bit29 = INSN(29,29); 6616 UInt szBlg2 = INSN(23,22); 6617 UInt mm = INSN(20,16); 6618 UInt b1510 = INSN(15,10); 6619 UInt nn = INSN(9,5); 6620 UInt dd = INSN(4,0); 6621 const IROp opsEQ[4] 6622 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 }; 6623 const IROp opsGTS[4] 6624 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 }; 6625 const IROp opsGTU[4] 6626 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 }; 6627 Bool zeroHI = False; 6628 const HChar* arrSpec = "??"; 6629 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2); 6630 UInt ix = 0; 6631 if (ok) { 6632 switch (b1510) { 6633 case BITS6(1,0,0,0,1,1): ix = bit29 ? 1 : 2; break; 6634 case BITS6(0,0,1,1,0,1): ix = bit29 ? 3 : 4; break; 6635 case BITS6(0,0,1,1,1,1): ix = bit29 ? 
5 : 6; break; 6636 case BITS6(1,0,0,0,1,0): 6637 if (mm == 0) { ix = bit29 ? 7 : 8; }; break; 6638 case BITS6(1,0,0,1,1,0): 6639 if (mm == 0) { ix = bit29 ? 9 : 10; }; break; 6640 case BITS6(1,0,1,0,1,0): 6641 if (mm == 0 && bit29 == 0) { ix = 11; }; break; 6642 default: break; 6643 } 6644 } 6645 if (ix != 0) { 6646 vassert(ok && szBlg2 < 4); 6647 IRExpr* argL = getQReg128(nn); 6648 IRExpr* argR = (ix <= 6) ? getQReg128(mm) : mkV128(0x0000); 6649 IRExpr* res = NULL; 6650 /* Some useful identities: 6651 x > y can be expressed directly 6652 x < y == y > x 6653 x <= y == not (x > y) 6654 x >= y == not (y > x) 6655 */ 6656 switch (ix) { 6657 case 1: res = binop(opsEQ[szBlg2], argL, argR); break; 6658 case 2: res = unop(Iop_NotV128, binop(opsEQ[szBlg2], 6659 binop(Iop_AndV128, argL, argR), 6660 mkV128(0x0000))); 6661 break; 6662 case 3: res = binop(opsGTU[szBlg2], argL, argR); break; 6663 case 4: res = binop(opsGTS[szBlg2], argL, argR); break; 6664 case 5: res = unop(Iop_NotV128, binop(opsGTU[szBlg2], argR, argL)); 6665 break; 6666 case 6: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL)); 6667 break; 6668 case 7: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL)); 6669 break; 6670 case 8: res = binop(opsGTS[szBlg2], argL, argR); break; 6671 case 9: res = unop(Iop_NotV128, 6672 binop(opsGTS[szBlg2], argL, argR)); 6673 break; 6674 case 10: res = binop(opsEQ[szBlg2], argL, argR); break; 6675 case 11: res = binop(opsGTS[szBlg2], argR, argL); break; 6676 default: vassert(0); 6677 } 6678 vassert(res); 6679 putQReg128(dd, zeroHI ? 
unop(Iop_ZeroHI64ofV128, res) : res); 6680 const HChar* nms[11] = { "eq", "tst", "hi", "gt", "hs", "ge", 6681 "ge", "gt", "le", "eq", "lt" }; 6682 if (ix <= 6) { 6683 DIP("cm%s %s.%s, %s.%s, %s.%s\n", nms[ix-1], 6684 nameQReg128(dd), arrSpec, 6685 nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec); 6686 } else { 6687 DIP("cm%s %s.%s, %s.%s, #0\n", nms[ix-1], 6688 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec); 6689 } 6690 return True; 6691 } 6692 /* else fall through */ 6693 } 6694 6695 /* -------------- {EOR,BSL,BIT,BIF} (vector) -------------- */ 6696 /* 31 28 23 20 15 9 4 6697 0q1 01110 00 1 m 000111 n d EOR Vd.T, Vm.T, Vn.T 6698 0q1 01110 01 1 m 000111 n d BSL Vd.T, Vm.T, Vn.T 6699 0q1 01110 10 1 m 000111 n d BIT Vd.T, Vm.T, Vn.T 6700 0q1 01110 11 1 m 000111 n d BIF Vd.T, Vm.T, Vn.T 6701 */ 6702 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0) 6703 && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) { 6704 Bool isQ = INSN(30,30) == 1; 6705 UInt op = INSN(23,22); 6706 UInt mm = INSN(20,16); 6707 UInt nn = INSN(9,5); 6708 UInt dd = INSN(4,0); 6709 IRTemp argD = newTemp(Ity_V128); 6710 IRTemp argN = newTemp(Ity_V128); 6711 IRTemp argM = newTemp(Ity_V128); 6712 assign(argD, getQReg128(dd)); 6713 assign(argN, getQReg128(nn)); 6714 assign(argM, getQReg128(mm)); 6715 const IROp opXOR = Iop_XorV128; 6716 const IROp opAND = Iop_AndV128; 6717 const IROp opNOT = Iop_NotV128; 6718 IRExpr* res = NULL; 6719 switch (op) { 6720 case BITS2(0,0): /* EOR */ 6721 res = binop(opXOR, mkexpr(argM), mkexpr(argN)); 6722 break; 6723 case BITS2(0,1): /* BSL */ 6724 res = binop(opXOR, mkexpr(argM), 6725 binop(opAND, 6726 binop(opXOR, mkexpr(argM), mkexpr(argN)), 6727 mkexpr(argD))); 6728 break; 6729 case BITS2(1,0): /* BIT */ 6730 res = binop(opXOR, mkexpr(argD), 6731 binop(opAND, 6732 binop(opXOR, mkexpr(argD), mkexpr(argN)), 6733 mkexpr(argM))); 6734 break; 6735 case BITS2(1,1): /* BIF */ 6736 res = binop(opXOR, mkexpr(argD), 6737 binop(opAND, 6738 binop(opXOR, 
mkexpr(argD), mkexpr(argN)), 6739 unop(opNOT, mkexpr(argM)))); 6740 break; 6741 default: 6742 vassert(0); 6743 } 6744 vassert(res); 6745 putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res)); 6746 const HChar* nms[4] = { "eor", "bsl", "bit", "bif" }; 6747 const HChar* arr = isQ ? "16b" : "8b"; 6748 vassert(op < 4); 6749 DIP("%s %s.%s, %s.%s, %s.%s\n", nms[op], 6750 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 6751 return True; 6752 } 6753 6754 /* ------------ {USHR,SSHR,SHL} (vector, immediate) ------------ */ 6755 /* 31 28 22 18 15 9 4 6756 0q1 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #shift (1) 6757 0q0 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #shift (2) 6758 0q0 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #shift (3) 6759 laneTy, shift = case immh:immb of 6760 0001:xxx -> B, SHR:8-xxx, SHL:xxx 6761 001x:xxx -> H, SHR:16-xxxx SHL:xxxx 6762 01xx:xxx -> S, SHR:32-xxxxx SHL:xxxxx 6763 1xxx:xxx -> D, SHR:64-xxxxxx SHL:xxxxxx 6764 other -> invalid 6765 As usual the case laneTy==D && q==0 is not allowed. 
6766 */ 6767 if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0) 6768 && INSN(10,10) == 1) { 6769 UInt ix = 0; 6770 /**/ if (INSN(29,29) == 1 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 1; 6771 else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 2; 6772 else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,1,0,1,0)) ix = 3; 6773 if (ix > 0) { 6774 Bool isQ = INSN(30,30) == 1; 6775 UInt immh = INSN(22,19); 6776 UInt immb = INSN(18,16); 6777 UInt nn = INSN(9,5); 6778 UInt dd = INSN(4,0); 6779 const IROp opsSHRN[4] 6780 = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 }; 6781 const IROp opsSARN[4] 6782 = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 }; 6783 const IROp opsSHLN[4] 6784 = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 }; 6785 UInt szBlg2 = 0; 6786 UInt shift = 0; 6787 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb); 6788 if (ix == 3) { 6789 /* The shift encoding has opposite sign for the leftwards 6790 case. Adjust shift to compensate. */ 6791 shift = (8 << szBlg2) - shift; 6792 } 6793 if (ok && szBlg2 < 4 && shift > 0 && shift < (8 << szBlg2) 6794 && !(szBlg2 == 3/*64bit*/ && !isQ)) { 6795 IROp op = Iop_INVALID; 6796 const HChar* nm = NULL; 6797 switch (ix) { 6798 case 1: op = opsSHRN[szBlg2]; nm = "ushr"; break; 6799 case 2: op = opsSARN[szBlg2]; nm = "sshr"; break; 6800 case 3: op = opsSHLN[szBlg2]; nm = "shl"; break; 6801 default: vassert(0); 6802 } 6803 IRExpr* src = getQReg128(nn); 6804 IRExpr* res = binop(op, src, mkU8(shift)); 6805 putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res)); 6806 HChar laneCh = "bhsd"[szBlg2]; 6807 UInt nLanes = (isQ ? 
128 : 64) / (8 << szBlg2); 6808 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm, 6809 nameQReg128(dd), nLanes, laneCh, 6810 nameQReg128(nn), nLanes, laneCh, shift); 6811 return True; 6812 } 6813 /* else fall through */ 6814 } 6815 } 6816 6817 /* -------------------- {U,S}SHLL{,2} -------------------- */ 6818 /* 31 28 22 18 15 9 4 6819 0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh 6820 0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh 6821 where Ta,Tb,sh 6822 = case immh of 1xxx -> invalid 6823 01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31) 6824 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15) 6825 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7) 6826 0000 -> AdvSIMD modified immediate (???) 6827 */ 6828 if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0) 6829 && INSN(15,10) == BITS6(1,0,1,0,0,1)) { 6830 Bool isQ = INSN(30,30) == 1; 6831 Bool isU = INSN(29,29) == 1; 6832 UInt immh = INSN(22,19); 6833 UInt immb = INSN(18,16); 6834 UInt nn = INSN(9,5); 6835 UInt dd = INSN(4,0); 6836 UInt immhb = (immh << 3) | immb; 6837 IRTemp src = newTemp(Ity_V128); 6838 IRTemp zero = newTemp(Ity_V128); 6839 IRExpr* res = NULL; 6840 UInt sh = 0; 6841 const HChar* ta = "??"; 6842 const HChar* tb = "??"; 6843 assign(src, getQReg128(nn)); 6844 assign(zero, mkV128(0x0000)); 6845 if (immh & 8) { 6846 /* invalid; don't assign to res */ 6847 } 6848 else if (immh & 4) { 6849 sh = immhb - 32; 6850 vassert(sh < 32); /* so 32-sh is 1..32 */ 6851 ta = "2d"; 6852 tb = isQ ? "4s" : "2s"; 6853 IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero) 6854 : mk_InterleaveLO32x4(src, zero); 6855 res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh)); 6856 } 6857 else if (immh & 2) { 6858 sh = immhb - 16; 6859 vassert(sh < 16); /* so 16-sh is 1..16 */ 6860 ta = "4s"; 6861 tb = isQ ? "8h" : "4h"; 6862 IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero) 6863 : mk_InterleaveLO16x8(src, zero); 6864 res = binop(isU ? 
Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh)); 6865 } 6866 else if (immh & 1) { 6867 sh = immhb - 8; 6868 vassert(sh < 8); /* so 8-sh is 1..8 */ 6869 ta = "8h"; 6870 tb = isQ ? "16b" : "8b"; 6871 IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero) 6872 : mk_InterleaveLO8x16(src, zero); 6873 res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh)); 6874 } else { 6875 vassert(immh == 0); 6876 /* invalid; don't assign to res */ 6877 } 6878 /* */ 6879 if (res) { 6880 putQReg128(dd, res); 6881 DIP("%cshll%s %s.%s, %s.%s, #%d\n", 6882 isU ? 'u' : 's', isQ ? "2" : "", 6883 nameQReg128(dd), ta, nameQReg128(nn), tb, sh); 6884 return True; 6885 } 6886 /* else fall through */ 6887 } 6888 6889 /* -------------------- XTN{,2} -------------------- */ 6890 /* 31 28 23 21 15 9 4 XTN{,2} Vd.Tb, Vn.Ta 6891 0q0 01110 size 100001 001010 n d 6892 */ 6893 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0) 6894 && INSN(21,16) == BITS6(1,0,0,0,0,1) 6895 && INSN(15,10) == BITS6(0,0,1,0,1,0)) { 6896 Bool isQ = INSN(30,30) == 1; 6897 UInt size = INSN(23,22); 6898 UInt nn = INSN(9,5); 6899 UInt dd = INSN(4,0); 6900 IROp op = Iop_INVALID; 6901 const HChar* tb = NULL; 6902 const HChar* ta = NULL; 6903 switch ((size << 1) | (isQ ? 1 : 0)) { 6904 case 0: tb = "8b"; ta = "8h"; op = Iop_NarrowUn16to8x8; break; 6905 case 1: tb = "16b"; ta = "8h"; op = Iop_NarrowUn16to8x8; break; 6906 case 2: tb = "4h"; ta = "4s"; op = Iop_NarrowUn32to16x4; break; 6907 case 3: tb = "8h"; ta = "4s"; op = Iop_NarrowUn32to16x4; break; 6908 case 4: tb = "2s"; ta = "2d"; op = Iop_NarrowUn64to32x2; break; 6909 case 5: tb = "4s"; ta = "2d"; op = Iop_NarrowUn64to32x2; break; 6910 case 6: break; 6911 case 7: break; 6912 default: vassert(0); 6913 } 6914 if (op != Iop_INVALID) { 6915 if (!isQ) { 6916 putQRegLane(dd, 1, mkU64(0)); 6917 } 6918 putQRegLane(dd, isQ ? 1 : 0, unop(op, getQReg128(nn))); 6919 DIP("xtn%s %s.%s, %s.%s\n", isQ ? 
"2" : "", 6920 nameQReg128(dd), tb, nameQReg128(nn), ta); 6921 return True; 6922 } 6923 /* else fall through */ 6924 } 6925 6926 /* ---------------- CNT (vector) ---------------- */ 6927 /* 31 29 23 21 9 4 6928 0q 001110 00 100000010110 n d CNT Vd.T, Vn.T 6929 */ 6930 6931 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0) 6932 && INSN(23,22) == BITS2(0,0) 6933 && INSN(21,10) == BITS12(1,0,0,0,0,0,0,1,0,1,1,0) ) { 6934 Bool isQ = INSN(30,30) == 1; 6935 UInt nn = INSN(9,5); 6936 UInt dd = INSN(4,0); 6937 const HChar* name = isQ ? "16b" : "8b"; 6938 6939 IRExpr* res = unop(Iop_Cnt8x16, getQReg128(nn)); 6940 putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res)); 6941 6942 DIP("cnt %s.%s, %s.%s\n", nameQReg128(dd), name, nameQReg128(nn), name); 6943 return True; 6944 } 6945 6946 6947 /* ---------------- DUP (element, vector) ---------------- */ 6948 /* 31 28 20 15 9 4 6949 0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index] 6950 */ 6951 if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0) 6952 && INSN(15,10) == BITS6(0,0,0,0,0,1)) { 6953 Bool isQ = INSN(30,30) == 1; 6954 UInt imm5 = INSN(20,16); 6955 UInt nn = INSN(9,5); 6956 UInt dd = INSN(4,0); 6957 IRTemp w0 = newTemp(Ity_I64); 6958 const HChar* arT = "??"; 6959 const HChar* arTs = "??"; 6960 IRType laneTy = Ity_INVALID; 6961 UInt laneNo = 16; /* invalid */ 6962 if (imm5 & 1) { 6963 arT = isQ ? "16b" : "8b"; 6964 arTs = "b"; 6965 laneNo = (imm5 >> 1) & 15; 6966 laneTy = Ity_I8; 6967 assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy))); 6968 } 6969 else if (imm5 & 2) { 6970 arT = isQ ? "8h" : "4h"; 6971 arTs = "h"; 6972 laneNo = (imm5 >> 2) & 7; 6973 laneTy = Ity_I16; 6974 assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy))); 6975 } 6976 else if (imm5 & 4) { 6977 arT = isQ ? 
"4s" : "2s"; 6978 arTs = "s"; 6979 laneNo = (imm5 >> 3) & 3; 6980 laneTy = Ity_I32; 6981 assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy))); 6982 } 6983 else if ((imm5 & 8) && isQ) { 6984 arT = "2d"; 6985 arTs = "d"; 6986 laneNo = (imm5 >> 4) & 1; 6987 laneTy = Ity_I64; 6988 assign(w0, getQRegLane(nn, laneNo, laneTy)); 6989 } 6990 else { 6991 /* invalid; leave laneTy unchanged. */ 6992 } 6993 /* */ 6994 if (laneTy != Ity_INVALID) { 6995 vassert(laneNo < 16); 6996 IRTemp w1 = math_DUP_TO_64(w0, laneTy); 6997 putQReg128(dd, binop(Iop_64HLtoV128, 6998 isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1))); 6999 DIP("dup %s.%s, %s.%s[%u]\n", 7000 nameQReg128(dd), arT, nameQReg128(nn), arTs, laneNo); 7001 return True; 7002 } 7003 /* else fall through */ 7004 } 7005 7006 /* ---------------- DUP (general, vector) ---------------- */ 7007 /* 31 28 23 20 15 9 4 7008 0q0 01110 000 imm5 000011 n d DUP Vd.T, Rn 7009 Q=0 writes 64, Q=1 writes 128 7010 imm5: xxxx1 8B(q=0) or 16b(q=1), R=W 7011 xxx10 4H(q=0) or 8H(q=1), R=W 7012 xx100 2S(q=0) or 4S(q=1), R=W 7013 x1000 Invalid(q=0) or 2D(q=1), R=X 7014 x0000 Invalid(q=0) or Invalid(q=1) 7015 */ 7016 if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0) 7017 && INSN(15,10) == BITS6(0,0,0,0,1,1)) { 7018 Bool isQ = INSN(30,30) == 1; 7019 UInt imm5 = INSN(20,16); 7020 UInt nn = INSN(9,5); 7021 UInt dd = INSN(4,0); 7022 IRTemp w0 = newTemp(Ity_I64); 7023 const HChar* arT = "??"; 7024 IRType laneTy = Ity_INVALID; 7025 if (imm5 & 1) { 7026 arT = isQ ? "16b" : "8b"; 7027 laneTy = Ity_I8; 7028 assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn)))); 7029 } 7030 else if (imm5 & 2) { 7031 arT = isQ ? "8h" : "4h"; 7032 laneTy = Ity_I16; 7033 assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn)))); 7034 } 7035 else if (imm5 & 4) { 7036 arT = isQ ? 
"4s" : "2s"; 7037 laneTy = Ity_I32; 7038 assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn)))); 7039 } 7040 else if ((imm5 & 8) && isQ) { 7041 arT = "2d"; 7042 laneTy = Ity_I64; 7043 assign(w0, getIReg64orZR(nn)); 7044 } 7045 else { 7046 /* invalid; leave laneTy unchanged. */ 7047 } 7048 /* */ 7049 if (laneTy != Ity_INVALID) { 7050 IRTemp w1 = math_DUP_TO_64(w0, laneTy); 7051 putQReg128(dd, binop(Iop_64HLtoV128, 7052 isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1))); 7053 DIP("dup %s.%s, %s\n", 7054 nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn)); 7055 return True; 7056 } 7057 /* else fall through */ 7058 } 7059 7060 /* ---------------------- {S,U}MOV ---------------------- */ 7061 /* 31 28 20 15 9 4 7062 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index] 7063 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index] 7064 dest is Xd when q==1, Wd when q==0 7065 UMOV: 7066 Ts,index,ops = case q:imm5 of 7067 0:xxxx1 -> B, xxxx, 8Uto64 7068 1:xxxx1 -> invalid 7069 0:xxx10 -> H, xxx, 16Uto64 7070 1:xxx10 -> invalid 7071 0:xx100 -> S, xx, 32Uto64 7072 1:xx100 -> invalid 7073 1:x1000 -> D, x, copy64 7074 other -> invalid 7075 SMOV: 7076 Ts,index,ops = case q:imm5 of 7077 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32) 7078 1:xxxx1 -> B, xxxx, 8Sto64 7079 0:xxx10 -> H, xxx, (32Uto64 . 
16Sto32) 7080 1:xxx10 -> H, xxx, 16Sto64 7081 0:xx100 -> invalid 7082 1:xx100 -> S, xx, 32Sto64 7083 1:x1000 -> invalid 7084 other -> invalid 7085 */ 7086 if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0) 7087 && (INSN(15,10) & BITS6(1,1,1,0,1,1)) == BITS6(0,0,1,0,1,1)) { 7088 UInt bitQ = INSN(30,30) == 1; 7089 UInt imm5 = INSN(20,16); 7090 UInt nn = INSN(9,5); 7091 UInt dd = INSN(4,0); 7092 Bool isU = INSN(12,12) == 1; 7093 const HChar* arTs = "??"; 7094 UInt laneNo = 16; /* invalid */ 7095 // Setting 'res' to non-NULL determines valid/invalid 7096 IRExpr* res = NULL; 7097 if (!bitQ && (imm5 & 1)) { // 0:xxxx1 7098 laneNo = (imm5 >> 1) & 15; 7099 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8); 7100 res = isU ? unop(Iop_8Uto64, lane) 7101 : unop(Iop_32Uto64, unop(Iop_8Sto32, lane)); 7102 arTs = "b"; 7103 } 7104 else if (bitQ && (imm5 & 1)) { // 1:xxxx1 7105 laneNo = (imm5 >> 1) & 15; 7106 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8); 7107 res = isU ? NULL 7108 : unop(Iop_8Sto64, lane); 7109 arTs = "b"; 7110 } 7111 else if (!bitQ && (imm5 & 2)) { // 0:xxx10 7112 laneNo = (imm5 >> 2) & 7; 7113 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16); 7114 res = isU ? unop(Iop_16Uto64, lane) 7115 : unop(Iop_32Uto64, unop(Iop_16Sto32, lane)); 7116 arTs = "h"; 7117 } 7118 else if (bitQ && (imm5 & 2)) { // 1:xxx10 7119 laneNo = (imm5 >> 2) & 7; 7120 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16); 7121 res = isU ? NULL 7122 : unop(Iop_16Sto64, lane); 7123 arTs = "h"; 7124 } 7125 else if (!bitQ && (imm5 & 4)) { // 0:xx100 7126 laneNo = (imm5 >> 3) & 3; 7127 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32); 7128 res = isU ? unop(Iop_32Uto64, lane) 7129 : NULL; 7130 arTs = "s"; 7131 } 7132 else if (bitQ && (imm5 & 4)) { // 1:xxx10 7133 laneNo = (imm5 >> 3) & 3; 7134 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32); 7135 res = isU ? 
NULL 7136 : unop(Iop_32Sto64, lane); 7137 arTs = "s"; 7138 } 7139 else if (bitQ && (imm5 & 8)) { // 1:x1000 7140 laneNo = (imm5 >> 4) & 1; 7141 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64); 7142 res = isU ? lane 7143 : NULL; 7144 arTs = "d"; 7145 } 7146 /* */ 7147 if (res) { 7148 vassert(laneNo < 16); 7149 putIReg64orZR(dd, res); 7150 DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's', 7151 nameIRegOrZR(bitQ == 1, dd), 7152 nameQReg128(nn), arTs, laneNo); 7153 return True; 7154 } 7155 /* else fall through */ 7156 } 7157 7158 /* -------------------- INS (general) -------------------- */ 7159 /* 31 28 20 15 9 4 7160 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn 7161 where Ts,ix = case imm5 of xxxx1 -> B, xxxx 7162 xxx10 -> H, xxx 7163 xx100 -> S, xx 7164 x1000 -> D, x 7165 */ 7166 if (INSN(31,21) == BITS11(0,1,0,0,1,1,1,0,0,0,0) 7167 && INSN(15,10) == BITS6(0,0,0,1,1,1)) { 7168 UInt imm5 = INSN(20,16); 7169 UInt nn = INSN(9,5); 7170 UInt dd = INSN(4,0); 7171 HChar ts = '?'; 7172 UInt laneNo = 16; 7173 IRExpr* src = NULL; 7174 if (imm5 & 1) { 7175 src = unop(Iop_64to8, getIReg64orZR(nn)); 7176 laneNo = (imm5 >> 1) & 15; 7177 ts = 'b'; 7178 } 7179 else if (imm5 & 2) { 7180 src = unop(Iop_64to16, getIReg64orZR(nn)); 7181 laneNo = (imm5 >> 2) & 7; 7182 ts = 'h'; 7183 } 7184 else if (imm5 & 4) { 7185 src = unop(Iop_64to32, getIReg64orZR(nn)); 7186 laneNo = (imm5 >> 3) & 3; 7187 ts = 's'; 7188 } 7189 else if (imm5 & 8) { 7190 src = getIReg64orZR(nn); 7191 laneNo = (imm5 >> 4) & 1; 7192 ts = 'd'; 7193 } 7194 /* */ 7195 if (src) { 7196 vassert(laneNo < 16); 7197 putQRegLane(dd, laneNo, src); 7198 DIP("ins %s.%c[%u], %s\n", 7199 nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn)); 7200 return True; 7201 } 7202 /* else invalid; fall through */ 7203 } 7204 7205 /* -------------------- NEG (vector) -------------------- */ 7206 /* 31 28 23 21 16 9 4 7207 0q1 01110 sz 10000 0101110 n d NEG Vd, Vn 7208 sz is laneSz, q:sz == 011 is disallowed, as usual 7209 */ 7210 if (INSN(31,31) == 0 
&& INSN(29,24) == BITS6(1,0,1,1,1,0) 7211 && INSN(21,10) == BITS12(1,0,0,0,0,0,1,0,1,1,1,0)) { 7212 Bool isQ = INSN(30,30) == 1; 7213 UInt szBlg2 = INSN(23,22); 7214 UInt nn = INSN(9,5); 7215 UInt dd = INSN(4,0); 7216 Bool zeroHI = False; 7217 const HChar* arrSpec = ""; 7218 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 ); 7219 if (ok) { 7220 const IROp opSUB[4] 7221 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 }; 7222 IRTemp res = newTemp(Ity_V128); 7223 vassert(szBlg2 < 4); 7224 assign(res, binop(opSUB[szBlg2], mkV128(0x0000), getQReg128(nn))); 7225 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res)) 7226 : mkexpr(res)); 7227 DIP("neg %s.%s, %s.%s\n", 7228 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec); 7229 return True; 7230 } 7231 /* else fall through */ 7232 } 7233 7234 /* -------------------- TBL, TBX -------------------- */ 7235 /* 31 28 20 15 14 12 9 4 7236 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta 7237 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta 7238 where Ta = 16b(q=1) or 8b(q=0) 7239 */ 7240 if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0) 7241 && INSN(15,15) == 0 && INSN(11,10) == BITS2(0,0)) { 7242 Bool isQ = INSN(30,30) == 1; 7243 Bool isTBX = INSN(12,12) == 1; 7244 UInt mm = INSN(20,16); 7245 UInt len = INSN(14,13); 7246 UInt nn = INSN(9,5); 7247 UInt dd = INSN(4,0); 7248 /* The out-of-range values to use. */ 7249 IRTemp oor_values = newTemp(Ity_V128); 7250 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0)); 7251 /* src value */ 7252 IRTemp src = newTemp(Ity_V128); 7253 assign(src, getQReg128(mm)); 7254 /* The table values */ 7255 IRTemp tab[4]; 7256 UInt i; 7257 for (i = 0; i <= len; i++) { 7258 vassert(i < 4); 7259 tab[i] = newTemp(Ity_V128); 7260 assign(tab[i], getQReg128((nn + i) % 32)); 7261 } 7262 IRTemp res = math_TBL_TBX(tab, len, src, oor_values); 7263 putQReg128(dd, isQ ? 
mkexpr(res) 7264 : unop(Iop_ZeroHI64ofV128, mkexpr(res)) ); 7265 const HChar* Ta = isQ ? "16b" : "8b"; 7266 const HChar* nm = isTBX ? "tbx" : "tbl"; 7267 DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n", 7268 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta); 7269 return True; 7270 } 7271 /* FIXME Temporary hacks to get through ld.so FIXME */ 7272 7273 /* ------------------ movi vD.4s, #0x0 ------------------ */ 7274 /* 0x4F 0x00 0x04 000 vD */ 7275 if ((insn & 0xFFFFFFE0) == 0x4F000400) { 7276 UInt vD = INSN(4,0); 7277 putQReg128(vD, mkV128(0x0000)); 7278 DIP("movi v%u.4s, #0x0\n", vD); 7279 return True; 7280 } 7281 7282 /* ---------------- MOV vD.16b, vN.16b ---------------- */ 7283 /* 31 23 20 15 9 4 7284 010 01110 101 m 000111 n d ORR vD.16b, vN.16b, vM.16b 7285 This only handles the N == M case. 7286 */ 7287 if (INSN(31,24) == BITS8(0,1,0,0,1,1,1,0) 7288 && INSN(23,21) == BITS3(1,0,1) && INSN(15,10) == BITS6(0,0,0,1,1,1)) { 7289 UInt mm = INSN(20,16); 7290 UInt nn = INSN(9,5); 7291 UInt dd = INSN(4,0); 7292 if (mm == nn) { 7293 putQReg128(dd, getQReg128(nn)); 7294 DIP("mov v%u.16b, v%u.16b\n", dd, nn); 7295 return True; 7296 } 7297 /* else it's really an ORR; fall through. 
*/
   }

   /* ---------------- CMEQ_d_d_#0 ---------------- */
   /*
      010 11110 11 10000 0100 110 n d  CMEQ Dd, Dn, #0
   */
   if ((INSN(31,0) & 0xFFFFFC00) == 0x5EE09800) {
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      /* Compare the low 64-bit lane against zero; the upper 64 bits of
         the destination are cleared, as required for a D-reg result. */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_CmpEQ64x2, getQReg128(nn),
                                mkV128(0x0000))));
      DIP("cmeq d%u, d%u, #0\n", dd, nn);
      return True;
   }

   /* ---------------- SHL_d_d_#imm ---------------- */
   /* 31         22 21 18 15     9  4
      010 111110 1  ih3 ib 010101 n  d  SHL Dd, Dn, #(ih3:ib)
   */
   if (INSN(31,22) == BITS10(0,1,0,1,1,1,1,1,0,1)
       && INSN(15,10) == BITS6(0,1,0,1,0,1)) {
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      /* The 6-bit shift amount is ih3:ib = insn[21:16]. */
      UInt sh = INSN(21,16);
      vassert(sh < 64);
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
      DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

   vex_printf("ARM64 front end: simd_and_fp\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Disassemble a single ARM64 instruction               ---*/
/*------------------------------------------------------------*/

/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has guest IP of
   |guest_PC_curr_instr|, which will have been set before the call
   here.  Returns True iff the instruction was decoded, in which case
   *dres will be set accordingly, or False, in which case *dres should
   be ignored by the caller. */

static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        UChar*       guest_instr,
        VexArchInfo* archinfo,
        VexAbiInfo*  abiinfo
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   // NOTE(review): resteerOkFn, resteerCisOk, callback_opaque and abiinfo
   // are not referenced in this function body — presumably reserved for
   // resteering support; confirm before removing.

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults: "decoded OK, 4 bytes long, keep decoding".
      Sub-decoders rely on these and only change them for control-flow
      instructions. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      UChar* code = (UChar*)guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it?
            In each case the whole special sequence is 20 bytes
            (16-byte preamble + 4-byte marker insn), hence the
            "+ 20" / "len = 20" below. */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /*  branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is.  A preamble followed by anything
            else is a decode failure, per the comment at the top of the
            file. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta].
*/

DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr64 ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           UChar*       guest_code_IN,
                           Long         delta_IN,
                           Addr64       guest_IP,
                           VexArch      guest_arch,
                           VexArchInfo* archinfo,
                           VexAbiInfo*  abiinfo,
                           Bool         host_bigendian_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_is_bigendian   = host_bigendian_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   /* i.e. relies on unsigned wraparound for x < 2. */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 (UChar*)&guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here. */
      /* len is 4 for a normal insn, 20 for a "Special" sequence
         (16-byte preamble + 4-byte marker). */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         /* 32 binary digits plus at most 7 group separators and a
            terminating NUL comfortably fit in 64 bytes. */
         UChar buf[64];
         UInt  insn
            = getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         /* Render the insn MSB-first in binary, with ' between bytes
            and ' between nibbles, for the diagnostic message. */
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.whatNext    = Dis_StopHere;
      dres.len         = 0;
      dres.continueAt  = 0;
      dres.jk_StopHere = Ijk_NoDecode;
   }
   return dres;
}

////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////

/* Spare code for doing reference implementations of various 128-bit
   SIMD interleaves/deinterleaves/concatenation ops.  For 64-bit
   equivalents see the end of guest_arm_toIR.c. */

////////////////////////////////////////////////////////////////
// 64x2 operations
//

// Concatenate the even (low) 64-bit lanes of a10 and b10:
// result = a0:b0.
static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 )
{
   // returns a0 b0
   return binop(Iop_64HLtoV128, unop(Iop_V128to64, mkexpr(a10)),
                                unop(Iop_V128to64, mkexpr(b10)));
}

// Concatenate the odd (high) 64-bit lanes of a10 and b10:
// result = a1:b1.
static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 )
{
   // returns a1 b1
   return binop(Iop_64HLtoV128, unop(Iop_V128HIto64, mkexpr(a10)),
                                unop(Iop_V128HIto64, mkexpr(b10)));
}


////////////////////////////////////////////////////////////////
// 32x4 operations
//

// Split a 128 bit value into 4 32 bit ones, in 64-bit IRTemps with
// the top halves guaranteed to be zero.
// Splits |v128| into up to four 32-bit lanes, writing each requested
// lane (out3 = bits 127:96 down to out0 = bits 31:0) into a freshly
// allocated 64-bit IRTemp whose upper 32 bits are zero.  A NULL output
// pointer means that lane is not wanted and no temp is allocated for it.
static void breakV128to32s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
                             IRTemp* out0, IRTemp v128 )
{
   if (out3) *out3 = newTemp(Ity_I64);
   if (out2) *out2 = newTemp(Ity_I64);
   if (out1) *out1 = newTemp(Ity_I64);
   if (out0) *out0 = newTemp(Ity_I64);
   IRTemp hi64 = newTemp(Ity_I64);
   IRTemp lo64 = newTemp(Ity_I64);
   assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
   assign(lo64, unop(Iop_V128to64,   mkexpr(v128)) );
   // Odd lanes come from the top halves (shift), even lanes from the
   // bottom halves (mask); either way the result's upper 32 bits are 0.
   if (out3) assign(*out3, binop(Iop_Shr64, mkexpr(hi64), mkU8(32)));
   if (out2) assign(*out2, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFFFFFF)));
   if (out1) assign(*out1, binop(Iop_Shr64, mkexpr(lo64), mkU8(32)));
   if (out0) assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFFFFFF)));
}

// Make a V128 bit value from 4 32 bit ones, each of which is in a 64 bit
// IRTemp.
// NOTE: unlike mkV128from16s below, the inputs are NOT masked before
// being shifted/OR-ed, so in3 and in1 must have their upper 32 bits
// zero — exactly the invariant breakV128to32s provides.
static IRTemp mkV128from32s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
{
   IRTemp hi64 = newTemp(Ity_I64);
   IRTemp lo64 = newTemp(Ity_I64);
   assign(hi64,
          binop(Iop_Or64,
                binop(Iop_Shl64, mkexpr(in3), mkU8(32)),
                binop(Iop_And64, mkexpr(in2), mkU64(0xFFFFFFFF))));
   assign(lo64,
          binop(Iop_Or64,
                binop(Iop_Shl64, mkexpr(in1), mkU8(32)),
                binop(Iop_And64, mkexpr(in0), mkU64(0xFFFFFFFF))));
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
   return res;
}

// Concatenate the even 32-bit lanes of both inputs.
static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 )
{
   // returns a2 a0 b2 b0
   IRTemp a2, a0, b2, b0;
   breakV128to32s(NULL, &a2, NULL, &a0, a3210);
   breakV128to32s(NULL, &b2, NULL, &b0, b3210);
   return mkexpr(mkV128from32s(a2, a0, b2, b0));
}

// Concatenate the odd 32-bit lanes of both inputs.
static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 )
{
   // returns a3 a1 b3 b1
   IRTemp a3, a1, b3, b1;
   breakV128to32s(&a3, NULL, &a1, NULL, a3210);
   breakV128to32s(&b3, NULL, &b1, NULL, b3210);
   return mkexpr(mkV128from32s(a3, a1, b3, b1));
}

// Interleave the low two 32-bit lanes of both inputs.
static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 )
{
   // returns a1 b1 a0 b0
   IRTemp a1, a0, b1, b0;
   breakV128to32s(NULL, NULL, &a1, &a0, a3210);
   breakV128to32s(NULL, NULL, &b1, &b0, b3210);
   return mkexpr(mkV128from32s(a1, b1, a0, b0));
}

// Interleave the high two 32-bit lanes of both inputs.
static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 )
{
   // returns a3 b3 a2 b2
   IRTemp a3, a2, b3, b2;
   breakV128to32s(&a3, &a2, NULL, NULL, a3210);
   breakV128to32s(&b3, &b2, NULL, NULL, b3210);
   return mkexpr(mkV128from32s(a3, b3, a2, b2));
}

////////////////////////////////////////////////////////////////
// 16x8 operations
//

// Splits |v128| into up to eight 16-bit lanes (out7 = bits 127:112
// down to out0 = bits 15:0), each in a freshly allocated 64-bit IRTemp
// with the upper 48 bits zero.  NULL output pointers skip that lane.
static void breakV128to16s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
                             IRTemp* out4, IRTemp* out3, IRTemp* out2,
                             IRTemp* out1,IRTemp* out0, IRTemp v128 )
{
   if (out7) *out7 = newTemp(Ity_I64);
   if (out6) *out6 = newTemp(Ity_I64);
   if (out5) *out5 = newTemp(Ity_I64);
   if (out4) *out4 = newTemp(Ity_I64);
   if (out3) *out3 = newTemp(Ity_I64);
   if (out2) *out2 = newTemp(Ity_I64);
   if (out1) *out1 = newTemp(Ity_I64);
   if (out0) *out0 = newTemp(Ity_I64);
   IRTemp hi64 = newTemp(Ity_I64);
   IRTemp lo64 = newTemp(Ity_I64);
   assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
   assign(lo64, unop(Iop_V128to64,   mkexpr(v128)) );
   if (out7)
      assign(*out7, binop(Iop_And64,
                          binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
                          mkU64(0xFFFF)));
   if (out6)
      assign(*out6, binop(Iop_And64,
                          binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
                          mkU64(0xFFFF)));
   if (out5)
      assign(*out5, binop(Iop_And64,
                          binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
                          mkU64(0xFFFF)));
   if (out4)
      assign(*out4, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFF)));
   if (out3)
      assign(*out3, binop(Iop_And64,
                          binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
                          mkU64(0xFFFF)));
   if (out2)
      assign(*out2, binop(Iop_And64,
                          binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
                          mkU64(0xFFFF)));
   if (out1)
      assign(*out1, binop(Iop_And64,
                          binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
                          mkU64(0xFFFF)));
   if (out0)
      assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFF)));
}

// Makes a V128 value from 8 16-bit lanes, each held in a 64-bit IRTemp.
// Each input is masked to 16 bits before use, so (unlike mkV128from32s)
// there is no zero-upper-bits precondition on the inputs.
static IRTemp mkV128from16s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
                              IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
{
   IRTemp hi64 = newTemp(Ity_I64);
   IRTemp lo64 = newTemp(Ity_I64);
   assign(hi64,
          binop(Iop_Or64,
                binop(Iop_Or64,
                      binop(Iop_Shl64,
                            binop(Iop_And64, mkexpr(in7), mkU64(0xFFFF)),
                            mkU8(48)),
                      binop(Iop_Shl64,
                            binop(Iop_And64, mkexpr(in6), mkU64(0xFFFF)),
                            mkU8(32))),
                binop(Iop_Or64,
                      binop(Iop_Shl64,
                            binop(Iop_And64, mkexpr(in5), mkU64(0xFFFF)),
                            mkU8(16)),
                      binop(Iop_And64,
                            mkexpr(in4), mkU64(0xFFFF)))));
   assign(lo64,
          binop(Iop_Or64,
                binop(Iop_Or64,
                      binop(Iop_Shl64,
                            binop(Iop_And64, mkexpr(in3), mkU64(0xFFFF)),
                            mkU8(48)),
                      binop(Iop_Shl64,
                            binop(Iop_And64, mkexpr(in2), mkU64(0xFFFF)),
                            mkU8(32))),
                binop(Iop_Or64,
                      binop(Iop_Shl64,
                            binop(Iop_And64, mkexpr(in1), mkU64(0xFFFF)),
                            mkU8(16)),
                      binop(Iop_And64,
                            mkexpr(in0), mkU64(0xFFFF)))));
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
   return res;
}

// Concatenate the even 16-bit lanes of both inputs.
static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
{
   // returns a6 a4 a2 a0 b6 b4 b2 b0
   IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
   breakV128to16s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
   breakV128to16s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
   return mkexpr(mkV128from16s(a6, a4, a2, a0, b6, b4, b2, b0));
}

static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
{
   // returns a7 a5 a3 a1
b7 b5 b3 b1 7816 IRTemp a7, a5, a3, a1, b7, b5, b3, b1; 7817 breakV128to16s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210); 7818 breakV128to16s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210); 7819 return mkexpr(mkV128from16s(a7, a5, a3, a1, b7, b5, b3, b1)); 7820} 7821 7822static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) 7823{ 7824 // returns a3 b3 a2 b2 a1 b1 a0 b0 7825 IRTemp a3, b3, a2, b2, a1, a0, b1, b0; 7826 breakV128to16s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210); 7827 breakV128to16s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210); 7828 return mkexpr(mkV128from16s(a3, b3, a2, b2, a1, b1, a0, b0)); 7829} 7830 7831static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) 7832{ 7833 // returns a7 b7 a6 b6 a5 b5 a4 b4 7834 IRTemp a7, b7, a6, b6, a5, b5, a4, b4; 7835 breakV128to16s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210); 7836 breakV128to16s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210); 7837 return mkexpr(mkV128from16s(a7, b7, a6, b6, a5, b5, a4, b4)); 7838} 7839 7840//////////////////////////////////////////////////////////////// 7841// 8x16 operations 7842// 7843 7844static void breakV128to8s ( IRTemp* outF, IRTemp* outE, IRTemp* outD, 7845 IRTemp* outC, IRTemp* outB, IRTemp* outA, 7846 IRTemp* out9, IRTemp* out8, 7847 IRTemp* out7, IRTemp* out6, IRTemp* out5, 7848 IRTemp* out4, IRTemp* out3, IRTemp* out2, 7849 IRTemp* out1,IRTemp* out0, IRTemp v128 ) 7850{ 7851 if (outF) *outF = newTemp(Ity_I64); 7852 if (outE) *outE = newTemp(Ity_I64); 7853 if (outD) *outD = newTemp(Ity_I64); 7854 if (outC) *outC = newTemp(Ity_I64); 7855 if (outB) *outB = newTemp(Ity_I64); 7856 if (outA) *outA = newTemp(Ity_I64); 7857 if (out9) *out9 = newTemp(Ity_I64); 7858 if (out8) *out8 = newTemp(Ity_I64); 7859 if (out7) *out7 = newTemp(Ity_I64); 7860 if (out6) *out6 = newTemp(Ity_I64); 7861 if (out5) *out5 = newTemp(Ity_I64); 7862 if (out4) *out4 = newTemp(Ity_I64); 7863 if (out3) *out3 
= newTemp(Ity_I64); 7864 if (out2) *out2 = newTemp(Ity_I64); 7865 if (out1) *out1 = newTemp(Ity_I64); 7866 if (out0) *out0 = newTemp(Ity_I64); 7867 IRTemp hi64 = newTemp(Ity_I64); 7868 IRTemp lo64 = newTemp(Ity_I64); 7869 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) ); 7870 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) ); 7871 if (outF) 7872 assign(*outF, binop(Iop_And64, 7873 binop(Iop_Shr64, mkexpr(hi64), mkU8(56)), 7874 mkU64(0xFF))); 7875 if (outE) 7876 assign(*outE, binop(Iop_And64, 7877 binop(Iop_Shr64, mkexpr(hi64), mkU8(48)), 7878 mkU64(0xFF))); 7879 if (outD) 7880 assign(*outD, binop(Iop_And64, 7881 binop(Iop_Shr64, mkexpr(hi64), mkU8(40)), 7882 mkU64(0xFF))); 7883 if (outC) 7884 assign(*outC, binop(Iop_And64, 7885 binop(Iop_Shr64, mkexpr(hi64), mkU8(32)), 7886 mkU64(0xFF))); 7887 if (outB) 7888 assign(*outB, binop(Iop_And64, 7889 binop(Iop_Shr64, mkexpr(hi64), mkU8(24)), 7890 mkU64(0xFF))); 7891 if (outA) 7892 assign(*outA, binop(Iop_And64, 7893 binop(Iop_Shr64, mkexpr(hi64), mkU8(16)), 7894 mkU64(0xFF))); 7895 if (out9) 7896 assign(*out9, binop(Iop_And64, 7897 binop(Iop_Shr64, mkexpr(hi64), mkU8(8)), 7898 mkU64(0xFF))); 7899 if (out8) 7900 assign(*out8, binop(Iop_And64, 7901 binop(Iop_Shr64, mkexpr(hi64), mkU8(0)), 7902 mkU64(0xFF))); 7903 if (out7) 7904 assign(*out7, binop(Iop_And64, 7905 binop(Iop_Shr64, mkexpr(lo64), mkU8(56)), 7906 mkU64(0xFF))); 7907 if (out6) 7908 assign(*out6, binop(Iop_And64, 7909 binop(Iop_Shr64, mkexpr(lo64), mkU8(48)), 7910 mkU64(0xFF))); 7911 if (out5) 7912 assign(*out5, binop(Iop_And64, 7913 binop(Iop_Shr64, mkexpr(lo64), mkU8(40)), 7914 mkU64(0xFF))); 7915 if (out4) 7916 assign(*out4, binop(Iop_And64, 7917 binop(Iop_Shr64, mkexpr(lo64), mkU8(32)), 7918 mkU64(0xFF))); 7919 if (out3) 7920 assign(*out3, binop(Iop_And64, 7921 binop(Iop_Shr64, mkexpr(lo64), mkU8(24)), 7922 mkU64(0xFF))); 7923 if (out2) 7924 assign(*out2, binop(Iop_And64, 7925 binop(Iop_Shr64, mkexpr(lo64), mkU8(16)), 7926 mkU64(0xFF))); 7927 if (out1) 7928 
assign(*out1, binop(Iop_And64, 7929 binop(Iop_Shr64, mkexpr(lo64), mkU8(8)), 7930 mkU64(0xFF))); 7931 if (out0) 7932 assign(*out0, binop(Iop_And64, 7933 binop(Iop_Shr64, mkexpr(lo64), mkU8(0)), 7934 mkU64(0xFF))); 7935} 7936 7937static IRTemp mkV128from8s ( IRTemp inF, IRTemp inE, IRTemp inD, IRTemp inC, 7938 IRTemp inB, IRTemp inA, IRTemp in9, IRTemp in8, 7939 IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4, 7940 IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 ) 7941{ 7942 IRTemp vFE = newTemp(Ity_I64); 7943 IRTemp vDC = newTemp(Ity_I64); 7944 IRTemp vBA = newTemp(Ity_I64); 7945 IRTemp v98 = newTemp(Ity_I64); 7946 IRTemp v76 = newTemp(Ity_I64); 7947 IRTemp v54 = newTemp(Ity_I64); 7948 IRTemp v32 = newTemp(Ity_I64); 7949 IRTemp v10 = newTemp(Ity_I64); 7950 assign(vFE, binop(Iop_Or64, 7951 binop(Iop_Shl64, 7952 binop(Iop_And64, mkexpr(inF), mkU64(0xFF)), mkU8(8)), 7953 binop(Iop_And64, mkexpr(inE), mkU64(0xFF)))); 7954 assign(vDC, binop(Iop_Or64, 7955 binop(Iop_Shl64, 7956 binop(Iop_And64, mkexpr(inD), mkU64(0xFF)), mkU8(8)), 7957 binop(Iop_And64, mkexpr(inC), mkU64(0xFF)))); 7958 assign(vBA, binop(Iop_Or64, 7959 binop(Iop_Shl64, 7960 binop(Iop_And64, mkexpr(inB), mkU64(0xFF)), mkU8(8)), 7961 binop(Iop_And64, mkexpr(inA), mkU64(0xFF)))); 7962 assign(v98, binop(Iop_Or64, 7963 binop(Iop_Shl64, 7964 binop(Iop_And64, mkexpr(in9), mkU64(0xFF)), mkU8(8)), 7965 binop(Iop_And64, mkexpr(in8), mkU64(0xFF)))); 7966 assign(v76, binop(Iop_Or64, 7967 binop(Iop_Shl64, 7968 binop(Iop_And64, mkexpr(in7), mkU64(0xFF)), mkU8(8)), 7969 binop(Iop_And64, mkexpr(in6), mkU64(0xFF)))); 7970 assign(v54, binop(Iop_Or64, 7971 binop(Iop_Shl64, 7972 binop(Iop_And64, mkexpr(in5), mkU64(0xFF)), mkU8(8)), 7973 binop(Iop_And64, mkexpr(in4), mkU64(0xFF)))); 7974 assign(v32, binop(Iop_Or64, 7975 binop(Iop_Shl64, 7976 binop(Iop_And64, mkexpr(in3), mkU64(0xFF)), mkU8(8)), 7977 binop(Iop_And64, mkexpr(in2), mkU64(0xFF)))); 7978 assign(v10, binop(Iop_Or64, 7979 binop(Iop_Shl64, 7980 binop(Iop_And64, 
mkexpr(in1), mkU64(0xFF)), mkU8(8)), 7981 binop(Iop_And64, mkexpr(in0), mkU64(0xFF)))); 7982 return mkV128from16s(vFE, vDC, vBA, v98, v76, v54, v32, v10); 7983} 7984 7985static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210, 7986 IRTemp bFEDCBA9876543210 ) 7987{ 7988 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0 7989 IRTemp aE, aC, aA, a8, a6, a4, a2, a0, bE, bC, bA, b8, b6, b4, b2, b0; 7990 breakV128to8s(NULL, &aE, NULL, &aC, NULL, &aA, NULL, &a8, 7991 NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, 7992 aFEDCBA9876543210); 7993 breakV128to8s(NULL, &bE, NULL, &bC, NULL, &bA, NULL, &b8, 7994 NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, 7995 bFEDCBA9876543210); 7996 return mkexpr(mkV128from8s(aE, aC, aA, a8, a6, a4, a2, a0, 7997 bE, bC, bA, b8, b6, b4, b2, b0)); 7998} 7999 8000static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210, 8001 IRTemp bFEDCBA9876543210 ) 8002{ 8003 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1 8004 IRTemp aF, aD, aB, a9, a7, a5, a3, a1, bF, bD, bB, b9, b7, b5, b3, b1; 8005 breakV128to8s(&aF, NULL, &aD, NULL, &aB, NULL, &a9, NULL, 8006 &a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, 8007 aFEDCBA9876543210); 8008 8009 breakV128to8s(&bF, NULL, &bD, NULL, &bB, NULL, &b9, NULL, 8010 &b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, 8011 aFEDCBA9876543210); 8012 8013 return mkexpr(mkV128from8s(aF, aD, aB, a9, a7, a5, a3, a1, 8014 bF, bD, bB, b9, b7, b5, b3, b1)); 8015} 8016 8017static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210, 8018 IRTemp bFEDCBA9876543210 ) 8019{ 8020 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0 8021 IRTemp a7, b7, a6, b6, a5, b5, a4, b4, a3, b3, a2, b2, a1, b1, a0, b0; 8022 breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 8023 &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0, 8024 aFEDCBA9876543210); 8025 breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 8026 &b7, &b6, &b5, &b4, &b3, &b2, &b1, &b0, 8027 bFEDCBA9876543210); 8028 return mkexpr(mkV128from8s(a7, b7, 
a6, b6, a5, b5, a4, b4, 8029 a3, b3, a2, b2, a1, b1, a0, b0)); 8030} 8031 8032static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210, 8033 IRTemp bFEDCBA9876543210 ) 8034{ 8035 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8 8036 IRTemp aF, bF, aE, bE, aD, bD, aC, bC, aB, bB, aA, bA, a9, b9, a8, b8; 8037 breakV128to8s(&aF, &aE, &aD, &aC, &aB, &aA, &a9, &a8, 8038 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 8039 aFEDCBA9876543210); 8040 breakV128to8s(&bF, &bE, &bD, &bC, &bB, &bA, &b9, &b8, 8041 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 8042 bFEDCBA9876543210); 8043 return mkexpr(mkV128from8s(aF, bF, aE, bE, aD, bD, aC, bC, 8044 aB, bB, aA, bA, a9, b9, a8, b8)); 8045} 8046 8047/*--------------------------------------------------------------------*/ 8048/*--- end guest_arm64_toIR.c ---*/ 8049/*--------------------------------------------------------------------*/ 8050