guest_arm64_toIR.c revision 6d9b01c06e645ea0b0a3544e968961aaa7bc6a66
1/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*--------------------------------------------------------------------*/
4/*--- begin                                     guest_arm64_toIR.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2013-2013 OpenWorks
12      info@open-works.net
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27   02110-1301, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32//ZZ /* XXXX thumb to check:
33//ZZ    that all cases where putIRegT writes r15, we generate a jump.
34//ZZ
35//ZZ    All uses of newTemp assign to an IRTemp and not a UInt
36//ZZ
37//ZZ    For all thumb loads and stores, including VFP ones, new-ITSTATE is
38//ZZ    backed out before the memory op, and restored afterwards.  This
39//ZZ    needs to happen even after we go uncond.  (and for sure it doesn't
40//ZZ    happen for VFP loads/stores right now).
41//ZZ
42//ZZ    VFP on thumb: check that we exclude all r13/r15 cases that we
43//ZZ    should.
44//ZZ
45//ZZ    XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
46//ZZ    taking into account the number of insns guarded by an IT.
47//ZZ
48//ZZ    remove the nasty hack, in the spechelper, of looking for Or32(...,
49//ZZ    0xE0) in as the first arg to armg_calculate_condition, and instead
50//ZZ    use Slice44 as specified in comments in the spechelper.
51//ZZ
52//ZZ    add specialisations for armg_calculate_flag_c and _v, as they
53//ZZ    are moderately often needed in Thumb code.
54//ZZ
55//ZZ    Correctness: ITSTATE handling in Thumb SVCs is wrong.
56//ZZ
57//ZZ    Correctness (obscure): in m_transtab, when invalidating code
58//ZZ    address ranges, invalidate up to 18 bytes after the end of the
59//ZZ    range.  This is because the ITSTATE optimisation at the top of
60//ZZ    _THUMB_WRK below analyses up to 18 bytes before the start of any
61//ZZ    given instruction, and so might depend on the invalidated area.
62//ZZ */
63//ZZ
64//ZZ /* Limitations, etc
65//ZZ
66//ZZ    - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
67//ZZ      These instructions are non-restartable in the case where the
68//ZZ      transfer(s) fault.
69//ZZ
70//ZZ    - SWP: the restart jump back is Ijk_Boring; it should be
71//ZZ      Ijk_NoRedir but that's expensive.  See comments on casLE() in
72//ZZ      guest_x86_toIR.c.
73//ZZ */
74
75/* "Special" instructions.
76
77   This instruction decoder can decode four special instructions
78   which mean nothing natively (are no-ops as far as regs/mem are
79   concerned) but have meaning for supporting Valgrind.  A special
80   instruction is flagged by a 16-byte preamble:
81
82      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
83      (ror x12, x12, #3;   ror x12, x12, #13
84       ror x12, x12, #51;  ror x12, x12, #61)
85
86   Following that, one of the following 3 are allowed
87   (standard interpretation in parentheses):
88
89      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
90      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
91      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
92      AA090129 (orr x9,x9,x9)      IR injection
93
94   Any other bytes following the 16-byte preamble are illegal and
95   constitute a failure in instruction decoding.  This all assumes
96   that the preamble will never occur except in specific code
97   fragments designed for Valgrind to catch.
98*/
99
100/* Translates ARM64 code to IR. */
101
102#include "libvex_basictypes.h"
103#include "libvex_ir.h"
104#include "libvex.h"
105#include "libvex_guest_arm64.h"
106
107#include "main_util.h"
108#include "main_globals.h"
109#include "guest_generic_bb_to_IR.h"
110#include "guest_arm64_defs.h"
111
112
113/*------------------------------------------------------------*/
114/*--- Globals                                              ---*/
115/*------------------------------------------------------------*/
116
117/* These are set at the start of the translation of a instruction, so
118   that we don't have to pass them around endlessly.  CONST means does
119   not change during translation of the instruction.
120*/
121
122/* CONST: is the host bigendian?  We need to know this in order to do
123   sub-register accesses to the SIMD/FP registers correctly. */
124static Bool host_is_bigendian;
125
126/* CONST: The guest address for the instruction currently being
127   translated.  */
128static Addr64 guest_PC_curr_instr;
129
130/* MOD: The IRSB* into which we're generating code. */
131static IRSB* irsb;
132
133
134/*------------------------------------------------------------*/
135/*--- Debugging output                                     ---*/
136/*------------------------------------------------------------*/
137
/* Trace-print a disassembled instruction when front-end tracing is
   enabled.  Wrapped in do-while(0) so the macro is a single statement
   and cannot mis-bind a following 'else' (dangling-else hazard). */
#define DIP(format, args...)               \
   do {                                    \
      if (vex_traceflags & VEX_TRACE_FE)   \
         vex_printf(format, ## args);      \
   } while (0)
141
/* Format into BUF when front-end tracing is enabled.  do-while(0)
   makes the macro a single statement, avoiding the dangling-else
   hazard of a bare 'if'. */
#define DIS(buf, format, args...)          \
   do {                                    \
      if (vex_traceflags & VEX_TRACE_FE)   \
         vex_sprintf(buf, format, ## args); \
   } while (0)
145
146
147/*------------------------------------------------------------*/
148/*--- Helper bits and pieces for deconstructing the        ---*/
149/*--- arm insn stream.                                     ---*/
150/*------------------------------------------------------------*/
151
152/* Do a little-endian load of a 32-bit word, regardless of the
153   endianness of the underlying host. */
154static inline UInt getUIntLittleEndianly ( UChar* p )
155{
156   UInt w = 0;
157   w = (w << 8) | p[3];
158   w = (w << 8) | p[2];
159   w = (w << 8) | p[1];
160   w = (w << 8) | p[0];
161   return w;
162}
163
164/* Sign extend a N-bit value up to 64 bits, by copying
165   bit N-1 into all higher positions. */
166static ULong sx_to_64 ( ULong x, UInt n )
167{
168   vassert(n > 1 && n < 64);
169   Long r = (Long)x;
170   r = (r << (64-n)) >> (64-n);
171   return (ULong)r;
172}
173
174//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
175//ZZ    endianness of the underlying host. */
176//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
177//ZZ {
178//ZZ    UShort w = 0;
179//ZZ    w = (w << 8) | p[1];
180//ZZ    w = (w << 8) | p[0];
181//ZZ    return w;
182//ZZ }
183//ZZ
184//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
185//ZZ    vassert(sh >= 0 && sh < 32);
186//ZZ    if (sh == 0)
187//ZZ       return x;
188//ZZ    else
189//ZZ       return (x << (32-sh)) | (x >> sh);
190//ZZ }
191//ZZ
192//ZZ static Int popcount32 ( UInt x )
193//ZZ {
194//ZZ    Int res = 0, i;
195//ZZ    for (i = 0; i < 32; i++) {
196//ZZ       res += (x & 1);
197//ZZ       x >>= 1;
198//ZZ    }
199//ZZ    return res;
200//ZZ }
201//ZZ
202//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
203//ZZ {
204//ZZ    UInt mask = 1 << ix;
205//ZZ    x &= ~mask;
206//ZZ    x |= ((b << ix) & mask);
207//ZZ    return x;
208//ZZ }
209
210#define BITS2(_b1,_b0)  \
211   (((_b1) << 1) | (_b0))
212
213#define BITS3(_b2,_b1,_b0)  \
214  (((_b2) << 2) | ((_b1) << 1) | (_b0))
215
216#define BITS4(_b3,_b2,_b1,_b0)  \
217   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
218
219#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
220   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)  \
221    | BITS4((_b3),(_b2),(_b1),(_b0)))
222
223#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
224   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
225#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
226   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
227#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
228   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
229
230#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
231   (((_b8) << 8)  \
232    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
233
234#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
235   (((_b9) << 9) | ((_b8) << 8)  \
236    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
237
238#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
239   (((_b10) << 10)  \
240    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
241
242#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
243   (((_b11) << 11)  \
244    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
245
246// produces _uint[_bMax:_bMin]
247#define SLICE_UInt(_uint,_bMax,_bMin)  \
248   (( ((UInt)(_uint)) >> (_bMin))  \
249    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
250
251
252/*------------------------------------------------------------*/
253/*--- Helper bits and pieces for creating IR fragments.    ---*/
254/*------------------------------------------------------------*/
255
256static IRExpr* mkV128 ( UShort w )
257{
258   return IRExpr_Const(IRConst_V128(w));
259}
260
261static IRExpr* mkU64 ( ULong i )
262{
263   return IRExpr_Const(IRConst_U64(i));
264}
265
266static IRExpr* mkU32 ( UInt i )
267{
268   return IRExpr_Const(IRConst_U32(i));
269}
270
271static IRExpr* mkU8 ( UInt i )
272{
273   vassert(i < 256);
274   return IRExpr_Const(IRConst_U8( (UChar)i ));
275}
276
277static IRExpr* mkexpr ( IRTemp tmp )
278{
279   return IRExpr_RdTmp(tmp);
280}
281
282static IRExpr* unop ( IROp op, IRExpr* a )
283{
284   return IRExpr_Unop(op, a);
285}
286
287static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
288{
289   return IRExpr_Binop(op, a1, a2);
290}
291
292static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
293{
294   return IRExpr_Triop(op, a1, a2, a3);
295}
296
297static IRExpr* loadLE ( IRType ty, IRExpr* addr )
298{
299   return IRExpr_Load(Iend_LE, ty, addr);
300}
301
302/* Add a statement to the list held by "irbb". */
303static void stmt ( IRStmt* st )
304{
305   addStmtToIRSB( irsb, st );
306}
307
308static void assign ( IRTemp dst, IRExpr* e )
309{
310   stmt( IRStmt_WrTmp(dst, e) );
311}
312
313static void storeLE ( IRExpr* addr, IRExpr* data )
314{
315   stmt( IRStmt_Store(Iend_LE, addr, data) );
316}
317
318//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
319//ZZ {
320//ZZ    if (guardT == IRTemp_INVALID) {
321//ZZ       /* unconditional */
322//ZZ       storeLE(addr, data);
323//ZZ    } else {
324//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
325//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
326//ZZ    }
327//ZZ }
328//ZZ
329//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
330//ZZ                             IRExpr* addr, IRExpr* alt,
331//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
332//ZZ {
333//ZZ    if (guardT == IRTemp_INVALID) {
334//ZZ       /* unconditional */
335//ZZ       IRExpr* loaded = NULL;
336//ZZ       switch (cvt) {
337//ZZ          case ILGop_Ident32:
338//ZZ             loaded = loadLE(Ity_I32, addr); break;
339//ZZ          case ILGop_8Uto32:
340//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
341//ZZ          case ILGop_8Sto32:
342//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
343//ZZ          case ILGop_16Uto32:
344//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
345//ZZ          case ILGop_16Sto32:
346//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
347//ZZ          default:
348//ZZ             vassert(0);
349//ZZ       }
350//ZZ       vassert(loaded != NULL);
351//ZZ       assign(dst, loaded);
352//ZZ    } else {
353//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
354//ZZ          loaded data before putting the data in 'dst'.  If the load
355//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
356//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
357//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
358//ZZ    }
359//ZZ }
360
361/* Generate a new temporary of the given type. */
362static IRTemp newTemp ( IRType ty )
363{
364   vassert(isPlausibleIRType(ty));
365   return newIRTemp( irsb->tyenv, ty );
366}
367
368//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
369//ZZ    IRRoundingMode. */
370//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
371//ZZ {
372//ZZ    return mkU32(Irrm_NEAREST);
373//ZZ }
374//ZZ
375//ZZ /* Generate an expression for SRC rotated right by ROT. */
376//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
377//ZZ {
378//ZZ    vassert(rot >= 0 && rot < 32);
379//ZZ    if (rot == 0)
380//ZZ       return mkexpr(src);
381//ZZ    return
382//ZZ       binop(Iop_Or32,
383//ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
384//ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
385//ZZ }
386//ZZ
387//ZZ static IRExpr* mkU128 ( ULong i )
388//ZZ {
389//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
390//ZZ }
391//ZZ
392//ZZ /* Generate a 4-aligned version of the given expression if
393//ZZ    the given condition is true.  Else return it unchanged. */
394//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
395//ZZ {
396//ZZ    if (b)
397//ZZ       return binop(Iop_And32, e, mkU32(~3));
398//ZZ    else
399//ZZ       return e;
400//ZZ }
401
402/* Other IR construction helpers. */
403static IROp mkAND ( IRType ty ) {
404   switch (ty) {
405      case Ity_I32: return Iop_And32;
406      case Ity_I64: return Iop_And64;
407      default: vpanic("mkAND");
408   }
409}
410
411static IROp mkOR ( IRType ty ) {
412   switch (ty) {
413      case Ity_I32: return Iop_Or32;
414      case Ity_I64: return Iop_Or64;
415      default: vpanic("mkOR");
416   }
417}
418
419static IROp mkXOR ( IRType ty ) {
420   switch (ty) {
421      case Ity_I32: return Iop_Xor32;
422      case Ity_I64: return Iop_Xor64;
423      default: vpanic("mkXOR");
424   }
425}
426
427static IROp mkSHL ( IRType ty ) {
428   switch (ty) {
429      case Ity_I32: return Iop_Shl32;
430      case Ity_I64: return Iop_Shl64;
431      default: vpanic("mkSHL");
432   }
433}
434
435static IROp mkSHR ( IRType ty ) {
436   switch (ty) {
437      case Ity_I32: return Iop_Shr32;
438      case Ity_I64: return Iop_Shr64;
439      default: vpanic("mkSHR");
440   }
441}
442
443static IROp mkSAR ( IRType ty ) {
444   switch (ty) {
445      case Ity_I32: return Iop_Sar32;
446      case Ity_I64: return Iop_Sar64;
447      default: vpanic("mkSAR");
448   }
449}
450
451static IROp mkNOT ( IRType ty ) {
452   switch (ty) {
453      case Ity_I32: return Iop_Not32;
454      case Ity_I64: return Iop_Not64;
455      default: vpanic("mkNOT");
456   }
457}
458
459static IROp mkADD ( IRType ty ) {
460   switch (ty) {
461      case Ity_I32: return Iop_Add32;
462      case Ity_I64: return Iop_Add64;
463      default: vpanic("mkADD");
464   }
465}
466
467static IROp mkSUB ( IRType ty ) {
468   switch (ty) {
469      case Ity_I32: return Iop_Sub32;
470      case Ity_I64: return Iop_Sub64;
471      default: vpanic("mkSUB");
472   }
473}
474
475static IROp mkADDF ( IRType ty ) {
476   switch (ty) {
477      case Ity_F32: return Iop_AddF32;
478      case Ity_F64: return Iop_AddF64;
479      default: vpanic("mkADDF");
480   }
481}
482
483static IROp mkSUBF ( IRType ty ) {
484   switch (ty) {
485      case Ity_F32: return Iop_SubF32;
486      case Ity_F64: return Iop_SubF64;
487      default: vpanic("mkSUBF");
488   }
489}
490
491static IROp mkMULF ( IRType ty ) {
492   switch (ty) {
493      case Ity_F32: return Iop_MulF32;
494      case Ity_F64: return Iop_MulF64;
495      default: vpanic("mkMULF");
496   }
497}
498
499static IROp mkDIVF ( IRType ty ) {
500   switch (ty) {
501      case Ity_F32: return Iop_DivF32;
502      case Ity_F64: return Iop_DivF64;
503      default: vpanic("mkMULF");
504   }
505}
506
507static IROp mkNEGF ( IRType ty ) {
508   switch (ty) {
509      case Ity_F32: return Iop_NegF32;
510      case Ity_F64: return Iop_NegF64;
511      default: vpanic("mkNEGF");
512   }
513}
514
515static IROp mkABSF ( IRType ty ) {
516   switch (ty) {
517      case Ity_F32: return Iop_AbsF32;
518      case Ity_F64: return Iop_AbsF64;
519      default: vpanic("mkNEGF");
520   }
521}
522
523static IROp mkSQRTF ( IRType ty ) {
524   switch (ty) {
525      case Ity_F32: return Iop_SqrtF32;
526      case Ity_F64: return Iop_SqrtF64;
527      default: vpanic("mkNEGF");
528   }
529}
530
531static IRExpr* mkU ( IRType ty, ULong imm ) {
532   switch (ty) {
533      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
534      case Ity_I64: return mkU64(imm);
535      default: vpanic("mkU");
536   }
537}
538
539/* Generate IR to create 'arg rotated right by imm', for sane values
540   of 'ty' and 'imm'. */
541static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
542{
543   UInt w = 0;
544   if (ty == Ity_I64) {
545      w = 64;
546   } else {
547      vassert(ty == Ity_I32);
548      w = 32;
549   }
550   vassert(w != 0);
551   vassert(imm < w);
552   if (imm == 0) {
553      return arg;
554   }
555   IRTemp res = newTemp(ty);
556   assign(res, binop(mkOR(ty),
557                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
558                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
559   return res;
560}
561
562/* Generate IR to set the returned temp to either all-zeroes or
563   all ones, as a copy of arg<imm>. */
564static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
565{
566   UInt w = 0;
567   if (ty == Ity_I64) {
568      w = 64;
569   } else {
570      vassert(ty == Ity_I32);
571      w = 32;
572   }
573   vassert(w != 0);
574   vassert(imm < w);
575   IRTemp res = newTemp(ty);
576   assign(res, binop(mkSAR(ty),
577                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
578                     mkU8(w - 1)));
579   return res;
580}
581
582/* U-widen 8/16/32/64 bit int expr to 64. */
583static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
584{
585   switch (srcTy) {
586      case Ity_I64: return e;
587      case Ity_I32: return unop(Iop_32Uto64, e);
588      case Ity_I16: return unop(Iop_16Uto64, e);
589      case Ity_I8:  return unop(Iop_8Uto64, e);
590      default: vpanic("widenUto64(arm64)");
591   }
592}
593
594/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
595   of these combinations make sense. */
596static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
597{
598   switch (dstTy) {
599      case Ity_I64: return e;
600      case Ity_I32: return unop(Iop_64to32, e);
601      case Ity_I16: return unop(Iop_64to16, e);
602      case Ity_I8:  return unop(Iop_64to8, e);
603      default: vpanic("narrowFrom64(arm64)");
604   }
605}
606
607
608/*------------------------------------------------------------*/
609/*--- Helpers for accessing guest registers.               ---*/
610/*------------------------------------------------------------*/
611
612#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
613#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
614#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
615#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
616#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
617#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
618#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
619#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
620#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
621#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
622#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
623#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
624#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
625#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
626#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
627#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
628#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
629#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
630#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
631#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
632#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
633#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
634#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
635#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
636#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
637#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
638#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
639#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
640#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
641#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
642#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)
643
644#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
645#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)
646
647#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
648#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
649#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
650#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)
651
652#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
653#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)
654
655#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
656#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
657#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
658#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
659#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
660#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
661#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
662#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
663#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
664#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
665#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
666#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
667#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
668#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
669#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
670#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
671#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
672#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
673#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
674#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
675#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
676#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
677#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
678#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
679#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
680#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
681#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
682#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
683#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
684#define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
685#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
686#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)
687
688#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
689#define OFFB_FPSR     offsetof(VexGuestARM64State,guest_FPSR)
690//ZZ #define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
691//ZZ #define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
692//ZZ #define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
693//ZZ #define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
694//ZZ #define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
695//ZZ #define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
696//ZZ #define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)
697
698#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
699#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)
700
701
702/* ---------------- Integer registers ---------------- */
703
704static Int offsetIReg64 ( UInt iregNo )
705{
706   /* Do we care about endianness here?  We do if sub-parts of integer
707      registers are accessed. */
708   switch (iregNo) {
709      case 0:  return OFFB_X0;
710      case 1:  return OFFB_X1;
711      case 2:  return OFFB_X2;
712      case 3:  return OFFB_X3;
713      case 4:  return OFFB_X4;
714      case 5:  return OFFB_X5;
715      case 6:  return OFFB_X6;
716      case 7:  return OFFB_X7;
717      case 8:  return OFFB_X8;
718      case 9:  return OFFB_X9;
719      case 10: return OFFB_X10;
720      case 11: return OFFB_X11;
721      case 12: return OFFB_X12;
722      case 13: return OFFB_X13;
723      case 14: return OFFB_X14;
724      case 15: return OFFB_X15;
725      case 16: return OFFB_X16;
726      case 17: return OFFB_X17;
727      case 18: return OFFB_X18;
728      case 19: return OFFB_X19;
729      case 20: return OFFB_X20;
730      case 21: return OFFB_X21;
731      case 22: return OFFB_X22;
732      case 23: return OFFB_X23;
733      case 24: return OFFB_X24;
734      case 25: return OFFB_X25;
735      case 26: return OFFB_X26;
736      case 27: return OFFB_X27;
737      case 28: return OFFB_X28;
738      case 29: return OFFB_X29;
739      case 30: return OFFB_X30;
740      /* but not 31 */
741      default: vassert(0);
742   }
743}
744
745static Int offsetIReg64orSP ( UInt iregNo )
746{
747   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
748}
749
750static const HChar* nameIReg64orZR ( UInt iregNo )
751{
752   vassert(iregNo < 32);
753   static const HChar* names[32]
754      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
755          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
756          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
757          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
758   return names[iregNo];
759}
760
761static const HChar* nameIReg64orSP ( UInt iregNo )
762{
763   if (iregNo == 31) {
764      return "sp";
765   }
766   vassert(iregNo < 31);
767   return nameIReg64orZR(iregNo);
768}
769
770static IRExpr* getIReg64orSP ( UInt iregNo )
771{
772   vassert(iregNo < 32);
773   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
774}
775
776static IRExpr* getIReg64orZR ( UInt iregNo )
777{
778   if (iregNo == 31) {
779      return mkU64(0);
780   }
781   vassert(iregNo < 31);
782   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
783}
784
785static void putIReg64orSP ( UInt iregNo, IRExpr* e )
786{
787   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
788   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
789}
790
791static void putIReg64orZR ( UInt iregNo, IRExpr* e )
792{
793   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
794   if (iregNo == 31) {
795      return;
796   }
797   vassert(iregNo < 31);
798   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
799}
800
801static const HChar* nameIReg32orZR ( UInt iregNo )
802{
803   vassert(iregNo < 32);
804   static const HChar* names[32]
805      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
806          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
807          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
808          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
809   return names[iregNo];
810}
811
812static const HChar* nameIReg32orSP ( UInt iregNo )
813{
814   if (iregNo == 31) {
815      return "wsp";
816   }
817   vassert(iregNo < 31);
818   return nameIReg32orZR(iregNo);
819}
820
821static IRExpr* getIReg32orSP ( UInt iregNo )
822{
823   vassert(iregNo < 32);
824   return unop(Iop_64to32,
825               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
826}
827
828static IRExpr* getIReg32orZR ( UInt iregNo )
829{
830   if (iregNo == 31) {
831      return mkU32(0);
832   }
833   vassert(iregNo < 31);
834   return unop(Iop_64to32,
835               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
836}
837
838static void putIReg32orSP ( UInt iregNo, IRExpr* e )
839{
840   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
841   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
842}
843
844static void putIReg32orZR ( UInt iregNo, IRExpr* e )
845{
846   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
847   if (iregNo == 31) {
848      return;
849   }
850   vassert(iregNo < 31);
851   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
852}
853
854static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
855{
856   vassert(is64 == True || is64 == False);
857   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
858}
859
860static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
861{
862   vassert(is64 == True || is64 == False);
863   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
864}
865
866static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
867{
868   vassert(is64 == True || is64 == False);
869   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
870}
871
872static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
873{
874   vassert(is64 == True || is64 == False);
875   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
876}
877
878static void putPC ( IRExpr* e )
879{
880   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
881   stmt( IRStmt_Put(OFFB_PC, e) );
882}
883
884
885/* ---------------- Vector (Q) registers ---------------- */
886
887static Int offsetQReg128 ( UInt qregNo )
888{
889   /* We don't care about endianness at this point.  It only becomes
890      relevant when dealing with sections of these registers.*/
891   switch (qregNo) {
892      case 0:  return OFFB_Q0;
893      case 1:  return OFFB_Q1;
894      case 2:  return OFFB_Q2;
895      case 3:  return OFFB_Q3;
896      case 4:  return OFFB_Q4;
897      case 5:  return OFFB_Q5;
898      case 6:  return OFFB_Q6;
899      case 7:  return OFFB_Q7;
900      case 8:  return OFFB_Q8;
901      case 9:  return OFFB_Q9;
902      case 10: return OFFB_Q10;
903      case 11: return OFFB_Q11;
904      case 12: return OFFB_Q12;
905      case 13: return OFFB_Q13;
906      case 14: return OFFB_Q14;
907      case 15: return OFFB_Q15;
908      case 16: return OFFB_Q16;
909      case 17: return OFFB_Q17;
910      case 18: return OFFB_Q18;
911      case 19: return OFFB_Q19;
912      case 20: return OFFB_Q20;
913      case 21: return OFFB_Q21;
914      case 22: return OFFB_Q22;
915      case 23: return OFFB_Q23;
916      case 24: return OFFB_Q24;
917      case 25: return OFFB_Q25;
918      case 26: return OFFB_Q26;
919      case 27: return OFFB_Q27;
920      case 28: return OFFB_Q28;
921      case 29: return OFFB_Q29;
922      case 30: return OFFB_Q30;
923      case 31: return OFFB_Q31;
924      default: vassert(0);
925   }
926}
927
928/* Write to a complete Qreg. */
929static void putQReg128 ( UInt qregNo, IRExpr* e )
930{
931   vassert(qregNo < 32);
932   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
933   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
934}
935
936/* Read a complete Qreg. */
937static IRExpr* getQReg128 ( UInt qregNo )
938{
939   vassert(qregNo < 32);
940   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
941}
942
943/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
944   bit sub-parts we can choose either integer or float types, and
945   choose float on the basis that that is the common use case and so
946   will give least interference with Put-to-Get forwarding later
947   on. */
948static IRType preferredVectorSubTypeFromSize ( UInt szB )
949{
950   switch (szB) {
951      case 1:  return Ity_I8;
952      case 2:  return Ity_I16;
953      case 4:  return Ity_I32; //Ity_F32;
954      case 8:  return Ity_F64;
955      case 16: return Ity_V128;
956      default: vassert(0);
957   }
958}
959
960/* Find the offset of the laneNo'th lane of type laneTy in the given
961   Qreg.  Since the host is little-endian, the least significant lane
962   has the lowest offset. */
963static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
964{
965   vassert(!host_is_bigendian);
966   Int base = offsetQReg128(qregNo);
967   /* Since the host is little-endian, the least significant lane
968      will be at the lowest address. */
969   /* Restrict this to known types, so as to avoid silently accepting
970      stupid types. */
971   UInt laneSzB = 0;
972   switch (laneTy) {
973      case Ity_I8:                 laneSzB = 1;  break;
974      case Ity_I16:                laneSzB = 2;  break;
975      case Ity_F32: case Ity_I32:  laneSzB = 4;  break;
976      case Ity_F64: case Ity_I64:  laneSzB = 8;  break;
977      case Ity_V128:               laneSzB = 16; break;
978      default: break;
979   }
980   vassert(laneSzB > 0);
981   UInt minOff = laneNo * laneSzB;
982   UInt maxOff = minOff + laneSzB - 1;
983   vassert(maxOff < 16);
984   return base + minOff;
985}
986
987/* Put to the least significant lane of a Qreg. */
988static void putQRegLO ( UInt qregNo, IRExpr* e )
989{
990   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
991   Int    off = offsetQRegLane(qregNo, ty, 0);
992   switch (ty) {
993      case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
994      case Ity_F32: case Ity_F64: case Ity_V128:
995         break;
996      default:
997         vassert(0); // Other cases are probably invalid
998   }
999   stmt(IRStmt_Put(off, e));
1000}
1001
1002/* Get from the least significant lane of a Qreg. */
1003static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
1004{
1005   Int off = offsetQRegLane(qregNo, ty, 0);
1006   switch (ty) {
1007      case Ity_I8:
1008      case Ity_I16:
1009      case Ity_I32: case Ity_I64:
1010      case Ity_F32: case Ity_F64: case Ity_V128:
1011         break;
1012      default:
1013         vassert(0); // Other cases are ATC
1014   }
1015   return IRExpr_Get(off, ty);
1016}
1017
1018static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
1019{
1020   static const HChar* namesQ[32]
1021      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
1022          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
1023          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
1024          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
1025   static const HChar* namesD[32]
1026      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
1027          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
1028          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
1029          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
1030   static const HChar* namesS[32]
1031      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
1032          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
1033          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
1034          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
1035   static const HChar* namesH[32]
1036      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
1037          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
1038          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
1039          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
1040   static const HChar* namesB[32]
1041      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
1042          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
1043          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
1044          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
1045   vassert(qregNo < 32);
1046   switch (sizeofIRType(laneTy)) {
1047      case 1:  return namesB[qregNo];
1048      case 2:  return namesH[qregNo];
1049      case 4:  return namesS[qregNo];
1050      case 8:  return namesD[qregNo];
1051      case 16: return namesQ[qregNo];
1052      default: vassert(0);
1053   }
1054   /*NOTREACHED*/
1055}
1056
1057static const HChar* nameQReg128 ( UInt qregNo )
1058{
1059   return nameQRegLO(qregNo, Ity_V128);
1060}
1061
1062/* Find the offset of the most significant half (8 bytes) of the given
1063   Qreg.  This requires knowing the endianness of the host. */
1064static Int offsetQRegHI64 ( UInt qregNo )
1065{
1066   return offsetQRegLane(qregNo, Ity_I64, 1);
1067}
1068
1069static IRExpr* getQRegHI64 ( UInt qregNo )
1070{
1071   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
1072}
1073
1074static void putQRegHI64 ( UInt qregNo, IRExpr* e )
1075{
1076   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
1077   Int    off = offsetQRegHI64(qregNo);
1078   switch (ty) {
1079      case Ity_I64: case Ity_F64:
1080         break;
1081      default:
1082         vassert(0); // Other cases are plain wrong
1083   }
1084   stmt(IRStmt_Put(off, e));
1085}
1086
1087/* Put to a specified lane of a Qreg. */
1088static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1089{
1090   IRType laneTy  = typeOfIRExpr(irsb->tyenv, e);
1091   Int    off     = offsetQRegLane(qregNo, laneTy, laneNo);
1092   switch (laneTy) {
1093      case Ity_F64: case Ity_I64:
1094      case Ity_I32: case Ity_F32:
1095      case Ity_I16:
1096      case Ity_I8:
1097         break;
1098      default:
1099         vassert(0); // Other cases are ATC
1100   }
1101   stmt(IRStmt_Put(off, e));
1102}
1103
1104/* Get from a specified lane of a Qreg. */
1105static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1106{
1107   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1108   switch (laneTy) {
1109      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
1110      case Ity_F64:
1111         break;
1112      default:
1113         vassert(0); // Other cases are ATC
1114   }
1115   return IRExpr_Get(off, laneTy);
1116}
1117
1118
1119//ZZ /* ---------------- Misc registers ---------------- */
1120//ZZ
1121//ZZ static void putMiscReg32 ( UInt    gsoffset,
1122//ZZ                            IRExpr* e, /* :: Ity_I32 */
1123//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
1124//ZZ {
1125//ZZ    switch (gsoffset) {
1126//ZZ       case OFFB_FPSCR:   break;
1127//ZZ       case OFFB_QFLAG32: break;
1128//ZZ       case OFFB_GEFLAG0: break;
1129//ZZ       case OFFB_GEFLAG1: break;
1130//ZZ       case OFFB_GEFLAG2: break;
1131//ZZ       case OFFB_GEFLAG3: break;
1132//ZZ       default: vassert(0); /* awaiting more cases */
1133//ZZ    }
1134//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1135//ZZ
1136//ZZ    if (guardT == IRTemp_INVALID) {
1137//ZZ       /* unconditional write */
1138//ZZ       stmt(IRStmt_Put(gsoffset, e));
1139//ZZ    } else {
1140//ZZ       stmt(IRStmt_Put(
1141//ZZ          gsoffset,
1142//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1143//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
1144//ZZ       ));
1145//ZZ    }
1146//ZZ }
1147//ZZ
1148//ZZ static IRTemp get_ITSTATE ( void )
1149//ZZ {
1150//ZZ    ASSERT_IS_THUMB;
1151//ZZ    IRTemp t = newTemp(Ity_I32);
1152//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1153//ZZ    return t;
1154//ZZ }
1155//ZZ
1156//ZZ static void put_ITSTATE ( IRTemp t )
1157//ZZ {
1158//ZZ    ASSERT_IS_THUMB;
1159//ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1160//ZZ }
1161//ZZ
1162//ZZ static IRTemp get_QFLAG32 ( void )
1163//ZZ {
1164//ZZ    IRTemp t = newTemp(Ity_I32);
1165//ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1166//ZZ    return t;
1167//ZZ }
1168//ZZ
1169//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1170//ZZ {
1171//ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1172//ZZ }
1173//ZZ
1174//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1175//ZZ    Status Register) to indicate that overflow or saturation occurred.
1176//ZZ    Nb: t must be zero to denote no saturation, and any nonzero
1177//ZZ    value to indicate saturation. */
1178//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1179//ZZ {
1180//ZZ    IRTemp old = get_QFLAG32();
1181//ZZ    IRTemp nyu = newTemp(Ity_I32);
1182//ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1183//ZZ    put_QFLAG32(nyu, condT);
1184//ZZ }
1185
1186
1187/* ---------------- FPCR stuff ---------------- */
1188
1189/* Generate IR to get hold of the rounding mode bits in FPCR, and
1190   convert them to IR format.  Bind the final result to the
1191   returned temp. */
1192static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1193{
1194   /* The ARMvfp encoding for rounding mode bits is:
1195         00  to nearest
1196         01  to +infinity
1197         10  to -infinity
1198         11  to zero
1199      We need to convert that to the IR encoding:
1200         00  to nearest (the default)
1201         10  to +infinity
1202         01  to -infinity
1203         11  to zero
1204      Which can be done by swapping bits 0 and 1.
1205      The rmode bits are at 23:22 in FPSCR.
1206   */
1207   IRTemp armEncd = newTemp(Ity_I32);
1208   IRTemp swapped = newTemp(Ity_I32);
1209   /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
1210      we don't zero out bits 24 and above, since the assignment to
1211      'swapped' will mask them out anyway. */
1212   assign(armEncd,
1213          binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1214   /* Now swap them. */
1215   assign(swapped,
1216          binop(Iop_Or32,
1217                binop(Iop_And32,
1218                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1219                      mkU32(2)),
1220                binop(Iop_And32,
1221                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1222                      mkU32(1))
1223         ));
1224   return swapped;
1225}
1226
1227
1228/*------------------------------------------------------------*/
1229/*--- Helpers for flag handling and conditional insns      ---*/
1230/*------------------------------------------------------------*/
1231
/* Return the standard assembly mnemonic for the given ARM64
   condition code.  Panics on a value outside the 16 defined codes. */
static const HChar* nameARM64Condcode ( ARM64Condcode cond )
{
   switch (cond) {
      case ARM64CondEQ:  return "eq";
      case ARM64CondNE:  return "ne";
      case ARM64CondCS:  return "cs";  // or 'hs'
      case ARM64CondCC:  return "cc";  // or 'lo'
      case ARM64CondMI:  return "mi";
      case ARM64CondPL:  return "pl";
      case ARM64CondVS:  return "vs";
      case ARM64CondVC:  return "vc";
      case ARM64CondHI:  return "hi";
      case ARM64CondLS:  return "ls";
      case ARM64CondGE:  return "ge";
      case ARM64CondLT:  return "lt";
      case ARM64CondGT:  return "gt";
      case ARM64CondLE:  return "le";
      case ARM64CondAL:  return "al";
      case ARM64CondNV:  return "nv";
      default: vpanic("name_ARM64Condcode");
   }
}
1254
1255/* and a handy shorthand for it */
1256static const HChar* nameCC ( ARM64Condcode cond ) {
1257   return nameARM64Condcode(cond);
1258}
1259
1260
1261/* Build IR to calculate some particular condition from stored
1262   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1263   Ity_I64, suitable for narrowing.  Although the return type is
1264   Ity_I64, the returned value is either 0 or 1.  'cond' must be
1265   :: Ity_I64 and must denote the condition to compute in
1266   bits 7:4, and be zero everywhere else.
1267*/
static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM64_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_condition", &arm64g_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2.  Bit i of
      mcx_mask excludes args[i]: args[0] is (cond|OP), args[3] is
      NDEP. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
1302
1303
1304/* Build IR to calculate some particular condition from stored
1305   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1306   Ity_I64, suitable for narrowing.  Although the return type is
1307   Ity_I64, the returned value is either 0 or 1.
1308*/
1309static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1310{
1311  /* First arg is "(cond << 4) | condition".  This requires that the
1312     ARM64_CC_OP_ values all fit in 4 bits.  Hence we are passing a
1313     (COND, OP) pair in the lowest 8 bits of the first argument. */
1314   vassert(cond >= 0 && cond <= 15);
1315   return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
1316}
1317
1318
1319/* Build IR to calculate just the carry flag from stored
1320   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1321   Ity_I64. */
1322static IRExpr* mk_arm64g_calculate_flag_c ( void )
1323{
1324   IRExpr** args
1325      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
1326                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1327                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1328                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1329   IRExpr* call
1330      = mkIRExprCCall(
1331           Ity_I64,
1332           0/*regparm*/,
1333           "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
1334           args
1335        );
1336   /* Exclude OP and NDEP from definedness checking.  We're only
1337      interested in DEP1 and DEP2. */
1338   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1339   return call;
1340}
1341
1342
1343//ZZ /* Build IR to calculate just the overflow flag from stored
1344//ZZ    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1345//ZZ    Ity_I32. */
1346//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1347//ZZ {
1348//ZZ    IRExpr** args
1349//ZZ       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1350//ZZ                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1351//ZZ                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1352//ZZ                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1353//ZZ    IRExpr* call
1354//ZZ       = mkIRExprCCall(
1355//ZZ            Ity_I32,
1356//ZZ            0/*regparm*/,
1357//ZZ            "armg_calculate_flag_v", &armg_calculate_flag_v,
1358//ZZ            args
1359//ZZ         );
1360//ZZ    /* Exclude OP and NDEP from definedness checking.  We're only
1361//ZZ       interested in DEP1 and DEP2. */
1362//ZZ    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1363//ZZ    return call;
1364//ZZ }
1365
1366
1367/* Build IR to calculate N Z C V in bits 31:28 of the
1368   returned word. */
1369static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1370{
1371   IRExpr** args
1372      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
1373                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1374                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1375                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1376   IRExpr* call
1377      = mkIRExprCCall(
1378           Ity_I64,
1379           0/*regparm*/,
1380           "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1381           args
1382        );
1383   /* Exclude OP and NDEP from definedness checking.  We're only
1384      interested in DEP1 and DEP2. */
1385   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1386   return call;
1387}
1388
1389
1390/* Build IR to set the flags thunk, in the most general case. */
1391static
1392void setFlags_D1_D2_ND ( UInt cc_op,
1393                         IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1394{
1395   vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I64));
1396   vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I64));
1397   vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I64));
1398   vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1399   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(cc_op) ));
1400   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1401   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1402   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1403}
1404
1405/* Build IR to set the flags thunk after ADD or SUB. */
1406static
1407void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1408{
1409   IRTemp argL64 = IRTemp_INVALID;
1410   IRTemp argR64 = IRTemp_INVALID;
1411   IRTemp z64    = newTemp(Ity_I64);
1412   if (is64) {
1413      argL64 = argL;
1414      argR64 = argR;
1415   } else {
1416      argL64 = newTemp(Ity_I64);
1417      argR64 = newTemp(Ity_I64);
1418      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1419      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1420   }
1421   assign(z64, mkU64(0));
1422   UInt cc_op = ARM64G_CC_OP_NUMBER;
1423   /**/ if ( isSUB &&  is64) { cc_op = ARM64G_CC_OP_SUB64; }
1424   else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1425   else if (!isSUB &&  is64) { cc_op = ARM64G_CC_OP_ADD64; }
1426   else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1427   else                      { vassert(0); }
1428   setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1429}
1430
1431static
1432void setFlags_ADC_SBC(Bool is64, Bool isSBC, IRTemp argL, IRTemp argR, IRTemp oldC)
1433{
1434   IRTemp argL64 = IRTemp_INVALID;
1435   IRTemp argR64 = IRTemp_INVALID;
1436   IRTemp oldC64 = IRTemp_INVALID;
1437   if (is64) {
1438      argL64 = argL;
1439      argR64 = argR;
1440      oldC64 = oldC;
1441   } else {
1442      argL64 = newTemp(Ity_I64);
1443      argR64 = newTemp(Ity_I64);
1444      oldC64 = newTemp(Ity_I64);
1445      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1446      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1447      assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1448   }
1449   UInt cc_op = ARM64G_CC_OP_NUMBER;
1450   /**/ if ( isSBC &&  is64) { cc_op = ARM64G_CC_OP_SBC64; }
1451   else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1452   else if (!isSBC &&  is64) { cc_op = ARM64G_CC_OP_ADC64; }
1453   else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1454   else                      { vassert(0); }
1455   setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1456}
1457
1458/* Build IR to set the flags thunk after ADD or SUB, if the given
1459   condition evaluates to True at run time.  If not, the flags are set
1460   to the specified NZCV value. */
1461static
1462void setFlags_ADD_SUB_conditionally (
1463        Bool is64, Bool isSUB,
1464        IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1465     )
1466{
1467   /* Generate IR as follows:
1468        CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1469        CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1470        CC_DEP2 = ITE(cond, argR64, 0)
1471        CC_NDEP = 0
1472   */
1473
1474   IRTemp z64 = newTemp(Ity_I64);
1475   assign(z64, mkU64(0));
1476
1477   /* Establish the operation and operands for the True case. */
1478   IRTemp t_dep1 = IRTemp_INVALID;
1479   IRTemp t_dep2 = IRTemp_INVALID;
1480   UInt   t_op   = ARM64G_CC_OP_NUMBER;
1481   /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
1482   else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1483   else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
1484   else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1485   else                      { vassert(0); }
1486   /* */
1487   if (is64) {
1488      t_dep1 = argL;
1489      t_dep2 = argR;
1490   } else {
1491      t_dep1 = newTemp(Ity_I64);
1492      t_dep2 = newTemp(Ity_I64);
1493      assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1494      assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1495   }
1496
1497   /* Establish the operation and operands for the False case. */
1498   IRTemp f_dep1 = newTemp(Ity_I64);
1499   IRTemp f_dep2 = z64;
1500   UInt   f_op   = ARM64G_CC_OP_COPY;
1501   assign(f_dep1, mkU64(nzcv << 28));
1502
1503   /* Final thunk values */
1504   IRTemp dep1 = newTemp(Ity_I64);
1505   IRTemp dep2 = newTemp(Ity_I64);
1506   IRTemp op   = newTemp(Ity_I64);
1507
1508   assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1509   assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1510   assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1511
1512   /* finally .. */
1513   stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
1514   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1515   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1516   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1517}
1518
1519/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1520static
1521void setFlags_LOGIC ( Bool is64, IRTemp res )
1522{
1523   IRTemp res64 = IRTemp_INVALID;
1524   IRTemp z64   = newTemp(Ity_I64);
1525   UInt   cc_op = ARM64G_CC_OP_NUMBER;
1526   if (is64) {
1527      res64 = res;
1528      cc_op = ARM64G_CC_OP_LOGIC64;
1529   } else {
1530      res64 = newTemp(Ity_I64);
1531      assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1532      cc_op = ARM64G_CC_OP_LOGIC32;
1533   }
1534   assign(z64, mkU64(0));
1535   setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1536}
1537
1538/* Build IR to set the flags thunk to a given NZCV value.  NZCV is
1539   located in bits 31:28 of the supplied value. */
1540static
1541void setFlags_COPY ( IRTemp nzcv_28x0 )
1542{
1543   IRTemp z64 = newTemp(Ity_I64);
1544   assign(z64, mkU64(0));
1545   setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
1546}
1547
1548
1549//ZZ /* Minor variant of the above that sets NDEP to zero (if it
1550//ZZ    sets it at all) */
1551//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1552//ZZ                              IRTemp t_dep2,
1553//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1554//ZZ {
1555//ZZ    IRTemp z32 = newTemp(Ity_I32);
1556//ZZ    assign( z32, mkU32(0) );
1557//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1558//ZZ }
1559//ZZ
1560//ZZ
1561//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
1562//ZZ    sets it at all) */
1563//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1564//ZZ                              IRTemp t_ndep,
1565//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1566//ZZ {
1567//ZZ    IRTemp z32 = newTemp(Ity_I32);
1568//ZZ    assign( z32, mkU32(0) );
1569//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1570//ZZ }
1571//ZZ
1572//ZZ
1573//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1574//ZZ    sets them at all) */
1575//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1576//ZZ                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1577//ZZ {
1578//ZZ    IRTemp z32 = newTemp(Ity_I32);
1579//ZZ    assign( z32, mkU32(0) );
1580//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1581//ZZ }
1582
1583
1584/*------------------------------------------------------------*/
1585/*--- Misc math helpers                                    ---*/
1586/*------------------------------------------------------------*/
1587
1588/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
1589static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
1590{
1591   IRTemp maskT = newTemp(Ity_I64);
1592   IRTemp res   = newTemp(Ity_I64);
1593   vassert(sh >= 1 && sh <= 63);
1594   assign(maskT, mkU64(mask));
1595   assign( res,
1596           binop(Iop_Or64,
1597                 binop(Iop_Shr64,
1598                       binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
1599                       mkU8(sh)),
1600                 binop(Iop_And64,
1601                       binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
1602                       mkexpr(maskT))
1603                 )
1604           );
1605   return res;
1606}
1607
1608/* Generates byte swaps within 32-bit lanes. */
1609static IRTemp math_UINTSWAP64 ( IRTemp src )
1610{
1611   IRTemp res;
1612   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
1613   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
1614   return res;
1615}
1616
1617/* Generates byte swaps within 16-bit lanes. */
1618static IRTemp math_USHORTSWAP64 ( IRTemp src )
1619{
1620   IRTemp res;
1621   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
1622   return res;
1623}
1624
1625/* Generates a 64-bit byte swap. */
1626static IRTemp math_BYTESWAP64 ( IRTemp src )
1627{
1628   IRTemp res;
1629   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
1630   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
1631   res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
1632   return res;
1633}
1634
1635/* Generates a 64-bit bit swap. */
1636static IRTemp math_BITSWAP64 ( IRTemp src )
1637{
1638   IRTemp res;
1639   res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
1640   res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
1641   res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
1642   return math_BYTESWAP64(res);
1643}
1644
1645/* Duplicates the bits at the bottom of the given word to fill the
1646   whole word.  src :: Ity_I64 is assumed to have zeroes everywhere
1647   except for the bottom bits. */
1648static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
1649{
1650   if (srcTy == Ity_I8) {
1651      IRTemp t16 = newTemp(Ity_I64);
1652      assign(t16, binop(Iop_Or64, mkexpr(src),
1653                                  binop(Iop_Shl64, mkexpr(src), mkU8(8))));
1654      IRTemp t32 = newTemp(Ity_I64);
1655      assign(t32, binop(Iop_Or64, mkexpr(t16),
1656                                  binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
1657      IRTemp t64 = newTemp(Ity_I64);
1658      assign(t64, binop(Iop_Or64, mkexpr(t32),
1659                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
1660      return t64;
1661   }
1662   if (srcTy == Ity_I16) {
1663      IRTemp t32 = newTemp(Ity_I64);
1664      assign(t32, binop(Iop_Or64, mkexpr(src),
1665                                  binop(Iop_Shl64, mkexpr(src), mkU8(16))));
1666      IRTemp t64 = newTemp(Ity_I64);
1667      assign(t64, binop(Iop_Or64, mkexpr(t32),
1668                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
1669      return t64;
1670   }
1671   if (srcTy == Ity_I32) {
1672      IRTemp t64 = newTemp(Ity_I64);
1673      assign(t64, binop(Iop_Or64, mkexpr(src),
1674                                  binop(Iop_Shl64, mkexpr(src), mkU8(32))));
1675      return t64;
1676   }
1677   if (srcTy == Ity_I64) {
1678      return src;
1679   }
1680   vassert(0);
1681}
1682
1683
1684/*------------------------------------------------------------*/
1685/*--- FP comparison helpers                                ---*/
1686/*------------------------------------------------------------*/
1687
1688/* irRes :: Ity_I32 holds a floating point comparison result encoded
1689   as an IRCmpF64Result.  Generate code to convert it to an
1690   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
1691   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   /* irRes32 :: Ity_I32; the result temp is :: Ity_I64 with the NZCV
      group in its bottom 4 bits and zeroes above. */
   IRTemp ix       = newTemp(Ity_I64);
   IRTemp termL    = newTemp(Ity_I64);
   IRTemp termR    = newTemp(Ity_I64);
   IRTemp nzcv     = newTemp(Ity_I64);
   IRTemp irRes    = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct value NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and UN.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   /* Widen the I32 comparison result so all work is done at 64 bits. */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   /* ix = (bit 6 of irRes, shifted to bit 1) | (bit 0 of irRes). */
   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
1764
1765
1766/*------------------------------------------------------------*/
1767/*--- Data processing (immediate)                          ---*/
1768/*------------------------------------------------------------*/
1769
1770/* Helper functions for supporting "DecodeBitMasks" */
1771
1772static ULong dbm_ROR ( Int width, ULong x, Int rot )
1773{
1774   vassert(width > 0 && width <= 64);
1775   vassert(rot >= 0 && rot < width);
1776   if (rot == 0) return x;
1777   ULong res = x >> rot;
1778   res |= (x << (width - rot));
1779   if (width < 64)
1780     res &= ((1ULL << width) - 1);
1781   return res;
1782}
1783
1784static ULong dbm_RepTo64( Int esize, ULong x )
1785{
1786   switch (esize) {
1787      case 64:
1788         return x;
1789      case 32:
1790         x &= 0xFFFFFFFF; x |= (x << 32);
1791         return x;
1792      case 16:
1793         x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
1794         return x;
1795      case 8:
1796         x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
1797         return x;
1798      case 4:
1799         x &= 0xF; x |= (x << 4); x |= (x << 8);
1800         x |= (x << 16); x |= (x << 32);
1801         return x;
1802      case 2:
1803         x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
1804         x |= (x << 16); x |= (x << 32);
1805         return x;
1806      default:
1807         break;
1808   }
1809   vpanic("dbm_RepTo64");
1810   /*NOTREACHED*/
1811   return 0;
1812}
1813
1814static Int dbm_highestSetBit ( ULong x )
1815{
1816   Int i;
1817   for (i = 63; i >= 0; i--) {
1818      if (x & (1ULL << i))
1819         return i;
1820   }
1821   vassert(x == 0);
1822   return -1;
1823}
1824
/* Implementation of the ARMv8 shared pseudocode function
   DecodeBitMasks.  Decodes the immN:immr:imms fields of a
   logical-immediate or bitfield instruction into |wmask| and |tmask|,
   each an esize-bit element replicated out to 64 bits.  Either OUT
   pointer may be NULL if that mask is not required.  |immediate|
   should be True for the logical-immediate use, in which case the
   (reserved) all-ones element encoding is rejected.  |M| is the
   register width, 32 or 64.  Returns False for reserved encodings. */
static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   /* The index of the highest set bit of immN:NOT(imms) determines
      the element size: esize == 2^len. */
   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   /* For logical immediates, an all-ones element is reserved. */
   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   /* diff may wrap; it is reduced modulo esize below when forming d. */
   Int   diff = S - R;
   diff &= 63;
   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   /* wmask: rotate the S+1 low ones right by R within the element,
      then replicate across 64 bits.  tmask: replicate elem_d. */
   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}
1884
1885
/* Decode and translate one instruction from the "Data Processing
   (immediate)" group.  Emits IR and returns True on success, or
   returns False if the bit pattern is not recognised.  Note that
   |dres| is not written by any of the cases below. */
static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */

   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64   = INSN(31,31) == 1;
      Bool isSub  = INSN(30,30) == 1;
      Bool setCC  = INSN(29,29) == 1;
      UInt sh     = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through */
      } else {
         vassert(sh <= 1);
         /* sh==1 means the 12-bit immediate is shifted left by 12. */
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL  = newTemp(Ity_I64);
            IRTemp argR  = newTemp(Ity_I64);
            IRTemp res   = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               /* ADDS/SUBS: destination is Rd|ZR, and flags are set. */
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL  = newTemp(Ity_I32);
            IRTemp argR  = newTemp(Ity_I32);
            IRTemp res   = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }

   /* -------------------- ADR/ADRP -------------------- */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt  bP    = INSN(31,31);
      UInt  immLo = INSN(30,29);
      UInt  immHi = INSN(23,5);
      UInt  rD    = INSN(4,0);
      ULong uimm  = (immHi << 2) | immLo;
      ULong simm  = sx_to_64(uimm, 21);
      ULong val;
      /* ADRP: page-of(PC) + simm*4096; ADR: PC + simm. */
      if (bP) {
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
      return True;
   }

   /* -------------------- LOGIC(imm) -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
      /* 31 30 28     22 21   15   9  4
         sf op 100100 N  immr imms Rn Rd
           op=00: AND  Rd|SP, Rn, #imm
           op=01: ORR  Rd|SP, Rn, #imm
           op=10: EOR  Rd|SP, Rn, #imm
           op=11: ANDS Rd|ZR, Rn, #imm
      */
      Bool  is64 = INSN(31,31) == 1;
      UInt  op   = INSN(30,29);
      UInt  N    = INSN(22,22);
      UInt  immR = INSN(21,16);
      UInt  immS = INSN(15,10);
      UInt  nn   = INSN(9,5);
      UInt  dd   = INSN(4,0);
      ULong imm  = 0;
      Bool  ok;
      /* N must be 0 in the 32-bit case; N=1 is reserved there. */
      if (N == 1 && !is64)
         goto after_logic_imm; /* not allowed; fall through */
      ok = dbm_DecodeBitMasks(&imm, NULL,
                              N, immS, immR, True, is64 ? 64 : 32);
      if (!ok)
         goto after_logic_imm;

      const HChar* names[4] = { "and", "orr", "eor", "ands" };
      const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
      const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };

      vassert(op < 4);
      if (is64) {
         IRExpr* argL = getIReg64orZR(nn);
         IRExpr* argR = mkU64(imm);
         IRTemp  res  = newTemp(Ity_I64);
         assign(res, binop(ops64[op], argL, argR));
         /* op==3 (ANDS) writes Rd|ZR and sets flags; others write Rd|SP. */
         if (op < 3) {
            putIReg64orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
         } else {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_LOGIC(True/*is64*/, res);
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
         }
      } else {
         IRExpr* argL = getIReg32orZR(nn);
         IRExpr* argR = mkU32((UInt)imm);
         IRTemp  res  = newTemp(Ity_I32);
         assign(res, binop(ops32[op], argL, argR));
         if (op < 3) {
            putIReg32orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
         } else {
            putIReg32orZR(dd, mkexpr(res));
            setFlags_LOGIC(False/*!is64*/, res);
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
         }
      }
      return True;
   }
   after_logic_imm:

   /* -------------------- MOV{Z,N,K} -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
      /* 31 30 28      22 20    4
         |  |  |       |  |     |
         sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
         sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
         sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
      */
      Bool is64   = INSN(31,31) == 1;
      UInt subopc = INSN(30,29);
      UInt hw     = INSN(22,21);
      UInt imm16  = INSN(20,5);
      UInt dd     = INSN(4,0);
      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
         /* invalid; fall through */
      } else {
         ULong imm64 = ((ULong)imm16) << (16 * hw);
         if (!is64)
            vassert(imm64 < 0x100000000ULL);
         switch (subopc) {
            case BITS2(1,0): // MOVZ
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(0,0): // MOVN
               imm64 = ~imm64;
               if (!is64)
                  imm64 &= 0xFFFFFFFFULL;
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(1,1): // MOVK
               /* This is more complex.  We are inserting a slice into
                  the destination register, so we need to have the old
                  value of it. */
               if (is64) {
                  IRTemp old = newTemp(Ity_I64);
                  assign(old, getIReg64orZR(dd));
                  ULong mask = 0xFFFFULL << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or64,
                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
                             mkU64(imm64));
                  putIReg64orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg64orZR(dd), imm16, 16*hw);
               } else {
                  IRTemp old = newTemp(Ity_I32);
                  assign(old, getIReg32orZR(dd));
                  vassert(hw <= 1);
                  UInt mask = 0xFFFF << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or32,
                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
                             mkU32((UInt)imm64));
                  putIReg32orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg32orZR(dd), imm16, 16*hw);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

   /* -------------------- {U,S,}BFM -------------------- */
   /*    30 28     22 21   15   9  4

      sf 10 100110 N  immr imms nn dd
         UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         UBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 00 100110 N  immr imms nn dd
         SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         SBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 01 100110 N  immr imms nn dd
         BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   */
   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
      UInt sf     = INSN(31,31);
      UInt opc    = INSN(30,29);
      UInt N      = INSN(22,22);
      UInt immR   = INSN(21,16);
      UInt immS   = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool inZero = False;
      Bool extend = False;
      const HChar* nm = "???";
      /* skip invalid combinations */
      switch (opc) {
         case BITS2(0,0):
            inZero = True; extend = True; nm = "sbfm"; break;
         case BITS2(0,1):
            inZero = False; extend = False; nm = "bfm"; break;
         case BITS2(1,0):
            inZero = True; extend = False; nm = "ubfm"; break;
         case BITS2(1,1):
            goto after_bfm; /* invalid */
         default:
            vassert(0);
      }
      if (sf == 1 && N != 1) goto after_bfm;
      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
                             || ((immS >> 5) & 1) != 0)) goto after_bfm;
      ULong wmask = 0, tmask = 0;
      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                   N, immS, immR, False, sf == 1 ? 64 : 32);
      if (!ok) goto after_bfm; /* hmmm */

      Bool   is64 = sf == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;

      IRTemp dst = newTemp(ty);
      IRTemp src = newTemp(ty);
      IRTemp bot = newTemp(ty);
      IRTemp top = newTemp(ty);
      IRTemp res = newTemp(ty);
      /* inZero: {U,S}BFM start from zero; BFM starts from old Rd. */
      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
      assign(src, getIRegOrZR(is64, nn));
      /* perform bitfield move on low bits */
      assign(bot, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
                                         mkU(ty, wmask))));
      /* determine extension bits (sign, zero or dest register) */
      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
      /* combine extension bits and result bits */
      assign(res, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("%s %s, %s, immR=%u, immS=%u\n",
          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
      return True;
   }
   after_bfm:

   /* ---------------------- EXTR ---------------------- */
   /*   30 28     22 20 15   9 4
      1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
      0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   */
   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      UInt imm6  = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      Bool valid = True;
      /* sf (bit 31) and N (bit 22) must agree. */
      if (INSN(31,31) != INSN(22,22))
        valid = False;
      if (!is64 && imm6 >= 32)
        valid = False;
      if (!valid) goto after_extr;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
        assign(res, mkexpr(srcLo));
      } else {
        UInt szBits = 8 * sizeofIRType(ty);
        vassert(imm6 > 0 && imm6 < szBits);
        assign(res, binop(mkOR(ty),
                          binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                          binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
  after_extr:

   /* No pattern in this group matched. */
   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
#  undef INSN
}
2227
2228
2229/*------------------------------------------------------------*/
2230/*--- Data processing (register) instructions              ---*/
2231/*------------------------------------------------------------*/
2232
2233static const HChar* nameSH ( UInt sh ) {
2234   switch (sh) {
2235      case 0: return "lsl";
2236      case 1: return "lsr";
2237      case 2: return "asr";
2238      case 3: return "ror";
2239      default: vassert(0);
2240   }
2241}
2242
2243/* Generate IR to get a register value, possibly shifted by an
2244   immediate.  Returns either a 32- or 64-bit temporary holding the
2245   result.  After the shift, the value can optionally be NOT-ed
2246   too.
2247
2248   sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
2249   in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
2250   isn't allowed, but it's the job of the caller to check that.
2251*/
2252static IRTemp getShiftedIRegOrZR ( Bool is64,
2253                                   UInt sh_how, UInt sh_amt, UInt regNo,
2254                                   Bool invert )
2255{
2256   vassert(sh_how < 4);
2257   vassert(sh_amt < (is64 ? 64 : 32));
2258   IRType ty = is64 ? Ity_I64 : Ity_I32;
2259   IRTemp t0 = newTemp(ty);
2260   assign(t0, getIRegOrZR(is64, regNo));
2261   IRTemp t1 = newTemp(ty);
2262   switch (sh_how) {
2263      case BITS2(0,0):
2264         assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2265         break;
2266      case BITS2(0,1):
2267         assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2268         break;
2269      case BITS2(1,0):
2270         assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2271         break;
2272      case BITS2(1,1):
2273         assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2274         break;
2275      default:
2276         vassert(0);
2277   }
2278   if (invert) {
2279      IRTemp t2 = newTemp(ty);
2280      assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2281      return t2;
2282   } else {
2283      return t1;
2284   }
2285}
2286
2287
2288static
2289Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2290                                        UInt insn)
2291{
2292#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
2293
2294   /* ------------------- ADD/SUB(reg) ------------------- */
2295   /* x==0 => 32 bit op      x==1 => 64 bit op
2296      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2297
2298      31 30 29 28    23 21 20 15   9  4
2299      |  |  |  |     |  |  |  |    |  |
2300      x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
2301      x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
2302      x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
2303      x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
2304   */
2305   if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2306      UInt   bX    = INSN(31,31);
2307      UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
2308      UInt   bS    = INSN(29, 29); /* set flags? */
2309      UInt   sh    = INSN(23,22);
2310      UInt   rM    = INSN(20,16);
2311      UInt   imm6  = INSN(15,10);
2312      UInt   rN    = INSN(9,5);
2313      UInt   rD    = INSN(4,0);
2314      Bool   isSUB = bOP == 1;
2315      Bool   is64  = bX == 1;
2316      IRType ty    = is64 ? Ity_I64 : Ity_I32;
2317      if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2318         /* invalid; fall through */
2319      } else {
2320         IRTemp argL = newTemp(ty);
2321         assign(argL, getIRegOrZR(is64, rN));
2322         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2323         IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
2324         IRTemp res  = newTemp(ty);
2325         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2326         if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2327         if (bS) {
2328            setFlags_ADD_SUB(is64, isSUB, argL, argR);
2329         }
2330         DIP("%s%s %s, %s, %s, %s #%u\n",
2331             bOP ? "sub" : "add", bS ? "s" : "",
2332             nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2333             nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2334         return True;
2335      }
2336   }
2337
2338   /* ------------------- ADC/SBC(reg) ------------------- */
2339   /* x==0 => 32 bit op      x==1 => 64 bit op
2340
2341      31 30 29 28    23 21 20 15     9  4
2342      |  |  |  |     |  |  |  |      |  |
2343      x  0  0  11010 00 0  Rm 000000 Rn Rd   ADC  Rd,Rn,Rm
2344      x  0  1  11010 00 0  Rm 000000 Rn Rd   ADCS Rd,Rn,Rm
2345      x  1  0  11010 00 0  Rm 000000 Rn Rd   SBC  Rd,Rn,Rm
2346      x  1  1  11010 00 0  Rm 000000 Rn Rd   SBCS Rd,Rn,Rm
2347   */
2348
2349   if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2350      UInt   bX    = INSN(31,31);
2351      UInt   bOP   = INSN(30,30); /* 0: ADC, 1: SBC */
2352      UInt   bS    = INSN(29,29); /* set flags */
2353      UInt   rM    = INSN(20,16);
2354      UInt   rN    = INSN(9,5);
2355      UInt   rD    = INSN(4,0);
2356
2357      Bool   isSUB = bOP == 1;
2358      Bool   is64  = bX == 1;
2359      IRType ty    = is64 ? Ity_I64 : Ity_I32;
2360
2361      IRTemp oldC = newTemp(ty);
2362      assign(oldC,
2363             is64 ? mk_arm64g_calculate_flag_c()
2364                  : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2365
2366      IRTemp argL = newTemp(ty);
2367      assign(argL, getIRegOrZR(is64, rN));
2368      IRTemp argR = newTemp(ty);
2369      assign(argR, getIRegOrZR(is64, rM));
2370
2371      IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
2372      IRTemp res  = newTemp(ty);
2373      if (isSUB) {
2374         IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2375         IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2376         assign(res,
2377                binop(op,
2378                      binop(op, mkexpr(argL), mkexpr(argR)),
2379                      binop(xorOp, mkexpr(oldC), one)));
2380      } else {
2381         assign(res,
2382                binop(op,
2383                      binop(op, mkexpr(argL), mkexpr(argR)),
2384                      mkexpr(oldC)));
2385      }
2386
2387      if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2388
2389      if (bS) {
2390         setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2391      }
2392
2393      DIP("%s%s %s, %s, %s\n",
2394          bOP ? "sbc" : "adc", bS ? "s" : "",
2395          nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2396          nameIRegOrZR(is64, rM));
2397      return True;
2398   }
2399
2400
2401
2402   /* -------------------- LOGIC(reg) -------------------- */
2403   /* x==0 => 32 bit op      x==1 => 64 bit op
2404      N==0 => inv? is no-op (no inversion)
2405      N==1 => inv? is NOT
2406      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2407
2408      31 30 28    23 21 20 15   9  4
2409      |  |  |     |  |  |  |    |  |
2410      x  00 01010 sh N  Rm imm6 Rn Rd  AND  Rd,Rn, inv?(sh(Rm,imm6))
2411      x  01 01010 sh N  Rm imm6 Rn Rd  ORR  Rd,Rn, inv?(sh(Rm,imm6))
2412      x  10 01010 sh N  Rm imm6 Rn Rd  EOR  Rd,Rn, inv?(sh(Rm,imm6))
2413      x  11 01010 sh N  Rm imm6 Rn Rd  ANDS Rd,Rn, inv?(sh(Rm,imm6))
2414      With N=1, the names are: BIC ORN EON BICS
2415   */
2416   if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2417      UInt   bX   = INSN(31,31);
2418      UInt   sh   = INSN(23,22);
2419      UInt   bN   = INSN(21,21);
2420      UInt   rM   = INSN(20,16);
2421      UInt   imm6 = INSN(15,10);
2422      UInt   rN   = INSN(9,5);
2423      UInt   rD   = INSN(4,0);
2424      Bool   is64 = bX == 1;
2425      IRType ty   = is64 ? Ity_I64 : Ity_I32;
2426      if (!is64 && imm6 > 31) {
2427         /* invalid; fall though */
2428      } else {
2429         IRTemp argL = newTemp(ty);
2430         assign(argL, getIRegOrZR(is64, rN));
2431         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2432         IROp   op   = Iop_INVALID;
2433         switch (INSN(30,29)) {
2434            case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2435            case BITS2(0,1):                  op = mkOR(ty);  break;
2436            case BITS2(1,0):                  op = mkXOR(ty); break;
2437            default: vassert(0);
2438         }
2439         IRTemp res = newTemp(ty);
2440         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2441         if (INSN(30,29) == BITS2(1,1)) {
2442            setFlags_LOGIC(is64, res);
2443         }
2444         putIRegOrZR(is64, rD, mkexpr(res));
2445
2446         static const HChar* names_op[8]
2447            = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2448         vassert(((bN << 2) | INSN(30,29)) < 8);
2449         const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2450         /* Special-case the printing of "MOV" */
2451         if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2452            DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2453                                nameIRegOrZR(is64, rM));
2454         } else {
2455            DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2456                nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2457                nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2458         }
2459         return True;
2460      }
2461   }
2462
2463   /* -------------------- {U,S}MULH -------------------- */
2464   /* 31       23 22 20 15     9   4
2465      10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
2466      10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
2467   */
2468   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
2469       && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
2470      Bool isU = INSN(23,23) == 1;
2471      UInt mm  = INSN(20,16);
2472      UInt nn  = INSN(9,5);
2473      UInt dd  = INSN(4,0);
2474      putIReg64orZR(dd, unop(Iop_128HIto64,
2475                             binop(isU ? Iop_MullU64 : Iop_MullS64,
2476                                   getIReg64orZR(nn), getIReg64orZR(mm))));
2477      DIP("%cmulh %s, %s, %s\n",
2478          isU ? 'u' : 's',
2479          nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2480      return True;
2481   }
2482
2483   /* -------------------- M{ADD,SUB} -------------------- */
2484   /* 31 30           20 15 14 9 4
2485      sf 00 11011 000 m  0  a  n r   MADD Rd,Rn,Rm,Ra  d = a+m*n
2486      sf 00 11011 000 m  1  a  n r   MADD Rd,Rn,Rm,Ra  d = a-m*n
2487   */
2488   if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
2489      Bool is64  = INSN(31,31) == 1;
2490      UInt mm    = INSN(20,16);
2491      Bool isAdd = INSN(15,15) == 0;
2492      UInt aa    = INSN(14,10);
2493      UInt nn    = INSN(9,5);
2494      UInt dd    = INSN(4,0);
2495      if (is64) {
2496         putIReg64orZR(
2497            dd,
2498            binop(isAdd ? Iop_Add64 : Iop_Sub64,
2499                  getIReg64orZR(aa),
2500                  binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
2501      } else {
2502         putIReg32orZR(
2503            dd,
2504            binop(isAdd ? Iop_Add32 : Iop_Sub32,
2505                  getIReg32orZR(aa),
2506                  binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
2507      }
2508      DIP("%s %s, %s, %s, %s\n",
2509          isAdd ? "madd" : "msub",
2510          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
2511          nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
2512      return True;
2513   }
2514
2515   /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
2516   /* 31 30 28        20 15   11 9  4
2517      sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
2518      sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
2519      sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
2520      sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
2521      In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
2522   */
2523   if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
2524      Bool    is64 = INSN(31,31) == 1;
2525      UInt    b30  = INSN(30,30);
2526      UInt    mm   = INSN(20,16);
2527      UInt    cond = INSN(15,12);
2528      UInt    b10  = INSN(10,10);
2529      UInt    nn   = INSN(9,5);
2530      UInt    dd   = INSN(4,0);
2531      UInt    op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
2532      IRType  ty   = is64 ? Ity_I64 : Ity_I32;
2533      IRExpr* argL = getIRegOrZR(is64, nn);
2534      IRExpr* argR = getIRegOrZR(is64, mm);
2535      switch (op) {
2536         case BITS2(0,0):
2537            break;
2538         case BITS2(0,1):
2539            argR = binop(mkADD(ty), argR, mkU(ty,1));
2540            break;
2541         case BITS2(1,0):
2542            argR = unop(mkNOT(ty), argR);
2543            break;
2544         case BITS2(1,1):
2545            argR = binop(mkSUB(ty), mkU(ty,0), argR);
2546            break;
2547         default:
2548            vassert(0);
2549      }
2550      putIRegOrZR(
2551         is64, dd,
2552         IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
2553                    argL, argR)
2554      );
2555      const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
2556      DIP("%s %s, %s, %s, %s\n", op_nm[op],
2557          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
2558          nameIRegOrZR(is64, mm), nameCC(cond));
2559      return True;
2560   }
2561
2562   /* -------------- ADD/SUB(extended reg) -------------- */
2563   /*     28         20 15  12   9 4
2564      000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
2565      100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld
2566
2567      001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
2568      101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld
2569
2570      010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
2571      110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld
2572
2573      011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
2574      111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld
2575
2576      The 'm' operand is extended per opt, thusly:
2577
2578        000   Xm & 0xFF           UXTB
2579        001   Xm & 0xFFFF         UXTH
2580        010   Xm & (2^32)-1       UXTW
2581        011   Xm                  UXTX
2582
2583        100   Xm sx from bit 7    SXTB
2584        101   Xm sx from bit 15   SXTH
2585        110   Xm sx from bit 31   SXTW
2586        111   Xm                  SXTX
2587
2588      In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
2589      operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
2590      are the identity operation on Wm.
2591
2592      After extension, the value is shifted left by imm3 bits, which
2593      may only be in the range 0 .. 4 inclusive.
2594   */
2595   if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
2596      Bool is64  = INSN(31,31) == 1;
2597      Bool isSub = INSN(30,30) == 1;
2598      Bool setCC = INSN(29,29) == 1;
2599      UInt mm    = INSN(20,16);
2600      UInt opt   = INSN(15,13);
2601      UInt imm3  = INSN(12,10);
2602      UInt nn    = INSN(9,5);
2603      UInt dd    = INSN(4,0);
2604      const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
2605                                  "sxtb", "sxth", "sxtw", "sxtx" };
2606      /* Do almost the same thing in the 32- and 64-bit cases. */
2607      IRTemp xN = newTemp(Ity_I64);
2608      IRTemp xM = newTemp(Ity_I64);
2609      assign(xN, getIReg64orSP(nn));
2610      assign(xM, getIReg64orZR(mm));
2611      IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
2612      Int     shSX = 0;
2613      /* widen Xm .. */
2614      switch (opt) {
2615         case BITS3(0,0,0): // UXTB
2616            xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
2617         case BITS3(0,0,1): // UXTH
2618            xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
2619         case BITS3(0,1,0): // UXTW -- noop for the 32bit case
2620            if (is64) {
2621               xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
2622            }
2623            break;
2624         case BITS3(0,1,1): // UXTX -- always a noop
2625            break;
2626         case BITS3(1,0,0): // SXTB
2627            shSX = 56; goto sxTo64;
2628         case BITS3(1,0,1): // SXTH
2629            shSX = 48; goto sxTo64;
2630         case BITS3(1,1,0): // SXTW -- noop for the 32bit case
2631            if (is64) {
2632               shSX = 32; goto sxTo64;
2633            }
2634            break;
2635         case BITS3(1,1,1): // SXTX -- always a noop
2636            break;
2637         sxTo64:
2638            vassert(shSX >= 32);
2639            xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
2640                        mkU8(shSX));
2641            break;
2642         default:
2643            vassert(0);
2644      }
2645      /* and now shift */
2646      IRTemp argL = xN;
2647      IRTemp argR = newTemp(Ity_I64);
2648      assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
2649      IRTemp res = newTemp(Ity_I64);
2650      assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
2651                        mkexpr(argL), mkexpr(argR)));
2652      if (is64) {
2653         if (setCC) {
2654            putIReg64orZR(dd, mkexpr(res));
2655            setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
2656         } else {
2657            putIReg64orSP(dd, mkexpr(res));
2658         }
2659      } else {
2660         if (setCC) {
2661            IRTemp argL32 = newTemp(Ity_I32);
2662            IRTemp argR32 = newTemp(Ity_I32);
2663            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
2664            assign(argL32, unop(Iop_64to32, mkexpr(argL)));
2665            assign(argR32, unop(Iop_64to32, mkexpr(argR)));
2666            setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
2667         } else {
2668            putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
2669         }
2670      }
2671      DIP("%s%s %s, %s, %s %s lsl %u\n",
2672          isSub ? "sub" : "add", setCC ? "s" : "",
2673          setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
2674          nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
2675          nameExt[opt], imm3);
2676      return True;
2677   }
2678
2679   /* ---------------- CCMP/CCMN(imm) ---------------- */
2680   /* Bizarrely, these appear in the "data processing register"
2681      category, even though they are operations against an
2682      immediate. */
2683   /* 31   29        20   15   11 9    3
2684      sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
2685      sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond
2686
2687      Operation is:
2688         (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
2689         (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
2690   */
2691   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
2692       && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
2693      Bool is64  = INSN(31,31) == 1;
2694      Bool isSUB = INSN(30,30) == 1;
2695      UInt imm5  = INSN(20,16);
2696      UInt cond  = INSN(15,12);
2697      UInt nn    = INSN(9,5);
2698      UInt nzcv  = INSN(3,0);
2699
2700      IRTemp condT = newTemp(Ity_I1);
2701      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
2702
2703      IRType ty   = is64 ? Ity_I64 : Ity_I32;
2704      IRTemp argL = newTemp(ty);
2705      IRTemp argR = newTemp(ty);
2706
2707      if (is64) {
2708         assign(argL, getIReg64orZR(nn));
2709         assign(argR, mkU64(imm5));
2710      } else {
2711         assign(argL, getIReg32orZR(nn));
2712         assign(argR, mkU32(imm5));
2713      }
2714      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
2715
2716      DIP("ccm%c %s, #%u, #%u, %s\n",
2717          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
2718          imm5, nzcv, nameCC(cond));
2719      return True;
2720   }
2721
2722   /* ---------------- CCMP/CCMN(reg) ---------------- */
2723   /* 31   29        20 15   11 9    3
2724      sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
2725      sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond
2726      Operation is:
2727         (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
2728         (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
2729   */
2730   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
2731       && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
2732      Bool is64  = INSN(31,31) == 1;
2733      Bool isSUB = INSN(30,30) == 1;
2734      UInt mm    = INSN(20,16);
2735      UInt cond  = INSN(15,12);
2736      UInt nn    = INSN(9,5);
2737      UInt nzcv  = INSN(3,0);
2738
2739      IRTemp condT = newTemp(Ity_I1);
2740      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
2741
2742      IRType ty   = is64 ? Ity_I64 : Ity_I32;
2743      IRTemp argL = newTemp(ty);
2744      IRTemp argR = newTemp(ty);
2745
2746      if (is64) {
2747         assign(argL, getIReg64orZR(nn));
2748         assign(argR, getIReg64orZR(mm));
2749      } else {
2750         assign(argL, getIReg32orZR(nn));
2751         assign(argR, getIReg32orZR(mm));
2752      }
2753      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
2754
2755      DIP("ccm%c %s, %s, #%u, %s\n",
2756          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
2757          nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
2758      return True;
2759   }
2760
2761
2762   /* -------------- REV/REV16/REV32/RBIT -------------- */
2763   /* 31 30 28       20    15   11 9 4
2764
2765      1  10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
2766      0  10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn
2767
2768      1  10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
2769      0  10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn
2770
2771      1  10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
2772      0  10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn
2773
2774      1  10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
2775   */
2776   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
2777       && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
2778      UInt b31 = INSN(31,31);
2779      UInt opc = INSN(11,10);
2780
2781      UInt ix = 0;
2782      /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
2783      else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
2784      else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
2785      else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
2786      else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
2787      else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
2788      else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
2789      if (ix >= 1 && ix <= 7) {
2790         Bool   is64  = ix == 1 || ix == 3 || ix == 5 || ix == 7;
2791         UInt   nn    = INSN(9,5);
2792         UInt   dd    = INSN(4,0);
2793         IRTemp src   = newTemp(Ity_I64);
2794         IRTemp dst   = IRTemp_INVALID;
2795         IRTemp (*math)(IRTemp) = NULL;
2796         switch (ix) {
2797            case 1: case 2: math = math_BYTESWAP64;   break;
2798            case 3: case 4: math = math_BITSWAP64;    break;
2799            case 5: case 6: math = math_USHORTSWAP64; break;
2800            case 7:         math = math_UINTSWAP64;   break;
2801            default: vassert(0);
2802         }
2803         const HChar* names[7]
2804           = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
2805         const HChar* nm = names[ix-1];
2806         vassert(math);
2807         if (ix == 6) {
2808            /* This has to be special cased, since the logic below doesn't
2809               handle it correctly. */
2810            assign(src, getIReg64orZR(nn));
2811            dst = math(src);
2812            putIReg64orZR(dd,
2813                          unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
2814         } else if (is64) {
2815            assign(src, getIReg64orZR(nn));
2816            dst = math(src);
2817            putIReg64orZR(dd, mkexpr(dst));
2818         } else {
2819            assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
2820            dst = math(src);
2821            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
2822         }
2823         DIP("%s %s, %s\n", nm,
2824             nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
2825         return True;
2826      }
2827      /* else fall through */
2828   }
2829
2830   /* -------------------- CLZ/CLS -------------------- */
2831   /*    30 28   24   20    15      9 4
2832      sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
2833      sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
2834   */
2835   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
2836       && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
2837      Bool   is64  = INSN(31,31) == 1;
2838      Bool   isCLS = INSN(10,10) == 1;
2839      UInt   nn    = INSN(9,5);
2840      UInt   dd    = INSN(4,0);
2841      IRTemp src   = newTemp(Ity_I64);
2842      IRTemp dst   = newTemp(Ity_I64);
2843      if (!isCLS) { // CLS not yet supported
2844         if (is64) {
2845            assign(src, getIReg64orZR(nn));
2846            assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
2847                                   mkU64(64),
2848                                   unop(Iop_Clz64, mkexpr(src))));
2849            putIReg64orZR(dd, mkexpr(dst));
2850         } else {
2851            assign(src, binop(Iop_Shl64,
2852                              unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
2853            assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
2854                                   mkU64(32),
2855                                   unop(Iop_Clz64, mkexpr(src))));
2856            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
2857         }
2858         DIP("cl%c %s, %s\n",
2859             isCLS ? 's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
2860         return True;
2861      }
2862   }
2863
2864   /* -------------------- LSLV/LSRV/ASRV -------------------- */
2865   /*    30 28        20 15   11 9 4
2866      sf 00 1101 0110 m  0010 00 n d   LSLV Rd,Rn,Rm
2867      sf 00 1101 0110 m  0010 01 n d   LSRV Rd,Rn,Rm
2868      sf 00 1101 0110 m  0010 10 n d   ASRV Rd,Rn,Rm
2869   */
2870   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
2871       && INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) {
2872      Bool   is64 = INSN(31,31) == 1;
2873      UInt   mm   = INSN(20,16);
2874      UInt   op   = INSN(11,10);
2875      UInt   nn   = INSN(9,5);
2876      UInt   dd   = INSN(4,0);
2877      IRType ty   = is64 ? Ity_I64 : Ity_I32;
2878      IRTemp srcL = newTemp(ty);
2879      IRTemp srcR = newTemp(Ity_I8);
2880      IRTemp res  = newTemp(ty);
2881      IROp   iop  = Iop_INVALID;
2882      assign(srcL, getIRegOrZR(is64, nn));
2883      assign(srcR,
2884             unop(Iop_64to8,
2885                  binop(Iop_And64,
2886                        getIReg64orZR(mm), mkU64(is64 ? 63 : 31))));
2887      switch (op) {
2888         case BITS2(0,0): iop = mkSHL(ty); break;
2889         case BITS2(0,1): iop = mkSHR(ty); break;
2890         case BITS2(1,0): iop = mkSAR(ty); break;
2891         default: vassert(0);
2892      }
2893      assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR)));
2894      putIRegOrZR(is64, dd, mkexpr(res));
2895      vassert(op < 3);
2896      const HChar* names[3] = { "lslv", "lsrv", "asrv" };
2897      DIP("%s %s, %s, %s\n",
2898          names[op], nameIRegOrZR(is64,dd),
2899                     nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
2900      return True;
2901   }
2902
2903   /* -------------------- SDIV/UDIV -------------------- */
2904   /*    30 28        20 15    10 9 4
2905      sf 00 1101 0110 m  00001  1 n d  SDIV Rd,Rn,Rm
2906      sf 00 1101 0110 m  00001  0 n d  UDIV Rd,Rn,Rm
2907   */
2908   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
2909       && INSN(15,11) == BITS5(0,0,0,0,1)) {
2910      Bool is64 = INSN(31,31) == 1;
2911      UInt mm   = INSN(20,16);
2912      Bool isS  = INSN(10,10) == 1;
2913      UInt nn   = INSN(9,5);
2914      UInt dd   = INSN(4,0);
2915      if (isS) {
2916         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
2917                                     getIRegOrZR(is64, nn),
2918                                     getIRegOrZR(is64, mm)));
2919      } else {
2920         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
2921                                     getIRegOrZR(is64, nn),
2922                                     getIRegOrZR(is64, mm)));
2923      }
2924      DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
2925          nameIRegOrZR(is64, dd),
2926          nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
2927      return True;
2928   }
2929
2930   /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
2931   /* 31        23  20 15 14 9 4
2932      1001 1011 101 m  0  a  n d   UMADDL Xd,Wn,Wm,Xa
2933      1001 1011 001 m  0  a  n d   SMADDL Xd,Wn,Wm,Xa
2934      1001 1011 101 m  1  a  n d   UMSUBL Xd,Wn,Wm,Xa
2935      1001 1011 001 m  1  a  n d   SMSUBL Xd,Wn,Wm,Xa
2936      with operation
2937         Xd = Xa +/- (Wn *u/s Wm)
2938   */
2939   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
2940      Bool   isU   = INSN(23,23) == 1;
2941      UInt   mm    = INSN(20,16);
2942      Bool   isAdd = INSN(15,15) == 0;
2943      UInt   aa    = INSN(14,10);
2944      UInt   nn    = INSN(9,5);
2945      UInt   dd    = INSN(4,0);
2946      IRTemp wN    = newTemp(Ity_I32);
2947      IRTemp wM    = newTemp(Ity_I32);
2948      IRTemp xA    = newTemp(Ity_I64);
2949      IRTemp muld  = newTemp(Ity_I64);
2950      IRTemp res   = newTemp(Ity_I64);
2951      assign(wN, getIReg32orZR(nn));
2952      assign(wM, getIReg32orZR(mm));
2953      assign(xA, getIReg64orZR(aa));
2954      assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
2955                         mkexpr(wN), mkexpr(wM)));
2956      assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
2957                        mkexpr(xA), mkexpr(muld)));
2958      putIReg64orZR(dd, mkexpr(res));
2959      DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
2960          nameIReg64orZR(dd), nameIReg32orZR(nn),
2961          nameIReg32orZR(mm), nameIReg64orZR(aa));
2962      return True;
2963   }
2964   vex_printf("ARM64 front end: data_processing_register\n");
2965   return False;
2966#  undef INSN
2967}
2968
2969
2970/*------------------------------------------------------------*/
2971/*--- Load and Store instructions                          ---*/
2972/*------------------------------------------------------------*/
2973
2974/* Generate the EA for a "reg + reg" style amode.  This is done from
2975   parts of the insn, but for sanity checking sake it takes the whole
2976   insn.  This appears to depend on insn[15:12], with opt=insn[15:13]
2977   and S=insn[12]:
2978
2979   The possible forms, along with their opt:S values, are:
2980      011:0   Xn|SP + Xm
2981      111:0   Xn|SP + Xm
2982      011:1   Xn|SP + Xm * transfer_szB
2983      111:1   Xn|SP + Xm * transfer_szB
2984      010:0   Xn|SP + 32Uto64(Wm)
2985      010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
2986      110:0   Xn|SP + 32Sto64(Wm)
2987      110:1   Xn|SP + 32Sto64(Wm) * transfer_szB
2988
2989   Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
2990   the transfer size is insn[23,31,30].  For integer loads/stores,
2991   insn[23] is zero, hence szLg2 can be at most 3 in such cases.
2992
2993   If the decoding fails, it returns IRTemp_INVALID.
2994
   isInt is True iff this decoding is for transfers to/from integer
2996   registers.  If False it is for transfers to/from vector registers.
2997*/
2998static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
2999{
3000   UInt    optS  = SLICE_UInt(insn, 15, 12);
3001   UInt    mm    = SLICE_UInt(insn, 20, 16);
3002   UInt    nn    = SLICE_UInt(insn, 9, 5);
3003   UInt    szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
3004                   | SLICE_UInt(insn, 31, 30); // Log2 of the size
3005
3006   buf[0] = 0;
3007
3008   /* Sanity checks, that this really is a load/store insn. */
3009   if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
3010      goto fail;
3011
3012   if (isInt
3013       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
3014       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
3015       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
3016       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
3017      goto fail;
3018
3019   if (!isInt
3020       && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
3021      goto fail;
3022
3023   /* Throw out non-verified but possibly valid cases. */
3024   switch (szLg2) {
3025      case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
3026      case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
3027      case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
3028      case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
3029      case BITS3(1,0,0): // can only ever be valid for the vector case
3030                         if (isInt) goto fail; else goto fail;
3031      case BITS3(1,0,1): // these sizes are never valid
3032      case BITS3(1,1,0):
3033      case BITS3(1,1,1): goto fail;
3034
3035      default: vassert(0);
3036   }
3037
3038   IRExpr* rhs  = NULL;
3039   switch (optS) {
3040      case BITS4(1,1,1,0): goto fail; //ATC
3041      case BITS4(0,1,1,0):
3042         rhs = getIReg64orZR(mm);
3043         vex_sprintf(buf, "[%s, %s]",
3044                     nameIReg64orZR(nn), nameIReg64orZR(mm));
3045         break;
3046      case BITS4(1,1,1,1): goto fail; //ATC
3047      case BITS4(0,1,1,1):
3048         rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
3049         vex_sprintf(buf, "[%s, %s lsl %u]",
3050                     nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
3051         break;
3052      case BITS4(0,1,0,0):
3053         rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
3054         vex_sprintf(buf, "[%s, %s uxtx]",
3055                     nameIReg64orZR(nn), nameIReg32orZR(mm));
3056         break;
3057      case BITS4(0,1,0,1):
3058         rhs = binop(Iop_Shl64,
3059                     unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
3060         vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
3061                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
3062         break;
3063      case BITS4(1,1,0,0):
3064         rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
3065         vex_sprintf(buf, "[%s, %s sxtx]",
3066                     nameIReg64orZR(nn), nameIReg32orZR(mm));
3067         break;
3068      case BITS4(1,1,0,1):
3069         rhs = binop(Iop_Shl64,
3070                     unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
3071         vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
3072                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
3073         break;
3074      default:
3075         /* The rest appear to be genuinely invalid */
3076         goto fail;
3077   }
3078
3079   vassert(rhs);
3080   IRTemp res = newTemp(Ity_I64);
3081   assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
3082   return res;
3083
3084  fail:
3085   vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
3086   return IRTemp_INVALID;
3087}
3088
3089
3090/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
3091   bits of DATAE :: Ity_I64. */
3092static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
3093{
3094   IRExpr* addrE = mkexpr(addr);
3095   switch (szB) {
3096      case 8:
3097         storeLE(addrE, dataE);
3098         break;
3099      case 4:
3100         storeLE(addrE, unop(Iop_64to32, dataE));
3101         break;
3102      case 2:
3103         storeLE(addrE, unop(Iop_64to16, dataE));
3104         break;
3105      case 1:
3106         storeLE(addrE, unop(Iop_64to8, dataE));
3107         break;
3108      default:
3109         vassert(0);
3110   }
3111}
3112
3113
3114/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
3115   placing the result in an Ity_I64 temporary. */
3116static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
3117{
3118   IRTemp  res   = newTemp(Ity_I64);
3119   IRExpr* addrE = mkexpr(addr);
3120   switch (szB) {
3121      case 8:
3122         assign(res, loadLE(Ity_I64,addrE));
3123         break;
3124      case 4:
3125         assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
3126         break;
3127      case 2:
3128         assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
3129         break;
3130      case 1:
3131         assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
3132         break;
3133      default:
3134         vassert(0);
3135   }
3136   return res;
3137}
3138
3139
3140static
3141Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
3142{
3143#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
3144
3145   /* ------------ LDR,STR (immediate, uimm12) ----------- */
3146   /* uimm12 is scaled by the transfer size
3147
3148      31 29  26    21    9  4
3149      |  |   |     |     |  |
3150      11 111 00100 imm12 nn tt    STR  Xt, [Xn|SP, #imm12 * 8]
3151      11 111 00101 imm12 nn tt    LDR  Xt, [Xn|SP, #imm12 * 8]
3152
3153      10 111 00100 imm12 nn tt    STR  Wt, [Xn|SP, #imm12 * 4]
3154      10 111 00101 imm12 nn tt    LDR  Wt, [Xn|SP, #imm12 * 4]
3155
3156      01 111 00100 imm12 nn tt    STRH Wt, [Xn|SP, #imm12 * 2]
3157      01 111 00101 imm12 nn tt    LDRH Wt, [Xn|SP, #imm12 * 2]
3158
3159      00 111 00100 imm12 nn tt    STRB Wt, [Xn|SP, #imm12 * 1]
3160      00 111 00101 imm12 nn tt    LDRB Wt, [Xn|SP, #imm12 * 1]
3161   */
3162   if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
3163      UInt   szLg2 = INSN(31,30);
3164      UInt   szB   = 1 << szLg2;
3165      Bool   isLD  = INSN(22,22) == 1;
3166      UInt   offs  = INSN(21,10) * szB;
3167      UInt   nn    = INSN(9,5);
3168      UInt   tt    = INSN(4,0);
3169      IRTemp ta    = newTemp(Ity_I64);
3170      assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
3171      if (nn == 31) { /* FIXME generate stack alignment check */ }
3172      vassert(szLg2 < 4);
3173      if (isLD) {
3174         putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
3175      } else {
3176         gen_narrowing_store(szB, ta, getIReg64orZR(tt));
3177      }
3178      const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
3179      const HChar* st_name[4] = { "strb", "strh", "str", "str" };
3180      DIP("%s %s, [%s, #%u]\n",
3181          (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
3182          nameIReg64orSP(nn), offs);
3183      return True;
3184   }
3185
3186   /* ------------ LDUR,STUR (immediate, simm9) ----------- */
3187   /*
3188      31 29  26      20   11 9  4
3189      |  |   |       |    |  |  |
3190      (at-Rn-then-Rn=EA)  |  |  |
3191      sz 111 00000 0 imm9 01 Rn Rt   STR Rt, [Xn|SP], #simm9
3192      sz 111 00001 0 imm9 01 Rn Rt   LDR Rt, [Xn|SP], #simm9
3193
3194      (at-EA-then-Rn=EA)
3195      sz 111 00000 0 imm9 11 Rn Rt   STR Rt, [Xn|SP, #simm9]!
3196      sz 111 00001 0 imm9 11 Rn Rt   LDR Rt, [Xn|SP, #simm9]!
3197
3198      (at-EA)
3199      sz 111 00000 0 imm9 00 Rn Rt   STR Rt, [Xn|SP, #simm9]
3200      sz 111 00001 0 imm9 00 Rn Rt   LDR Rt, [Xn|SP, #simm9]
3201
3202      simm9 is unscaled.
3203
3204      The case 'wback && Rn == Rt && Rt != 31' is disallowed.  In the
3205      load case this is because would create two competing values for
3206      Rt.  In the store case the reason is unclear, but the spec
3207      disallows it anyway.
3208
3209      Stores are narrowing, loads are unsigned widening.  sz encodes
3210      the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
3211   */
3212   if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
3213       == BITS9(1,1,1, 0,0,0,0,0, 0)) {
3214      UInt szLg2  = INSN(31,30);
3215      UInt szB    = 1 << szLg2;
3216      Bool isLoad = INSN(22,22) == 1;
3217      UInt imm9   = INSN(20,12);
3218      UInt nn     = INSN(9,5);
3219      UInt tt     = INSN(4,0);
3220      Bool wBack  = INSN(10,10) == 1;
3221      UInt how    = INSN(11,10);
3222      if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
3223         /* undecodable; fall through */
3224      } else {
3225         if (nn == 31) { /* FIXME generate stack alignment check */ }
3226
3227         // Compute the transfer address TA and the writeback address WA.
3228         IRTemp tRN = newTemp(Ity_I64);
3229         assign(tRN, getIReg64orSP(nn));
3230         IRTemp tEA = newTemp(Ity_I64);
3231         Long simm9 = (Long)sx_to_64(imm9, 9);
3232         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3233
3234         IRTemp tTA = newTemp(Ity_I64);
3235         IRTemp tWA = newTemp(Ity_I64);
3236         switch (how) {
3237            case BITS2(0,1):
3238               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3239            case BITS2(1,1):
3240               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3241            case BITS2(0,0):
3242               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3243            default:
3244               vassert(0); /* NOTREACHED */
3245         }
3246
3247         /* Normally rN would be updated after the transfer.  However, in
            the special case typified by
3249               str x30, [sp,#-16]!
3250            it is necessary to update SP before the transfer, (1)
3251            because Memcheck will otherwise complain about a write
3252            below the stack pointer, and (2) because the segfault
3253            stack extension mechanism will otherwise extend the stack
3254            only down to SP before the instruction, which might not be
3255            far enough, if the -16 bit takes the actual access
3256            address to the next page.
3257         */
3258         Bool earlyWBack
3259           = wBack && simm9 < 0 && szB == 8
3260             && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
3261
3262         if (wBack && earlyWBack)
3263            putIReg64orSP(nn, mkexpr(tEA));
3264
3265         if (isLoad) {
3266            putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
3267         } else {
3268            gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
3269         }
3270
3271         if (wBack && !earlyWBack)
3272            putIReg64orSP(nn, mkexpr(tEA));
3273
3274         const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
3275         const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
3276         const HChar* fmt_str = NULL;
3277         switch (how) {
3278            case BITS2(0,1):
3279               fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3280               break;
3281            case BITS2(1,1):
3282               fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3283               break;
3284            case BITS2(0,0):
3285               fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
3286               break;
3287            default:
3288               vassert(0);
3289         }
3290         DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
3291                      nameIRegOrZR(szB == 8, tt),
3292                      nameIReg64orSP(nn), simm9);
3293         return True;
3294      }
3295   }
3296
3297   /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
3298   /* L==1 => mm==LD
3299      L==0 => mm==ST
3300      x==0 => 32 bit transfers, and zero extended loads
3301      x==1 => 64 bit transfers
3302      simm7 is scaled by the (single-register) transfer size
3303
3304      (at-Rn-then-Rn=EA)
3305      x0 101 0001 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP], #imm
3306
3307      (at-EA-then-Rn=EA)
3308      x0 101 0011 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]!
3309
3310      (at-EA)
3311      x0 101 0010 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]
3312   */
3313
3314   UInt insn_30_23 = INSN(30,23);
3315   if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
3316       || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
3317       || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
3318      UInt bL     = INSN(22,22);
3319      UInt bX     = INSN(31,31);
3320      UInt bWBack = INSN(23,23);
3321      UInt rT1    = INSN(4,0);
3322      UInt rN     = INSN(9,5);
3323      UInt rT2    = INSN(14,10);
3324      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
3325      if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
3326          || (bL && rT1 == rT2)) {
3327         /* undecodable; fall through */
3328      } else {
3329         if (rN == 31) { /* FIXME generate stack alignment check */ }
3330
3331         // Compute the transfer address TA and the writeback address WA.
3332         IRTemp tRN = newTemp(Ity_I64);
3333         assign(tRN, getIReg64orSP(rN));
3334         IRTemp tEA = newTemp(Ity_I64);
3335         simm7 = (bX ? 8 : 4) * simm7;
3336         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
3337
3338         IRTemp tTA = newTemp(Ity_I64);
3339         IRTemp tWA = newTemp(Ity_I64);
3340         switch (INSN(24,23)) {
3341            case BITS2(0,1):
3342               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3343            case BITS2(1,1):
3344               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3345            case BITS2(1,0):
3346               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3347            default:
3348               vassert(0); /* NOTREACHED */
3349         }
3350
3351         /* Normally rN would be updated after the transfer.  However, in
            the special case typified by
3353               stp x29, x30, [sp,#-112]!
3354            it is necessary to update SP before the transfer, (1)
3355            because Memcheck will otherwise complain about a write
3356            below the stack pointer, and (2) because the segfault
3357            stack extension mechanism will otherwise extend the stack
3358            only down to SP before the instruction, which might not be
3359            far enough, if the -112 bit takes the actual access
3360            address to the next page.
3361         */
3362         Bool earlyWBack
3363           = bWBack && simm7 < 0
3364             && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
3365
3366         if (bWBack && earlyWBack)
3367            putIReg64orSP(rN, mkexpr(tEA));
3368
3369         /**/ if (bL == 1 && bX == 1) {
3370            // 64 bit load
3371            putIReg64orZR(rT1, loadLE(Ity_I64,
3372                                      binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3373            putIReg64orZR(rT2, loadLE(Ity_I64,
3374                                      binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
3375         } else if (bL == 1 && bX == 0) {
3376            // 32 bit load
3377            putIReg32orZR(rT1, loadLE(Ity_I32,
3378                                      binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3379            putIReg32orZR(rT2, loadLE(Ity_I32,
3380                                      binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
3381         } else if (bL == 0 && bX == 1) {
3382            // 64 bit store
3383            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3384                    getIReg64orZR(rT1));
3385            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
3386                    getIReg64orZR(rT2));
3387         } else {
3388            vassert(bL == 0 && bX == 0);
3389            // 32 bit store
3390            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3391                    getIReg32orZR(rT1));
3392            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
3393                    getIReg32orZR(rT2));
3394         }
3395
3396         if (bWBack && !earlyWBack)
3397            putIReg64orSP(rN, mkexpr(tEA));
3398
3399         const HChar* fmt_str = NULL;
3400         switch (INSN(24,23)) {
3401            case BITS2(0,1):
3402               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3403               break;
3404            case BITS2(1,1):
3405               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3406               break;
3407            case BITS2(1,0):
3408               fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
3409               break;
3410            default:
3411               vassert(0);
3412         }
3413         DIP(fmt_str, bL == 0 ? "st" : "ld",
3414                      nameIRegOrZR(bX == 1, rT1),
3415                      nameIRegOrZR(bX == 1, rT2),
3416                      nameIReg64orSP(rN), simm7);
3417         return True;
3418      }
3419   }
3420
3421   /* ---------------- LDR (literal, int reg) ---------------- */
3422   /* 31 29      23    4
3423      00 011 000 imm19 Rt   LDR   Wt, [PC + sxTo64(imm19 << 2)]
3424      01 011 000 imm19 Rt   LDR   Xt, [PC + sxTo64(imm19 << 2)]
3425      10 011 000 imm19 Rt   LDRSW Xt, [PC + sxTo64(imm19 << 2)]
3426      11 011 000 imm19 Rt   prefetch  [PC + sxTo64(imm19 << 2)]
3427      Just handles the first two cases for now.
3428   */
   /* Decode arm for LDR Wt/Xt (literal).  The PC-relative address is
      fully known at decode time, so it is folded to a constant here. */
   if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
      UInt  imm19 = INSN(23,5);
      UInt  rT    = INSN(4,0);
      UInt  bX    = INSN(30,30);  /* 1 => Xt (64 bit), 0 => Wt (32 bit) */
      /* imm19 is a word offset; shift to bytes then sign extend from
         bit 21 (19 bits of imm + 2 bits of shift). */
      ULong ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
      if (bX) {
         putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
      } else {
         putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
      }
      DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
      return True;
   }
3442
3443   /* -------------- {LD,ST}R (integer register) --------------- */
3444   /* 31 29        20 15     12 11 9  4
3445      |  |         |  |      |  |  |  |
3446      11 111000011 Rm option S  10 Rn Rt  LDR  Xt, [Xn|SP, R<m>{ext/sh}]
3447      10 111000011 Rm option S  10 Rn Rt  LDR  Wt, [Xn|SP, R<m>{ext/sh}]
3448      01 111000011 Rm option S  10 Rn Rt  LDRH Wt, [Xn|SP, R<m>{ext/sh}]
3449      00 111000011 Rm option S  10 Rn Rt  LDRB Wt, [Xn|SP, R<m>{ext/sh}]
3450
3451      11 111000001 Rm option S  10 Rn Rt  STR  Xt, [Xn|SP, R<m>{ext/sh}]
3452      10 111000001 Rm option S  10 Rn Rt  STR  Wt, [Xn|SP, R<m>{ext/sh}]
3453      01 111000001 Rm option S  10 Rn Rt  STRH Wt, [Xn|SP, R<m>{ext/sh}]
3454      00 111000001 Rm option S  10 Rn Rt  STRB Wt, [Xn|SP, R<m>{ext/sh}]
3455   */
   /* Decode arm for {LD,ST}R{,H,B} Rt, [Xn|SP, R<m>{ext/sh}] — register
      offset addressing, transfers to/from the integer registers.  The
      transfer size in bytes is 1 << szLg2. */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2 = INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   tt    = INSN(4,0);
      /* gen_indexed_EA decodes the option/S fields and builds the
         effective address; it returns IRTemp_INVALID for encodings it
         cannot handle, in which case we fall through undecoded. */
      IRTemp ea    = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea != IRTemp_INVALID) {
         switch (szLg2) {
            case 3: /* 64 bit */
               if (isLD) {
                  putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
                  DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), getIReg64orZR(tt));
                  DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
               }
               break;
            case 2: /* 32 bit */
               if (isLD) {
                  putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
                  DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), getIReg32orZR(tt));
                  DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            case 1: /* 16 bit */
               /* Loads zero-extend into the full 64-bit register, which
                  gives the same final register value as writing Wt. */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_16Uto64,
                                         loadLE(Ity_I16, mkexpr(ea))));
                  DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
                  DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            case 0: /* 8 bit */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_8Uto64,
                                         loadLE(Ity_I8, mkexpr(ea))));
                  DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
                  DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            default:
               vassert(0); /* szLg2 is 2 bits wide; NOTREACHED */
         }
         return True;
      }
   }
3509
3510   /* -------------- LDRS{B,H,W} (uimm12) -------------- */
3511   /* 31 29  26  23 21    9 4
3512      10 111 001 10 imm12 n t   LDRSW Xt, [Xn|SP, #pimm12 * 4]
3513      01 111 001 1x imm12 n t   LDRSH Rt, [Xn|SP, #pimm12 * 2]
3514      00 111 001 1x imm12 n t   LDRSB Rt, [Xn|SP, #pimm12 * 1]
3515      where
3516         Rt is Wt when x==1, Xt when x==0
3517   */
   /* Decode arm for LDRS{B,H,W} Rt, [Xn|SP, #pimm] — sign-extending
      loads with an unsigned, scaled 12-bit immediate offset. */
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      /* The 5 legal size/extend combinations; anything else (e.g. the
         64-bit-size encodings, which are prefetch/LDRSW space) falls
         through with valid == False. */
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):
         case BITS3(0,1,0): case BITS3(0,1,1):
         case BITS3(0,0,0): case BITS3(0,0,1):
            valid = True;
            break;
      }
      if (valid) {
         UInt    szLg2 = INSN(31,30);
         UInt    bitX  = INSN(22,22);  /* 1 => dest is Wt, 0 => Xt */
         UInt    imm12 = INSN(21,10);
         UInt    nn    = INSN(9,5);
         UInt    tt    = INSN(4,0);
         UInt    szB   = 1 << szLg2;
         /* The immediate is scaled by the transfer size. */
         IRExpr* ea    = binop(Iop_Add64,
                               getIReg64orSP(nn), mkU64(imm12 * szB));
         switch (szB) {
            case 4:
               vassert(bitX == 0);
               putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
               DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 2:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
               }
               DIP("ldrsh %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 1:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
               }
               DIP("ldrsb %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            default:
               vassert(0); /* only szB in {1,2,4} can be valid here */
         }
         return True;
      }
      /* else fall through */
   }
3571
3572   /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
3573   /* (at-Rn-then-Rn=EA)
3574      31 29      23 21 20   11 9 4
3575      00 111 000 1x 0  imm9 01 n t  LDRSB Rt, [Xn|SP], #simm9
3576      01 111 000 1x 0  imm9 01 n t  LDRSH Rt, [Xn|SP], #simm9
3577      10 111 000 10 0  imm9 01 n t  LDRSW Xt, [Xn|SP], #simm9
3578
3579      (at-EA-then-Rn=EA)
3580      00 111 000 1x 0  imm9 11 n t  LDRSB Rt, [Xn|SP, #simm9]!
3581      01 111 000 1x 0  imm9 11 n t  LDRSH Rt, [Xn|SP, #simm9]!
3582      10 111 000 10 0  imm9 11 n t  LDRSW Xt, [Xn|SP, #simm9]!
3583      where
3584         Rt is Wt when x==1, Xt when x==0
3585         transfer-at-Rn when [11]==0, at EA when [11]==1
3586   */
3587   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3588       && INSN(21,21) == 0 && INSN(10,10) == 1) {
3589      /* Further checks on bits 31:30 and 22 */
3590      Bool valid = False;
3591      switch ((INSN(31,30) << 1) | INSN(22,22)) {
3592         case BITS3(1,0,0):                    // LDRSW Xt
3593         case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
3594         case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
3595            valid = True;
3596            break;
3597      }
3598      if (valid) {
3599         UInt   szLg2 = INSN(31,30);
3600         UInt   imm9  = INSN(20,12);
3601         Bool   atRN  = INSN(11,11) == 0;
3602         UInt   nn    = INSN(9,5);
3603         UInt   tt    = INSN(4,0);
3604         IRTemp tRN   = newTemp(Ity_I64);
3605         IRTemp tEA   = newTemp(Ity_I64);
3606         IRTemp tTA   = IRTemp_INVALID;
3607         ULong  simm9 = sx_to_64(imm9, 9);
3608         Bool   is64  = INSN(22,22) == 0;
3609         assign(tRN, getIReg64orSP(nn));
3610         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3611         tTA = atRN ? tRN : tEA;
3612         HChar ch = '?';
3613         /* There are 5 cases:
3614               byte     load,           SX to 64
3615               byte     load, SX to 32, ZX to 64
3616               halfword load,           SX to 64
3617               halfword load, SX to 32, ZX to 64
3618               word     load,           SX to 64
3619            The ifs below handle them in the listed order.
3620         */
3621         if (szLg2 == 0) {
3622            ch = 'b';
3623            if (is64) {
3624               putIReg64orZR(tt, unop(Iop_8Sto64,
3625                                      loadLE(Ity_I8, mkexpr(tTA))));
3626            } else {
3627               putIReg32orZR(tt, unop(Iop_8Sto32,
3628                                      loadLE(Ity_I8, mkexpr(tTA))));
3629            }
3630         }
3631         else if (szLg2 == 1) {
3632            ch = 'h';
3633            if (is64) {
3634               putIReg64orZR(tt, unop(Iop_16Sto64,
3635                                      loadLE(Ity_I16, mkexpr(tTA))));
3636            } else {
3637               putIReg32orZR(tt, unop(Iop_16Sto32,
3638                                      loadLE(Ity_I16, mkexpr(tTA))));
3639            }
3640         }
3641         else if (szLg2 == 2 && is64) {
3642            ch = 'w';
3643            putIReg64orZR(tt, unop(Iop_32Sto64,
3644                                   loadLE(Ity_I32, mkexpr(tTA))));
3645         }
3646         else {
3647            vassert(0);
3648         }
3649         putIReg64orSP(nn, mkexpr(tEA));
3650         DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!",
3651             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
3652         return True;
3653      }
3654      /* else fall through */
3655   }
3656
3657   /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
3658   /* 31 29      23 21 20   11 9 4
3659      00 111 000 1x 0  imm9 00 n t  LDURSB Rt, [Xn|SP, #simm9]
3660      01 111 000 1x 0  imm9 00 n t  LDURSH Rt, [Xn|SP, #simm9]
3661      10 111 000 10 0  imm9 00 n t  LDURSW Xt, [Xn|SP, #simm9]
3662      where
3663         Rt is Wt when x==1, Xt when x==0
3664   */
3665   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3666       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
3667      /* Further checks on bits 31:30 and 22 */
3668      Bool valid = False;
3669      switch ((INSN(31,30) << 1) | INSN(22,22)) {
3670         case BITS3(1,0,0):                    // LDURSW Xt
3671         case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
3672         case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
3673            valid = True;
3674            break;
3675      }
3676      if (valid) {
3677         UInt   szLg2 = INSN(31,30);
3678         UInt   imm9  = INSN(20,12);
3679         UInt   nn    = INSN(9,5);
3680         UInt   tt    = INSN(4,0);
3681         IRTemp tRN   = newTemp(Ity_I64);
3682         IRTemp tEA   = newTemp(Ity_I64);
3683         ULong  simm9 = sx_to_64(imm9, 9);
3684         Bool   is64  = INSN(22,22) == 0;
3685         assign(tRN, getIReg64orSP(nn));
3686         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3687         HChar ch = '?';
3688         /* There are 5 cases:
3689               byte     load,           SX to 64
3690               byte     load, SX to 32, ZX to 64
3691               halfword load,           SX to 64
3692               halfword load, SX to 32, ZX to 64
3693               word     load,           SX to 64
3694            The ifs below handle them in the listed order.
3695         */
3696         if (szLg2 == 0) {
3697            ch = 'b';
3698            if (is64) {
3699               putIReg64orZR(tt, unop(Iop_8Sto64,
3700                                      loadLE(Ity_I8, mkexpr(tEA))));
3701            } else {
3702               putIReg32orZR(tt, unop(Iop_8Sto32,
3703                                      loadLE(Ity_I8, mkexpr(tEA))));
3704            }
3705         }
3706         else if (szLg2 == 1) {
3707            ch = 'h';
3708            if (is64) {
3709               putIReg64orZR(tt, unop(Iop_16Sto64,
3710                                      loadLE(Ity_I16, mkexpr(tEA))));
3711            } else {
3712               putIReg32orZR(tt, unop(Iop_16Sto32,
3713                                      loadLE(Ity_I16, mkexpr(tEA))));
3714            }
3715         }
3716         else if (szLg2 == 2 && is64) {
3717            ch = 'w';
3718            putIReg64orZR(tt, unop(Iop_32Sto64,
3719                                   loadLE(Ity_I32, mkexpr(tEA))));
3720         }
3721         else {
3722            vassert(0);
3723         }
3724         DIP("ldurs%c %s, [%s, #%lld]",
3725             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
3726         return True;
3727      }
3728      /* else fall through */
3729   }
3730
3731   /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
3732   /* L==1    => mm==LD
3733      L==0    => mm==ST
3734      sz==00  => 32 bit (S) transfers
3735      sz==01  => 64 bit (D) transfers
3736      sz==10  => 128 bit (Q) transfers
3737      sz==11  isn't allowed
3738      simm7 is scaled by the (single-register) transfer size
3739
3740      31 29       22 21   14 9 4
3741      sz 101 1001 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP], #imm
3742      (at-Rn-then-Rn=EA)
3743
3744      sz 101 1011 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]!
3745      (at-EA-then-Rn=EA)
3746
3747      sz 101 1010 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]
3748      (at-EA)
3749   */
3750
   /* Decode arm for LDP/STP of FP/vector register pairs (S, D or Q),
      in post-index, pre-index and signed-offset addressing forms. */
   UInt insn_29_23 = INSN(29,23);
   if (insn_29_23 == BITS7(1,0,1,1,0,0,1)
       || insn_29_23 == BITS7(1,0,1,1,0,1,1)
       || insn_29_23 == BITS7(1,0,1,1,0,1,0)) {
      UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
      Bool isLD   = INSN(22,22) == 1;
      Bool wBack  = INSN(23,23) == 1;
      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
      UInt tt2    = INSN(14,10);
      UInt nn     = INSN(9,5);
      UInt tt1    = INSN(4,0);
      /* sz == 11 is reserved; a load pair with tt1 == tt2 is
         CONSTRAINED UNPREDICTABLE, so refuse both. */
      if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
         /* undecodable; fall through */
      } else {
         if (nn == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         UInt   szB = 4 << szSlg2; /* szB is the per-register size */
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(nn));
         IRTemp tEA = newTemp(Ity_I64);
         simm7 = szB * simm7;  /* scale the offset by the register size */
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (INSN(24,23)) {
            case BITS2(0,1):  /* post-index: access at Rn, then Rn = EA */
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):  /* pre-index: access at EA, then Rn = EA */
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,0):  /* signed offset: access at EA, no writeback */
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         IRType ty = Ity_INVALID;
         switch (szB) {
            case 4:  ty = Ity_F32;  break;
            case 8:  ty = Ity_F64;  break;
            case 16: ty = Ity_V128; break;
            default: vassert(0);
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special cases typified by
               stp q0, q1, [sp,#-512]!
               stp d0, d1, [sp,#-512]!
               stp s0, s1, [sp,#-512]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -512 bit takes the actual access
            address to the next page.
         */
         Bool earlyWBack
           = wBack && simm7 < 0
             && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;

         if (wBack && earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         if (isLD) {
            /* For sub-128-bit loads, clear the whole Q register before
               writing the low lanes, so the upper lanes end up zero. */
            if (szB < 16) {
               putQReg128(tt1, mkV128(0x0000));
            }
            putQRegLO(tt1,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
            if (szB < 16) {
               putQReg128(tt2, mkV128(0x0000));
            }
            putQRegLO(tt2,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
         } else {
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
                    getQRegLO(tt1, ty));
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
                    getQRegLO(tt2, ty));
         }

         if (wBack && !earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         const HChar* fmt_str = NULL;
         switch (INSN(24,23)) {
            case BITS2(0,1):
               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(1,0):
               fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, isLD ? "ld" : "st",
                      nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
                      nameIReg64orSP(nn), simm7);
         return True;
      }
   }
3857
3858   /* -------------- {LD,ST}R (vector register) --------------- */
3859   /* 31 29     23  20 15     12 11 9  4
3860      |  |      |   |  |      |  |  |  |
3861      00 111100 011 Rm option S  10 Rn Rt  LDR Bt, [Xn|SP, R<m>{ext/sh}]
3862      01 111100 011 Rm option S  10 Rn Rt  LDR Ht, [Xn|SP, R<m>{ext/sh}]
3863      10 111100 011 Rm option S  10 Rn Rt  LDR St, [Xn|SP, R<m>{ext/sh}]
3864      11 111100 011 Rm option S  10 Rn Rt  LDR Dt, [Xn|SP, R<m>{ext/sh}]
3865      00 111100 111 Rm option S  10 Rn Rt  LDR Qt, [Xn|SP, R<m>{ext/sh}]
3866
3867      00 111100 001 Rm option S  10 Rn Rt  STR Bt, [Xn|SP, R<m>{ext/sh}]
3868      01 111100 001 Rm option S  10 Rn Rt  STR Ht, [Xn|SP, R<m>{ext/sh}]
3869      10 111100 001 Rm option S  10 Rn Rt  STR St, [Xn|SP, R<m>{ext/sh}]
3870      11 111100 001 Rm option S  10 Rn Rt  STR Dt, [Xn|SP, R<m>{ext/sh}]
3871      00 111100 101 Rm option S  10 Rn Rt  STR Qt, [Xn|SP, R<m>{ext/sh}]
3872   */
   /* Decode arm for {LD,ST}R Bt/Ht/St/Dt/Qt with register-offset
      addressing, transfers to/from the vector registers. */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      /* Bit 23 extends the 2-bit size field, so szLg2 == 4 means Q. */
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   tt    = INSN(4,0);
      if (szLg2 >= 4) goto after_LDR_STR_vector_register;
      IRTemp ea    = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
      if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
      switch (szLg2) {
         case 0: /* 8 bit */
            if (isLD) {
               /* Clear the whole Q register first, so that after the
                  low-lane write the upper lanes are zero. */
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            } else {
               vassert(0); //ATC
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            }
            break;
         case 1: /* 16 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            } else {
               vassert(0); //ATC
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            }
            break;
         case 2: /* 32 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            }
            break;
         case 3: /* 64 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            }
            break;
         case 4:  return False; //ATC (unreachable: excluded above)
         default: vassert(0);
      }
      return True;
   }
  after_LDR_STR_vector_register:
3931
3932   /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
3933   /* 31 29      22 20 15  12 11 9  4
3934      |  |       |  |  |   |  |  |  |
3935      10 1110001 01 Rm opt S 10 Rn Rt    LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
3936
3937      01 1110001 01 Rm opt S 10 Rn Rt    LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
3938      01 1110001 11 Rm opt S 10 Rn Rt    LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
3939
3940      00 1110001 01 Rm opt S 10 Rn Rt    LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
3941      00 1110001 11 Rm opt S 10 Rn Rt    LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
3942   */
   /* Decode arm for LDRS{B,H,W} with register-offset addressing:
      sign-extending loads into Xt (bit 22 == 0) or Wt (bit 22 == 1). */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2  = INSN(31,30);
      Bool   sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
      UInt   tt     = INSN(4,0);
      /* szLg2 == 3 (64-bit size) has no sign-extending load form. */
      if (szLg2 == 3) goto after_LDRS_integer_register;
      IRTemp ea     = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
      /* Enumerate the 5 variants explicitly. */
      if (szLg2 == 2/*32 bit*/ && sxTo64) {
         putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
         DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
         return True;
      }
      else
      if (szLg2 == 1/*16 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      else
      if (szLg2 == 0/*8 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      /* else it's an invalid combination (szLg2 == 2 with sxTo64 == False) */
   }
  after_LDRS_integer_register:
3983
3984   /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
3985   /* This is the Unsigned offset variant only.  The Post-Index and
3986      Pre-Index variants are below.
3987
3988      31 29      23 21    9 4
3989      00 111 101 01 imm12 n t   LDR Bt, [Xn|SP + imm12 * 1]
3990      01 111 101 01 imm12 n t   LDR Ht, [Xn|SP + imm12 * 2]
3991      10 111 101 01 imm12 n t   LDR St, [Xn|SP + imm12 * 4]
3992      11 111 101 01 imm12 n t   LDR Dt, [Xn|SP + imm12 * 8]
3993      00 111 101 11 imm12 n t   LDR Qt, [Xn|SP + imm12 * 16]
3994
3995      00 111 101 00 imm12 n t   STR Bt, [Xn|SP + imm12 * 1]
3996      01 111 101 00 imm12 n t   STR Ht, [Xn|SP + imm12 * 2]
3997      10 111 101 00 imm12 n t   STR St, [Xn|SP + imm12 * 4]
3998      11 111 101 00 imm12 n t   STR Dt, [Xn|SP + imm12 * 8]
3999      00 111 101 10 imm12 n t   STR Qt, [Xn|SP + imm12 * 16]
4000   */
   /* Decode arm for LDR/STR Bt/Ht/St/Dt/Qt, [Xn|SP + pimm12 * size] —
      unsigned scaled-offset form, no writeback. */
   if (INSN(29,24) == BITS6(1,1,1,1,0,1)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
      UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD   = INSN(22,22) == 1;
      UInt   pimm12 = INSN(21,10) << szLg2;  /* offset scaled by size */
      UInt   nn     = INSN(9,5);
      UInt   tt     = INSN(4,0);
      IRTemp tEA    = newTemp(Ity_I64);
      IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
      if (isLD) {
         /* For sub-128-bit loads, clear the whole Q register first, so
            the upper lanes end up zero after the low-lane write. */
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%u]\n",
          isLD ? "ldr" : "str",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
      return True;
   }
4024
4025   /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
4026   /* These are the Post-Index and Pre-Index variants.
4027
4028      31 29      23   20   11 9 4
4029      (at-Rn-then-Rn=EA)
4030      00 111 100 01 0 imm9 01 n t   LDR Bt, [Xn|SP], #simm
4031      01 111 100 01 0 imm9 01 n t   LDR Ht, [Xn|SP], #simm
4032      10 111 100 01 0 imm9 01 n t   LDR St, [Xn|SP], #simm
4033      11 111 100 01 0 imm9 01 n t   LDR Dt, [Xn|SP], #simm
4034      00 111 100 11 0 imm9 01 n t   LDR Qt, [Xn|SP], #simm
4035
4036      (at-EA-then-Rn=EA)
4037      00 111 100 01 0 imm9 11 n t   LDR Bt, [Xn|SP, #simm]!
4038      01 111 100 01 0 imm9 11 n t   LDR Ht, [Xn|SP, #simm]!
4039      10 111 100 01 0 imm9 11 n t   LDR St, [Xn|SP, #simm]!
4040      11 111 100 01 0 imm9 11 n t   LDR Dt, [Xn|SP, #simm]!
4041      00 111 100 11 0 imm9 11 n t   LDR Qt, [Xn|SP, #simm]!
4042
4043      Stores are the same except with bit 22 set to 0.
4044   */
   /* Decode arm for LDR/STR Bt/Ht/St/Dt/Qt with 9-bit signed immediate
      and writeback: post-index (access at Rn) when bit 11 == 0,
      pre-index (access at EA) when bit 11 == 1.  Rn becomes EA. */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD   = INSN(22,22) == 1;
      UInt   imm9   = INSN(20,12);
      Bool   atRN   = INSN(11,11) == 0;  /* True => post-index */
      UInt   nn     = INSN(9,5);
      UInt   tt     = INSN(4,0);
      IRTemp tRN    = newTemp(Ity_I64);
      IRTemp tEA    = newTemp(Ity_I64);
      IRTemp tTA    = IRTemp_INVALID;
      IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
      ULong  simm9  = sx_to_64(imm9, 9);
      assign(tRN, getIReg64orSP(nn));
      assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
      /* Transfer address: Rn for post-index, EA for pre-index. */
      tTA = atRN ? tRN : tEA;
      if (isLD) {
         /* Sub-128-bit loads zero the upper lanes of the Q register. */
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
      } else {
         storeLE(mkexpr(tTA), getQRegLO(tt, ty));
      }
      putIReg64orSP(nn, mkexpr(tEA));
      DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
          isLD ? "ldr" : "str",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
      return True;
   }
4076
4077   /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
4078   /* 31 29      23   20   11 9 4
4079      00 111 100 01 0 imm9 00 n t   LDR Bt, [Xn|SP, #simm]
4080      01 111 100 01 0 imm9 00 n t   LDR Ht, [Xn|SP, #simm]
4081      10 111 100 01 0 imm9 00 n t   LDR St, [Xn|SP, #simm]
4082      11 111 100 01 0 imm9 00 n t   LDR Dt, [Xn|SP, #simm]
4083      00 111 100 11 0 imm9 00 n t   LDR Qt, [Xn|SP, #simm]
4084
4085      00 111 100 00 0 imm9 00 n t   STR Bt, [Xn|SP, #simm]
4086      01 111 100 00 0 imm9 00 n t   STR Ht, [Xn|SP, #simm]
4087      10 111 100 00 0 imm9 00 n t   STR St, [Xn|SP, #simm]
4088      11 111 100 00 0 imm9 00 n t   STR Dt, [Xn|SP, #simm]
4089      00 111 100 10 0 imm9 00 n t   STR Qt, [Xn|SP, #simm]
4090   */
   /* Decode arm for LDUR/STUR Bt/Ht/St/Dt/Qt, [Xn|SP, #simm9] —
      unscaled signed 9-bit offset, no writeback. */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD   = INSN(22,22) == 1;
      UInt   imm9   = INSN(20,12);
      UInt   nn     = INSN(9,5);
      UInt   tt     = INSN(4,0);
      ULong  simm9  = sx_to_64(imm9, 9);
      IRTemp tEA    = newTemp(Ity_I64);
      IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
      if (isLD) {
         /* Sub-128-bit loads zero the upper lanes of the Q register. */
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%lld]\n",
          isLD ? "ldur" : "stur",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
      return True;
   }
4116
4117   /* ---------------- LDR (literal, SIMD&FP) ---------------- */
4118   /* 31 29      23    4
4119      00 011 100 imm19 t    LDR St, [PC + sxTo64(imm19 << 2)]
4120      01 011 100 imm19 t    LDR Dt, [PC + sxTo64(imm19 << 2)]
4121      10 011 100 imm19 t    LDR Qt, [PC + sxTo64(imm19 << 2)]
4122   */
4123   if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
4124      UInt   szB   = 4 << INSN(31,30);
4125      UInt   imm19 = INSN(23,5);
4126      UInt   tt    = INSN(4,0);
4127      ULong  ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4128      IRType ty    = preferredVectorSubTypeFromSize(szB);
4129      putQReg128(tt, mkV128(0x0000));
4130      putQRegLO(tt, loadLE(ty, mkU64(ea)));
4131      DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
4132      return True;
4133   }
4134
4135   /* ---------- LD1/ST1 (single structure, no offset) ---------- */
4136   /* 31        23        15
4137      0Q00 1101 0L00 0000 xx0S sz N T
4138                          ----
4139                          opcode
4140      1011 1111 1011 1111 0010 00 0 0 <- mask
4141      0000 1101 0000 0000 0000 00 0 0 <- result
4142
4143      FIXME does this assume that the host is little endian?
4144   */
4145
4146   if ((insn & 0xBFBF2000) == 0x0D000000) {
4147      Bool   isLD = INSN(22,22) == 1;
4148      UInt   rN   = INSN(9,5);
4149      UInt   vT   = INSN(4,0);
4150      UInt   q    = INSN(30, 30);
4151      UInt   xx   = INSN(15, 14);
4152      UInt   opcode = INSN(15, 13);
4153      UInt   s    = INSN(12, 12);
4154      UInt   sz   = INSN(11, 10);
4155
4156      UInt   index = (q << 3) | (s << 2) | sz;
4157      const HChar* name = "";
4158      Bool   valid = False;
4159      IRType laneTy = Ity_I8;
4160
4161      if (opcode == 0x0) { // 8 bit variant
4162         name = "b";
4163         valid = True;
4164      } else if (opcode == 0x2 && (sz & 1) == 0) { // 16 bit variant
4165         name = "h";
4166         laneTy = Ity_I16;
4167         index >>= 1;
4168         valid = True;
4169      } else if (opcode == 0x4 && sz == 0x0) { // 32 bit variant
4170         name = "s";
4171         laneTy = Ity_I32;
4172         index >>= 2;
4173         valid = True;
4174      } else if (opcode == 0x4 && sz == 0x1 && s == 0) { // 64 bit variant
4175         name = "d";
4176         laneTy = Ity_I64;
4177         index >>= 3;
4178         valid = True;
4179      }
4180
4181      if (valid) {
4182         IRTemp tEA  = newTemp(Ity_I64);
4183         assign(tEA, getIReg64orSP(rN));
4184         if (rN == 31) { /* FIXME generate stack alignment check */ }
4185         if (isLD) {
4186            putQRegLane(vT, index, loadLE(laneTy, mkexpr(tEA)));
4187         } else {
4188            storeLE(mkexpr(tEA), getQRegLane(vT, index, laneTy));
4189         }
4190
4191         DIP("%s {v%u.%s}[%d], [%s]\n", isLD ? "ld1" : "st1",
4192             vT, name, index, nameIReg64orSP(rN));
4193         return True;
4194      }
4195
4196   }
4197
4198
4199   /* ---------- LD1/ST1 (multiple structure, no offset, one register variant) ---------- */
4200   /* 31        23
4201      0100 1100 0100 0000 0111 11 N T   LD1 {vT.2d},  [Xn|SP]
4202      0100 1100 0000 0000 0111 11 N T   ST1 {vT.2d},  [Xn|SP]
4203      0100 1100 0100 0000 0111 10 N T   LD1 {vT.4s},  [Xn|SP]
4204      0100 1100 0000 0000 0111 10 N T   ST1 {vT.4s},  [Xn|SP]
4205      0100 1100 0100 0000 0111 01 N T   LD1 {vT.8h},  [Xn|SP]
4206      0100 1100 0000 0000 0111 01 N T   ST1 {vT.8h},  [Xn|SP]
4207      0100 1100 0100 0000 0111 00 N T   LD1 {vT.16b}, [Xn|SP]
4208      0100 1100 0000 0000 0111 00 N T   ST1 {vT.16b}, [Xn|SP]
4209      FIXME does this assume that the host is little endian?
4210   */
4211   if (   (insn & 0xFFFFF000) == 0x4C407000 // LD1 cases
4212       || (insn & 0xFFFFF000) == 0x4C007000 // ST1 cases
4213      ) {
4214      Bool   isLD = INSN(22,22) == 1;
4215      UInt   rN   = INSN(9,5);
4216      UInt   vT   = INSN(4,0);
4217      IRTemp tEA  = newTemp(Ity_I64);
4218      const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4219      const HChar* name = names[INSN(11,10)];
4220      assign(tEA, getIReg64orSP(rN));
4221      if (rN == 31) { /* FIXME generate stack alignment check */ }
4222      if (isLD) {
4223         putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4224      } else {
4225         storeLE(mkexpr(tEA), getQReg128(vT));
4226      }
4227      DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
4228          vT, name, nameIReg64orSP(rN));
4229      return True;
4230   }
4231
4232   /* 31        23
4233      0000 1100 0100 0000 0111 11 N T   LD1 {vT.1d}, [Xn|SP]
4234      0000 1100 0000 0000 0111 11 N T   ST1 {vT.1d}, [Xn|SP]
4235      0000 1100 0100 0000 0111 10 N T   LD1 {vT.2s}, [Xn|SP]
4236      0000 1100 0000 0000 0111 10 N T   ST1 {vT.2s}, [Xn|SP]
4237      0000 1100 0100 0000 0111 01 N T   LD1 {vT.4h}, [Xn|SP]
4238      0000 1100 0000 0000 0111 01 N T   ST1 {vT.4h}, [Xn|SP]
4239      0000 1100 0100 0000 0111 00 N T   LD1 {vT.8b}, [Xn|SP]
4240      0000 1100 0000 0000 0111 00 N T   ST1 {vT.8b}, [Xn|SP]
4241      FIXME does this assume that the host is little endian?
4242   */
4243   if (   (insn & 0xFFFFF000) == 0x0C407000 // LD1 cases
4244       || (insn & 0xFFFFF000) == 0x0C007000 // ST1 cases
4245      ) {
4246      Bool   isLD = INSN(22,22) == 1;
4247      UInt   rN   = INSN(9,5);
4248      UInt   vT   = INSN(4,0);
4249      IRTemp tEA  = newTemp(Ity_I64);
4250      const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4251      const HChar* name = names[INSN(11,10)];
4252      assign(tEA, getIReg64orSP(rN));
4253      if (rN == 31) { /* FIXME generate stack alignment check */ }
4254      if (isLD) {
4255         putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4256         putQRegLane(vT, 1, mkU64(0));
4257      } else {
4258         storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4259      }
4260      DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
4261          vT, name, nameIReg64orSP(rN));
4262      return True;
4263   }
4264
4265   /* ---------- LD1/ST1 (multiple structure, post-index, one register variant) ---------- */
4266   /* 31        23
4267      0100 1100 1001 1111 0111 11 N T  ST1 {vT.2d},  [xN|SP], #16
4268      0100 1100 1101 1111 0111 11 N T  LD1 {vT.2d},  [xN|SP], #16
4269      0100 1100 1001 1111 0111 10 N T  ST1 {vT.4s},  [xN|SP], #16
4270      0100 1100 1101 1111 0111 10 N T  LD1 {vT.4s},  [xN|SP], #16
4271      0100 1100 1001 1111 0111 01 N T  ST1 {vT.8h},  [xN|SP], #16
4272      0100 1100 1101 1111 0111 01 N T  LD1 {vT.8h},  [xN|SP], #16
4273      0100 1100 1001 1111 0111 00 N T  ST1 {vT.16b}, [xN|SP], #16
4274      0100 1100 1101 1111 0111 00 N T  LD1 {vT.16b}, [xN|SP], #16
4275      Note that #16 is implied and cannot be any other value.
4276      FIXME does this assume that the host is little endian?
4277   */
4278   if (   (insn & 0xFFFFF000) == 0x4CDF7000 // LD1 cases
4279       || (insn & 0xFFFFF000) == 0x4C9F7000 // ST1 cases
4280      ) {
4281      Bool   isLD = INSN(22,22) == 1;
4282      UInt   rN   = INSN(9,5);
4283      UInt   vT   = INSN(4,0);
4284      IRTemp tEA  = newTemp(Ity_I64);
4285      const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4286      const HChar* name = names[INSN(11,10)];
4287      assign(tEA, getIReg64orSP(rN));
4288      if (rN == 31) { /* FIXME generate stack alignment check */ }
4289      if (isLD) {
4290         putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4291      } else {
4292         storeLE(mkexpr(tEA), getQReg128(vT));
4293      }
4294      putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16)));
4295      DIP("%s {v%u.%s}, [%s], #16\n", isLD ? "ld1" : "st1",
4296          vT, name, nameIReg64orSP(rN));
4297      return True;
4298   }
4299
4300   /* 31        23
4301      0000 1100 1001 1111 0111 11 N T  ST1 {vT.1d}, [xN|SP], #8
4302      0000 1100 1101 1111 0111 11 N T  LD1 {vT.1d}, [xN|SP], #8
4303      0000 1100 1001 1111 0111 10 N T  ST1 {vT.2s}, [xN|SP], #8
4304      0000 1100 1101 1111 0111 10 N T  LD1 {vT.2s}, [xN|SP], #8
4305      0000 1100 1001 1111 0111 01 N T  ST1 {vT.4h}, [xN|SP], #8
4306      0000 1100 1101 1111 0111 01 N T  LD1 {vT.4h}, [xN|SP], #8
4307      0000 1100 1001 1111 0111 00 N T  ST1 {vT.8b}, [xN|SP], #8
4308      0000 1100 1101 1111 0111 00 N T  LD1 {vT.8b}, [xN|SP], #8
4309      Note that #8 is implied and cannot be any other value.
4310      FIXME does this assume that the host is little endian?
4311   */
4312   if (   (insn & 0xFFFFF000) == 0x0CDF7000 // LD1 cases
4313       || (insn & 0xFFFFF000) == 0x0C9F7000 // ST1 cases
4314      ) {
4315      Bool   isLD = INSN(22,22) == 1;
4316      UInt   rN  = INSN(9,5);
4317      UInt   vT  = INSN(4,0);
4318      IRTemp tEA = newTemp(Ity_I64);
4319      const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4320      const HChar* name = names[INSN(11,10)];
4321      assign(tEA, getIReg64orSP(rN));
4322      if (rN == 31) { /* FIXME generate stack alignment check */ }
4323      if (isLD) {
4324         putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4325         putQRegLane(vT, 1, mkU64(0));
4326      } else {
4327         storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4328      }
4329      putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8)));
4330      DIP("%s {v%u.%s}, [%s], #8\n",  isLD ? "ld1" : "st1",
4331          vT, name, nameIReg64orSP(rN));
4332      return True;
4333   }
4334
4335   /* ---------- LD2/ST2 (multiple structures, post index) ---------- */
4336   /* Only a very few cases. */
4337   /* 31        23             11 9 4
4338      0100 1100 1101 1111 1000 11 n t  LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4339      0100 1100 1001 1111 1000 11 n t  ST2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4340      0100 1100 1101 1111 1000 10 n t  LD2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4341      0100 1100 1001 1111 1000 10 n t  ST2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4342   */
4343   if (   (insn & 0xFFFFFC00) == 0x4CDF8C00 // LD2 .2d
4344       || (insn & 0xFFFFFC00) == 0x4C9F8C00 // ST2 .2d
4345       || (insn & 0xFFFFFC00) == 0x4CDF8800 // LD2 .4s
4346       || (insn & 0xFFFFFC00) == 0x4C9F8800 // ST2 .4s
4347      ) {
4348      Bool   isLD = INSN(22,22) == 1;
4349      UInt   rN   = INSN(9,5);
4350      UInt   vT   = INSN(4,0);
4351      IRTemp tEA  = newTemp(Ity_I64);
4352      UInt   sz   = INSN(11,10);
4353      const HChar* name = "??";
4354      assign(tEA, getIReg64orSP(rN));
4355      if (rN == 31) { /* FIXME generate stack alignment check */ }
4356      IRExpr* tEA_0  = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4357      IRExpr* tEA_8  = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
4358      IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4359      IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
4360      if (sz == BITS2(1,1)) {
4361         name = "2d";
4362         if (isLD) {
4363            putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
4364            putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_16));
4365            putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
4366            putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_24));
4367         } else {
4368            storeLE(tEA_0,  getQRegLane((vT+0) % 32, 0, Ity_I64));
4369            storeLE(tEA_16, getQRegLane((vT+0) % 32, 1, Ity_I64));
4370            storeLE(tEA_8,  getQRegLane((vT+1) % 32, 0, Ity_I64));
4371            storeLE(tEA_24, getQRegLane((vT+1) % 32, 1, Ity_I64));
4372         }
4373      }
4374      else if (sz == BITS2(1,0)) {
4375         /* Uh, this is ugly.  TODO: better. */
4376         name = "4s";
4377         IRExpr* tEA_4  = binop(Iop_Add64, mkexpr(tEA), mkU64(4));
4378         IRExpr* tEA_12 = binop(Iop_Add64, mkexpr(tEA), mkU64(12));
4379         IRExpr* tEA_20 = binop(Iop_Add64, mkexpr(tEA), mkU64(20));
4380         IRExpr* tEA_28 = binop(Iop_Add64, mkexpr(tEA), mkU64(28));
4381         if (isLD) {
4382            putQRegLane((vT+0) % 32, 0, loadLE(Ity_I32, tEA_0));
4383            putQRegLane((vT+0) % 32, 1, loadLE(Ity_I32, tEA_8));
4384            putQRegLane((vT+0) % 32, 2, loadLE(Ity_I32, tEA_16));
4385            putQRegLane((vT+0) % 32, 3, loadLE(Ity_I32, tEA_24));
4386            putQRegLane((vT+1) % 32, 0, loadLE(Ity_I32, tEA_4));
4387            putQRegLane((vT+1) % 32, 1, loadLE(Ity_I32, tEA_12));
4388            putQRegLane((vT+1) % 32, 2, loadLE(Ity_I32, tEA_20));
4389            putQRegLane((vT+1) % 32, 3, loadLE(Ity_I32, tEA_28));
4390         } else {
4391            storeLE(tEA_0,  getQRegLane((vT+0) % 32, 0, Ity_I32));
4392            storeLE(tEA_8,  getQRegLane((vT+0) % 32, 1, Ity_I32));
4393            storeLE(tEA_16, getQRegLane((vT+0) % 32, 2, Ity_I32));
4394            storeLE(tEA_24, getQRegLane((vT+0) % 32, 3, Ity_I32));
4395            storeLE(tEA_4,  getQRegLane((vT+1) % 32, 0, Ity_I32));
4396            storeLE(tEA_12, getQRegLane((vT+1) % 32, 1, Ity_I32));
4397            storeLE(tEA_20, getQRegLane((vT+1) % 32, 2, Ity_I32));
4398            storeLE(tEA_28, getQRegLane((vT+1) % 32, 3, Ity_I32));
4399         }
4400      }
4401      else {
4402         vassert(0); // Can't happen.
4403      }
4404      putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
4405      DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld2" : "st2",
4406          (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
4407      return True;
4408   }
4409
4410   /* ---------- LD1/ST1 (multiple structures, no offset) ---------- */
4411   /* Only a very few cases. */
4412   /* 31        23
4413      0100 1100 0100 0000 1010 00 n t  LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4414      0100 1100 0000 0000 1010 00 n t  ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4415   */
4416   if (   (insn & 0xFFFFFC00) == 0x4C40A000 // LD1
4417       || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1
4418      ) {
4419      Bool   isLD = INSN(22,22) == 1;
4420      UInt   rN   = INSN(9,5);
4421      UInt   vT   = INSN(4,0);
4422      IRTemp tEA  = newTemp(Ity_I64);
4423      const HChar* name = "16b";
4424      assign(tEA, getIReg64orSP(rN));
4425      if (rN == 31) { /* FIXME generate stack alignment check */ }
4426      IRExpr* tEA_0  = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4427      IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4428      if (isLD) {
4429         putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
4430         putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
4431      } else {
4432         storeLE(tEA_0,  getQReg128((vT+0) % 32));
4433         storeLE(tEA_16, getQReg128((vT+1) % 32));
4434      }
4435      DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld1" : "st1",
4436          (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
4437      return True;
4438   }
4439
4440   /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
4441   /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
4442   /* 31 29     23  20      14    9 4
4443      sz 001000 010 11111 0 11111 n t   LDX{R,RH,RB}  Rt, [Xn|SP]
4444      sz 001000 010 11111 1 11111 n t   LDAX{R,RH,RB} Rt, [Xn|SP]
4445      sz 001000 000 s     0 11111 n t   STX{R,RH,RB}  Ws, Rt, [Xn|SP]
4446      sz 001000 000 s     1 11111 n t   STLX{R,RH,RB} Ws, Rt, [Xn|SP]
4447   */
4448   if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
4449       && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
4450       && INSN(14,10) == BITS5(1,1,1,1,1)) {
4451      UInt szBlg2     = INSN(31,30);
4452      Bool isLD       = INSN(22,22) == 1;
4453      Bool isAcqOrRel = INSN(15,15) == 1;
4454      UInt ss         = INSN(20,16);
4455      UInt nn         = INSN(9,5);
4456      UInt tt         = INSN(4,0);
4457
4458      vassert(szBlg2 < 4);
4459      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
4460      IRType ty  = integerIRTypeOfSize(szB);
4461      const HChar* suffix[4] = { "rb", "rh", "r", "r" };
4462
4463      IRTemp ea = newTemp(Ity_I64);
4464      assign(ea, getIReg64orSP(nn));
4465      /* FIXME generate check that ea is szB-aligned */
4466
4467      if (isLD && ss == BITS5(1,1,1,1,1)) {
4468         IRTemp res = newTemp(ty);
4469         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
4470         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
4471         if (isAcqOrRel) {
4472            stmt(IRStmt_MBE(Imbe_Fence));
4473         }
4474         DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
4475             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4476         return True;
4477      }
4478      if (!isLD) {
4479         if (isAcqOrRel) {
4480            stmt(IRStmt_MBE(Imbe_Fence));
4481         }
4482         IRTemp  res  = newTemp(Ity_I1);
4483         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
4484         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
4485         /* IR semantics: res is 1 if store succeeds, 0 if it fails.
4486            Need to set rS to 1 on failure, 0 on success. */
4487         putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
4488                                            mkU64(1)));
4489         DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
4490             nameIRegOrZR(False, ss),
4491             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4492         return True;
4493      }
4494      /* else fall through */
4495   }
4496
4497   /* ------------------ LDA{R,RH,RB} ------------------ */
4498   /* ------------------ STL{R,RH,RB} ------------------ */
4499   /* 31 29     23  20      14    9 4
4500      sz 001000 110 11111 1 11111 n t   LDAR<sz> Rt, [Xn|SP]
4501      sz 001000 100 11111 1 11111 n t   STLR<sz> Rt, [Xn|SP]
4502   */
4503   if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
4504       && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
4505      UInt szBlg2 = INSN(31,30);
4506      Bool isLD   = INSN(22,22) == 1;
4507      UInt nn     = INSN(9,5);
4508      UInt tt     = INSN(4,0);
4509
4510      vassert(szBlg2 < 4);
4511      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
4512      IRType ty  = integerIRTypeOfSize(szB);
4513      const HChar* suffix[4] = { "rb", "rh", "r", "r" };
4514
4515      IRTemp ea = newTemp(Ity_I64);
4516      assign(ea, getIReg64orSP(nn));
4517      /* FIXME generate check that ea is szB-aligned */
4518
4519      if (isLD) {
4520         IRTemp res = newTemp(ty);
4521         assign(res, loadLE(ty, mkexpr(ea)));
4522         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
4523         stmt(IRStmt_MBE(Imbe_Fence));
4524         DIP("lda%s %s, [%s]\n", suffix[szBlg2],
4525             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4526      } else {
4527         stmt(IRStmt_MBE(Imbe_Fence));
4528         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
4529         storeLE(mkexpr(ea), data);
4530         DIP("stl%s %s, [%s]\n", suffix[szBlg2],
4531             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4532      }
4533      return True;
4534   }
4535
4536   /* ------------------ PRFM (immediate) ------------------ */
4537   /* 31 29        21    9 4
4538      11 11100110  imm12 n t PRFM <option>, [Xn|SP{, #pimm}]
4539   */
4540
4541   if (INSN(31, 22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
4542      /* TODO: decode */
4543      DIP("prfm ??? (imm)");
4544      return True;
4545   }
4546
4547   vex_printf("ARM64 front end: load_store\n");
4548   return False;
4549#  undef INSN
4550}
4551
4552
4553/*------------------------------------------------------------*/
4554/*--- Control flow and misc instructions                   ---*/
4555/*------------------------------------------------------------*/
4556
4557static
4558Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
4559                          VexArchInfo* archinfo)
4560{
4561#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
4562
4563   /* ---------------------- B cond ----------------------- */
4564   /* 31        24    4 3
4565      0101010 0 imm19 0 cond */
4566   if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
4567      UInt  cond   = INSN(3,0);
4568      ULong uimm64 = INSN(23,5) << 2;
4569      Long  simm64 = (Long)sx_to_64(uimm64, 21);
4570      vassert(dres->whatNext    == Dis_Continue);
4571      vassert(dres->len         == 4);
4572      vassert(dres->continueAt  == 0);
4573      vassert(dres->jk_StopHere == Ijk_INVALID);
4574      stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
4575                        Ijk_Boring,
4576                        IRConst_U64(guest_PC_curr_instr + simm64),
4577                        OFFB_PC) );
4578      putPC(mkU64(guest_PC_curr_instr + 4));
4579      dres->whatNext    = Dis_StopHere;
4580      dres->jk_StopHere = Ijk_Boring;
4581      DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
4582      return True;
4583   }
4584
4585   /* -------------------- B{L} uncond -------------------- */
4586   if (INSN(30,26) == BITS5(0,0,1,0,1)) {
4587      /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
         100101 imm26  BL (PC + sxTo64(imm26 << 2))
4589      */
4590      UInt  bLink  = INSN(31,31);
4591      ULong uimm64 = INSN(25,0) << 2;
4592      Long  simm64 = (Long)sx_to_64(uimm64, 28);
4593      if (bLink) {
4594         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
4595      }
4596      putPC(mkU64(guest_PC_curr_instr + simm64));
4597      dres->whatNext = Dis_StopHere;
4598      dres->jk_StopHere = Ijk_Call;
4599      DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
4600                          guest_PC_curr_instr + simm64);
4601      return True;
4602   }
4603
4604   /* --------------------- B{L} reg --------------------- */
4605   /* 31      24 22 20    15     9  4
4606      1101011 00 10 11111 000000 nn 00000  RET  Rn
4607      1101011 00 01 11111 000000 nn 00000  CALL Rn
4608      1101011 00 00 11111 000000 nn 00000  JMP  Rn
4609   */
4610   if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
4611       && INSN(20,16) == BITS5(1,1,1,1,1)
4612       && INSN(15,10) == BITS6(0,0,0,0,0,0)
4613       && INSN(4,0) == BITS5(0,0,0,0,0)) {
4614      UInt branch_type = INSN(22,21);
4615      UInt nn          = INSN(9,5);
4616      if (branch_type == BITS2(1,0) /* RET */) {
4617         putPC(getIReg64orZR(nn));
4618         dres->whatNext = Dis_StopHere;
4619         dres->jk_StopHere = Ijk_Ret;
4620         DIP("ret %s\n", nameIReg64orZR(nn));
4621         return True;
4622      }
4623      if (branch_type == BITS2(0,1) /* CALL */) {
4624         IRTemp dst = newTemp(Ity_I64);
4625         assign(dst, getIReg64orZR(nn));
4626         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
4627         putPC(mkexpr(dst));
4628         dres->whatNext = Dis_StopHere;
4629         dres->jk_StopHere = Ijk_Call;
4630         DIP("blr %s\n", nameIReg64orZR(nn));
4631         return True;
4632      }
4633      if (branch_type == BITS2(0,0) /* JMP */) {
4634         putPC(getIReg64orZR(nn));
4635         dres->whatNext = Dis_StopHere;
4636         dres->jk_StopHere = Ijk_Boring;
4637         DIP("jmp %s\n", nameIReg64orZR(nn));
4638         return True;
4639      }
4640   }
4641
4642   /* -------------------- CB{N}Z -------------------- */
4643   /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
4644      sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
4645   */
4646   if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
4647      Bool    is64   = INSN(31,31) == 1;
4648      Bool    bIfZ   = INSN(24,24) == 0;
4649      ULong   uimm64 = INSN(23,5) << 2;
4650      UInt    rT     = INSN(4,0);
4651      Long    simm64 = (Long)sx_to_64(uimm64, 21);
4652      IRExpr* cond   = NULL;
4653      if (is64) {
4654         cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
4655                      getIReg64orZR(rT), mkU64(0));
4656      } else {
4657         cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
4658                      getIReg32orZR(rT), mkU32(0));
4659      }
4660      stmt( IRStmt_Exit(cond,
4661                        Ijk_Boring,
4662                        IRConst_U64(guest_PC_curr_instr + simm64),
4663                        OFFB_PC) );
4664      putPC(mkU64(guest_PC_curr_instr + 4));
4665      dres->whatNext    = Dis_StopHere;
4666      dres->jk_StopHere = Ijk_Boring;
4667      DIP("cb%sz %s, 0x%llx\n",
4668          bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
4669          guest_PC_curr_instr + simm64);
4670      return True;
4671   }
4672
4673   /* -------------------- TB{N}Z -------------------- */
4674   /* 31 30      24 23  18  5 4
4675      b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
4676      b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
4677   */
4678   if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
4679      UInt    b5     = INSN(31,31);
4680      Bool    bIfZ   = INSN(24,24) == 0;
4681      UInt    b40    = INSN(23,19);
4682      UInt    imm14  = INSN(18,5);
4683      UInt    tt     = INSN(4,0);
4684      UInt    bitNo  = (b5 << 5) | b40;
4685      ULong   uimm64 = imm14 << 2;
4686      Long    simm64 = sx_to_64(uimm64, 16);
4687      IRExpr* cond
4688         = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
4689                 binop(Iop_And64,
4690                       binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
4691                       mkU64(1)),
4692                 mkU64(0));
4693      stmt( IRStmt_Exit(cond,
4694                        Ijk_Boring,
4695                        IRConst_U64(guest_PC_curr_instr + simm64),
4696                        OFFB_PC) );
4697      putPC(mkU64(guest_PC_curr_instr + 4));
4698      dres->whatNext    = Dis_StopHere;
4699      dres->jk_StopHere = Ijk_Boring;
4700      DIP("tb%sz %s, #%u, 0x%llx\n",
4701          bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
4702          guest_PC_curr_instr + simm64);
4703      return True;
4704   }
4705
4706   /* -------------------- SVC -------------------- */
4707   /* 11010100 000 imm16 000 01
4708      Don't bother with anything except the imm16==0 case.
4709   */
4710   if (INSN(31,0) == 0xD4000001) {
4711      putPC(mkU64(guest_PC_curr_instr + 4));
4712      dres->whatNext    = Dis_StopHere;
4713      dres->jk_StopHere = Ijk_Sys_syscall;
4714      DIP("svc #0\n");
4715      return True;
4716   }
4717
4718   /* ------------------ M{SR,RS} ------------------ */
4719   /* Only handles the case where the system register is TPIDR_EL0.
4720      0xD51BD0 010 Rt   MSR tpidr_el0, rT
4721      0xD53BD0 010 Rt   MRS rT, tpidr_el0
4722   */
4723   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
4724       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
4725      Bool toSys = INSN(21,21) == 0;
4726      UInt tt    = INSN(4,0);
4727      if (toSys) {
4728         stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
4729         DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
4730      } else {
4731         putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
4732         DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
4733      }
4734      return True;
4735   }
4736   /* Cases for FPCR
4737      0xD51B44 000 Rt  MSR fpcr, rT
      0xD53B44 000 Rt  MRS rT, fpcr
4739   */
4740   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
4741       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
4742      Bool toSys = INSN(21,21) == 0;
4743      UInt tt    = INSN(4,0);
4744      if (toSys) {
4745         stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
4746         DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
4747      } else {
4748         putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
4749         DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
4750      }
4751      return True;
4752   }
4753   /* Cases for FPSR
4754      0xD51B44 001 Rt  MSR fpsr, rT
      0xD53B44 001 Rt  MRS rT, fpsr
4756   */
4757   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
4758       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
4759      Bool toSys = INSN(21,21) == 0;
4760      UInt tt    = INSN(4,0);
4761      if (toSys) {
4762         stmt( IRStmt_Put( OFFB_FPSR, getIReg32orZR(tt)) );
4763         DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
4764      } else {
4765         putIReg32orZR(tt, IRExpr_Get(OFFB_FPSR, Ity_I32));
4766         DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
4767      }
4768      return True;
4769   }
4770   /* Cases for NZCV
4771      D51B42 000 Rt  MSR nzcv, rT
4772      D53B42 000 Rt  MRS rT, nzcv
4773   */
4774   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
4775       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
4776      Bool  toSys = INSN(21,21) == 0;
4777      UInt  tt    = INSN(4,0);
4778      if (toSys) {
4779         IRTemp t = newTemp(Ity_I64);
4780         assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
4781         setFlags_COPY(t);
4782         DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
4783      } else {
4784         IRTemp res = newTemp(Ity_I64);
4785         assign(res, mk_arm64g_calculate_flags_nzcv());
4786         putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
4787         DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
4788      }
4789      return True;
4790   }
4791   /* Cases for DCZID_EL0
4792      Don't support arbitrary reads and writes to this register.  Just
4793      return the value 16, which indicates that the DC ZVA instruction
4794      is not permitted, so we don't have to emulate it.
4795      D5 3B 00 111 Rt  MRS rT, dczid_el0
4796   */
4797   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
4798      UInt tt = INSN(4,0);
4799      putIReg64orZR(tt, mkU64(1<<4));
4800      DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
4801      return True;
4802   }
4803   /* Cases for CTR_EL0
4804      We just handle reads, and make up a value from the D and I line
4805      sizes in the VexArchInfo we are given, and patch in the following
4806      fields that the Foundation model gives ("natively"):
4807      CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
      D5 3B 00 001 Rt  MRS rT, ctr_el0
4809   */
4810   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
4811      UInt tt = INSN(4,0);
      /* Need to generate a value from dMinLine_lg2_szB and
         iMinLine_lg2_szB.  The value in the register is in 32-bit
4814         units, so need to subtract 2 from the values in the
4815         VexArchInfo.  We can assume that the values here are valid --
4816         disInstr_ARM64 checks them -- so there's no need to deal with
4817         out-of-range cases. */
4818      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
4819              && archinfo->arm64_dMinLine_lg2_szB <= 17
4820              && archinfo->arm64_iMinLine_lg2_szB >= 2
4821              && archinfo->arm64_iMinLine_lg2_szB <= 17);
4822      UInt val
4823         = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
4824                      | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
4825      putIReg64orZR(tt, mkU64(val));
4826      DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
4827      return True;
4828   }
4829
4830   /* ------------------ IC_IVAU ------------------ */
4831   /* D5 0B 75 001 Rt  ic ivau, rT
4832   */
4833   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
4834      /* We will always be provided with a valid iMinLine value. */
4835      vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
4836              && archinfo->arm64_iMinLine_lg2_szB <= 17);
4837      /* Round the requested address, in rT, down to the start of the
4838         containing block. */
4839      UInt   tt      = INSN(4,0);
4840      ULong  lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
4841      IRTemp addr    = newTemp(Ity_I64);
4842      assign( addr, binop( Iop_And64,
4843                           getIReg64orZR(tt),
4844                           mkU64(~(lineszB - 1))) );
4845      /* Set the invalidation range, request exit-and-invalidate, with
4846         continuation at the next instruction. */
4847      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
4848      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
4849      /* be paranoid ... */
4850      stmt( IRStmt_MBE(Imbe_Fence) );
4851      putPC(mkU64( guest_PC_curr_instr + 4 ));
4852      dres->whatNext    = Dis_StopHere;
4853      dres->jk_StopHere = Ijk_InvalICache;
4854      DIP("ic ivau, %s\n", nameIReg64orZR(tt));
4855      return True;
4856   }
4857
4858   /* ------------------ DC_CVAU ------------------ */
4859   /* D5 0B 7B 001 Rt  dc cvau, rT
4860   */
4861   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
4862      /* Exactly the same scheme as for IC IVAU, except we observe the
4863         dMinLine size, and request an Ijk_FlushDCache instead of
4864         Ijk_InvalICache. */
4865      /* We will always be provided with a valid dMinLine value. */
4866      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
4867              && archinfo->arm64_dMinLine_lg2_szB <= 17);
4868      /* Round the requested address, in rT, down to the start of the
4869         containing block. */
4870      UInt   tt      = INSN(4,0);
4871      ULong  lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
4872      IRTemp addr    = newTemp(Ity_I64);
4873      assign( addr, binop( Iop_And64,
4874                           getIReg64orZR(tt),
4875                           mkU64(~(lineszB - 1))) );
4876      /* Set the flush range, request exit-and-flush, with
4877         continuation at the next instruction. */
4878      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
4879      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
4880      /* be paranoid ... */
4881      stmt( IRStmt_MBE(Imbe_Fence) );
4882      putPC(mkU64( guest_PC_curr_instr + 4 ));
4883      dres->whatNext    = Dis_StopHere;
4884      dres->jk_StopHere = Ijk_FlushDCache;
4885      DIP("dc cvau, %s\n", nameIReg64orZR(tt));
4886      return True;
4887   }
4888
4889   /* ------------------ ISB, DMB, DSB ------------------ */
4890   if (INSN(31,0) == 0xD5033FDF) {
4891      stmt(IRStmt_MBE(Imbe_Fence));
4892      DIP("isb\n");
4893      return True;
4894   }
4895   if (INSN(31,0) == 0xD5033BBF) {
4896      stmt(IRStmt_MBE(Imbe_Fence));
4897      DIP("dmb ish\n");
4898      return True;
4899   }
4900   if (INSN(31,0) == 0xD5033ABF) {
4901      stmt(IRStmt_MBE(Imbe_Fence));
4902      DIP("dmb ishst\n");
4903      return True;
4904   }
4905   if (INSN(31,0) == 0xD5033B9F) {
4906      stmt(IRStmt_MBE(Imbe_Fence));
4907      DIP("dsb ish\n");
4908      return True;
4909   }
4910
4911   /* -------------------- NOP -------------------- */
4912   if (INSN(31,0) == 0xD503201F) {
4913      DIP("nop\n");
4914      return True;
4915   }
4916
4917  //fail:
4918   vex_printf("ARM64 front end: branch_etc\n");
4919   return False;
4920#  undef INSN
4921}
4922
4923
4924/*------------------------------------------------------------*/
4925/*--- SIMD and FP instructions                             ---*/
4926/*------------------------------------------------------------*/
4927
4928/* begin FIXME -- rm temp scaffolding */
4929static IRExpr* mk_CatEvenLanes64x2 ( IRTemp, IRTemp );
4930static IRExpr* mk_CatOddLanes64x2  ( IRTemp, IRTemp );
4931
4932static IRExpr* mk_CatEvenLanes32x4 ( IRTemp, IRTemp );
4933static IRExpr* mk_CatOddLanes32x4  ( IRTemp, IRTemp );
4934static IRExpr* mk_InterleaveLO32x4 ( IRTemp, IRTemp );
4935static IRExpr* mk_InterleaveHI32x4 ( IRTemp, IRTemp );
4936
4937static IRExpr* mk_CatEvenLanes16x8 ( IRTemp, IRTemp );
4938static IRExpr* mk_CatOddLanes16x8  ( IRTemp, IRTemp );
4939static IRExpr* mk_InterleaveLO16x8 ( IRTemp, IRTemp );
4940static IRExpr* mk_InterleaveHI16x8 ( IRTemp, IRTemp );
4941
4942static IRExpr* mk_CatEvenLanes8x16 ( IRTemp, IRTemp );
4943static IRExpr* mk_CatOddLanes8x16  ( IRTemp, IRTemp );
4944static IRExpr* mk_InterleaveLO8x16 ( IRTemp, IRTemp );
4945static IRExpr* mk_InterleaveHI8x16 ( IRTemp, IRTemp );
4946/* end FIXME -- rm temp scaffolding */
4947
4948/* Generate N copies of |bit| in the bottom of a ULong. */
4949static ULong Replicate ( ULong bit, Int N )
4950{
4951   vassert(bit <= 1 && N >= 1 && N < 64);
4952   if (bit == 0) {
4953      return 0;
4954    } else {
4955      /* Careful.  This won't work for N == 64. */
4956      return (1ULL << N) - 1;
4957   }
4958}
4959
4960static ULong Replicate32x2 ( ULong bits32 )
4961{
4962   vassert(0 == (bits32 & ~0xFFFFFFFFULL));
4963   return (bits32 << 32) | bits32;
4964}
4965
4966static ULong Replicate16x4 ( ULong bits16 )
4967{
4968   vassert(0 == (bits16 & ~0xFFFFULL));
4969   return Replicate32x2((bits16 << 16) | bits16);
4970}
4971
4972static ULong Replicate8x8 ( ULong bits8 )
4973{
4974   vassert(0 == (bits8 & ~0xFFULL));
4975   return Replicate16x4((bits8 << 8) | bits8);
4976}
4977
4978/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
4979   |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
4980   is 64.  In the former case, the upper 32 bits of the returned value
4981   are guaranteed to be zero. */
4982static ULong VFPExpandImm ( ULong imm8, Int N )
4983{
4984   vassert(imm8 <= 0xFF);
4985   vassert(N == 32 || N == 64);
4986   Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
4987   Int F = N - E - 1;
4988   ULong imm8_6 = (imm8 >> 6) & 1;
4989   /* sign: 1 bit */
4990   /* exp:  E bits */
4991   /* frac: F bits */
4992   ULong sign = (imm8 >> 7) & 1;
4993   ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
4994   ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
4995   vassert(sign < (1ULL << 1));
4996   vassert(exp  < (1ULL << E));
4997   vassert(frac < (1ULL << F));
4998   vassert(1 + E + F == N);
4999   ULong res = (sign << (E+F)) | (exp << F) | frac;
5000   return res;
5001}
5002
5003/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
5004   This might fail, as indicated by the returned Bool.  Page 2530 of
5005   the manual. */
5006static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
5007                               UInt op, UInt cmode, UInt imm8 )
5008{
5009   vassert(op <= 1);
5010   vassert(cmode <= 15);
5011   vassert(imm8 <= 255);
5012
5013   *res = 0; /* will overwrite iff returning True */
5014
5015   ULong imm64    = 0;
5016   Bool  testimm8 = False;
5017
5018   switch (cmode >> 1) {
5019      case 0:
5020         testimm8 = False; imm64 = Replicate32x2(imm8); break;
5021      case 1:
5022         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
5023      case 2:
5024         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
5025      case 3:
5026         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
5027      case 4:
5028          testimm8 = False; imm64 = Replicate16x4(imm8); break;
5029      case 5:
5030          testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
5031      case 6:
5032          testimm8 = True;
5033          if ((cmode & 1) == 0)
5034              imm64 = Replicate32x2((imm8 << 8) | 0xFF);
5035          else
5036              imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
5037          break;
5038      case 7:
5039         testimm8 = False;
5040         if ((cmode & 1) == 0 && op == 0)
5041             imm64 = Replicate8x8(imm8);
5042         if ((cmode & 1) == 0 && op == 1) {
5043             imm64 = 0;   imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
5044             imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
5045             imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
5046             imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
5047             imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
5048             imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
5049             imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
5050             imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
5051         }
5052         if ((cmode & 1) == 1 && op == 0) {
5053            ULong imm8_7  = (imm8 >> 7) & 1;
5054            ULong imm8_6  = (imm8 >> 6) & 1;
5055            ULong imm8_50 = imm8 & 63;
5056            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
5057                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
5058                          | (Replicate(imm8_6, 5) << (6 + 19))
5059                          | (imm8_50              << 19);
5060            imm64 = Replicate32x2(imm32);
5061         }
5062         if ((cmode & 1) == 1 && op == 1) {
5063            // imm64 = imm8<7>:NOT(imm8<6>)
5064            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
5065            ULong imm8_7  = (imm8 >> 7) & 1;
5066            ULong imm8_6  = (imm8 >> 6) & 1;
5067            ULong imm8_50 = imm8 & 63;
5068            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
5069                    | (Replicate(imm8_6, 8) << 54)
5070                    | (imm8_50 << 48);
5071         }
5072         break;
5073      default:
5074        vassert(0);
5075   }
5076
5077   if (testimm8 && imm8 == 0)
5078      return False;
5079
5080   *res = imm64;
5081   return True;
5082}
5083
5084
5085/* Help a bit for decoding laneage for vector operations that can be
5086   of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
5087   and SZ bits, typically for vector floating point. */
5088static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI,  /*OUT*/IRType* tyF,
5089                               /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
5090                               /*OUT*/const HChar** arrSpec,
5091                               Bool bitQ, Bool bitSZ )
5092{
5093   vassert(bitQ == True || bitQ == False);
5094   vassert(bitSZ == True || bitSZ == False);
5095   if (bitQ && bitSZ) { // 2x64
5096      if (tyI)       *tyI       = Ity_I64;
5097      if (tyF)       *tyF       = Ity_F64;
5098      if (nLanes)    *nLanes    = 2;
5099      if (zeroUpper) *zeroUpper = False;
5100      if (arrSpec)   *arrSpec   = "2d";
5101      return True;
5102   }
5103   if (bitQ && !bitSZ) { // 4x32
5104      if (tyI)       *tyI       = Ity_I32;
5105      if (tyF)       *tyF       = Ity_F32;
5106      if (nLanes)    *nLanes    = 4;
5107      if (zeroUpper) *zeroUpper = False;
5108      if (arrSpec)   *arrSpec   = "4s";
5109      return True;
5110   }
5111   if (!bitQ && !bitSZ) { // 2x32
5112      if (tyI)       *tyI       = Ity_I32;
5113      if (tyF)       *tyF       = Ity_F32;
5114      if (nLanes)    *nLanes    = 2;
5115      if (zeroUpper) *zeroUpper = True;
5116      if (arrSpec)   *arrSpec   = "2s";
5117      return True;
5118   }
5119   // Else impliedly 1x64, which isn't allowed.
5120   return False;
5121}
5122
5123/* Helper for decoding laneage for simple vector operations,
5124   eg integer add. */
5125static Bool getLaneInfo_SIMPLE ( /*OUT*/Bool* zeroUpper,
5126                                 /*OUT*/const HChar** arrSpec,
5127                                 Bool bitQ, UInt szBlg2 )
5128{
5129   vassert(bitQ == True || bitQ == False);
5130   vassert(szBlg2 < 4);
5131   Bool zu = False;
5132   const HChar* as = NULL;
5133   switch ((szBlg2 << 1) | (bitQ ? 1 : 0)) {
5134      case 0: zu = True;  as = "8b";  break;
5135      case 1: zu = False; as = "16b"; break;
5136      case 2: zu = True;  as = "4h";  break;
5137      case 3: zu = False; as = "8h";  break;
5138      case 4: zu = True;  as = "2s";  break;
5139      case 5: zu = False; as = "4s";  break;
5140      case 6: return False; // impliedly 1x64
5141      case 7: zu = False; as = "2d";  break;
5142      default: vassert(0);
5143   }
5144   vassert(as);
5145   if (arrSpec)   *arrSpec = as;
5146   if (zeroUpper) *zeroUpper = zu;
5147   return True;
5148}
5149
5150
5151/* Helper for decoding laneage for shift-style vector operations
5152   that involve an immediate shift amount. */
5153static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
5154                                    UInt immh, UInt immb )
5155{
5156   vassert(immh < (1<<4));
5157   vassert(immb < (1<<3));
5158   UInt immhb = (immh << 3) | immb;
5159   if (immh & 8) {
5160      if (shift)  *shift  = 128 - immhb;
5161      if (szBlg2) *szBlg2 = 3;
5162      return True;
5163   }
5164   if (immh & 4) {
5165      if (shift)  *shift  = 64 - immhb;
5166      if (szBlg2) *szBlg2 = 2;
5167      return True;
5168   }
5169   if (immh & 2) {
5170      if (shift)  *shift  = 32 - immhb;
5171      if (szBlg2) *szBlg2 = 1;
5172      return True;
5173   }
5174   if (immh & 1) {
5175      if (shift)  *shift  = 16 - immhb;
5176      if (szBlg2) *szBlg2 = 0;
5177      return True;
5178   }
5179   return False;
5180}
5181
5182
/* Generate IR to fold all lanes of the V128 value in 'src' as
   characterised by the operator 'op', and return the result in the
   bottom bits of a V128, with all other bits set to zero.
   'op' must be one of the 8x16, 16x8 or 32x4 integer MIN/MAX
   operators handled below; anything else asserts. */
static IRTemp math_MINMAXV ( IRTemp src, IROp op )
{
   /* The basic idea is to use repeated applications of Iop_CatEven*
      and Iop_CatOdd* operators to 'src' so as to clone each lane into
      a complete vector.  Then fold all those vectors with 'op' and
      zero out all but the least significant lane. */
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         /* Stage 1: duplicate each 64-bit half of src across a whole
            vector. */
         IRTemp x76547654 = newTemp(Ity_V128);
         IRTemp x32103210 = newTemp(Ity_V128);
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         /* Stage 2: duplicate each 32-bit quarter of src across a
            whole vector. */
         IRTemp x76767676 = newTemp(Ity_V128);
         IRTemp x54545454 = newTemp(Ity_V128);
         IRTemp x32323232 = newTemp(Ity_V128);
         IRTemp x10101010 = newTemp(Ity_V128);
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         /* Stage 3: duplicate each 16-bit chunk of src across a whole
            vector. */
         IRTemp x77777777 = newTemp(Ity_V128);
         IRTemp x66666666 = newTemp(Ity_V128);
         IRTemp x55555555 = newTemp(Ity_V128);
         IRTemp x44444444 = newTemp(Ity_V128);
         IRTemp x33333333 = newTemp(Ity_V128);
         IRTemp x22222222 = newTemp(Ity_V128);
         IRTemp x11111111 = newTemp(Ity_V128);
         IRTemp x00000000 = newTemp(Ity_V128);
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here. */
         /* Stage 4: 16 vectors, each duplicating one 8-bit lane of
            src in all 16 of its lanes. */
         IRTemp xAllF = newTemp(Ity_V128);
         IRTemp xAllE = newTemp(Ity_V128);
         IRTemp xAllD = newTemp(Ity_V128);
         IRTemp xAllC = newTemp(Ity_V128);
         IRTemp xAllB = newTemp(Ity_V128);
         IRTemp xAllA = newTemp(Ity_V128);
         IRTemp xAll9 = newTemp(Ity_V128);
         IRTemp xAll8 = newTemp(Ity_V128);
         IRTemp xAll7 = newTemp(Ity_V128);
         IRTemp xAll6 = newTemp(Ity_V128);
         IRTemp xAll5 = newTemp(Ity_V128);
         IRTemp xAll4 = newTemp(Ity_V128);
         IRTemp xAll3 = newTemp(Ity_V128);
         IRTemp xAll2 = newTemp(Ity_V128);
         IRTemp xAll1 = newTemp(Ity_V128);
         IRTemp xAll0 = newTemp(Ity_V128);
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         /* Fold the 16 vectors pairwise with 'op', in a 4-level
            reduction tree. */
         IRTemp maxFE = newTemp(Ity_V128);
         IRTemp maxDC = newTemp(Ity_V128);
         IRTemp maxBA = newTemp(Ity_V128);
         IRTemp max98 = newTemp(Ity_V128);
         IRTemp max76 = newTemp(Ity_V128);
         IRTemp max54 = newTemp(Ity_V128);
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTemp(Ity_V128);
         IRTemp maxBA98 = newTemp(Ity_V128);
         IRTemp max7654 = newTemp(Ity_V128);
         IRTemp max3210 = newTemp(Ity_V128);
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTemp(Ity_V128);
         IRTemp max76543210 = newTemp(Ity_V128);
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTemp(Ity_V128);
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         /* Keep only the bottom 8-bit lane of the folded result. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: {
         /* Same scheme as the 8x16 case, but with only 8 lanes, so
            one fewer duplication stage and one fewer fold level. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTemp(Ity_V128);
         IRTemp x32103210 = newTemp(Ity_V128);
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTemp(Ity_V128);
         IRTemp x54545454 = newTemp(Ity_V128);
         IRTemp x32323232 = newTemp(Ity_V128);
         IRTemp x10101010 = newTemp(Ity_V128);
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTemp(Ity_V128);
         IRTemp x66666666 = newTemp(Ity_V128);
         IRTemp x55555555 = newTemp(Ity_V128);
         IRTemp x44444444 = newTemp(Ity_V128);
         IRTemp x33333333 = newTemp(Ity_V128);
         IRTemp x22222222 = newTemp(Ity_V128);
         IRTemp x11111111 = newTemp(Ity_V128);
         IRTemp x00000000 = newTemp(Ity_V128);
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Fold the 8 per-lane vectors pairwise with 'op'. */
         IRTemp max76 = newTemp(Ity_V128);
         IRTemp max54 = newTemp(Ity_V128);
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTemp(Ity_V128);
         IRTemp max3210 = newTemp(Ity_V128);
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTemp(Ity_V128);
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         /* Keep only the bottom 16-bit lane of the folded result. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: {
         /* Same scheme again, with 4 lanes. */
         IRTemp x3210 = src;
         IRTemp x3232 = newTemp(Ity_V128);
         IRTemp x1010 = newTemp(Ity_V128);
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         IRTemp x3333 = newTemp(Ity_V128);
         IRTemp x2222 = newTemp(Ity_V128);
         IRTemp x1111 = newTemp(Ity_V128);
         IRTemp x0000 = newTemp(Ity_V128);
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         /* Fold the 4 per-lane vectors pairwise with 'op'. */
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTemp(Ity_V128);
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         /* Keep only the bottom 32-bit lane of the folded result. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      default:
         vassert(0);
   }
}
5373
5374
5375/* Generate IR for TBL and TBX.  This deals with the 128 bit case
5376   only. */
5377static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
5378                             IRTemp oor_values )
5379{
5380   vassert(len >= 0 && len <= 3);
5381
5382   /* Generate some useful constants as concisely as possible. */
5383   IRTemp half15 = newTemp(Ity_I64);
5384   assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
5385   IRTemp half16 = newTemp(Ity_I64);
5386   assign(half16, mkU64(0x1010101010101010ULL));
5387
5388   /* A zero vector */
5389   IRTemp allZero = newTemp(Ity_V128);
5390   assign(allZero, mkV128(0x0000));
5391   /* A vector containing 15 in each 8-bit lane */
5392   IRTemp all15 = newTemp(Ity_V128);
5393   assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
5394   /* A vector containing 16 in each 8-bit lane */
5395   IRTemp all16 = newTemp(Ity_V128);
5396   assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
5397   /* A vector containing 32 in each 8-bit lane */
5398   IRTemp all32 = newTemp(Ity_V128);
5399   assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
5400   /* A vector containing 48 in each 8-bit lane */
5401   IRTemp all48 = newTemp(Ity_V128);
5402   assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
5403   /* A vector containing 64 in each 8-bit lane */
5404   IRTemp all64 = newTemp(Ity_V128);
5405   assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
5406
5407   /* Group the 16/32/48/64 vectors so as to be indexable. */
5408   IRTemp allXX[4] = { all16, all32, all48, all64 };
5409
5410   /* Compute the result for each table vector, with zeroes in places
5411      where the index values are out of range, and OR them into the
5412      running vector. */
5413   IRTemp running_result = newTemp(Ity_V128);
5414   assign(running_result, mkV128(0));
5415
5416   UInt tabent;
5417   for (tabent = 0; tabent <= len; tabent++) {
5418      vassert(tabent >= 0 && tabent < 4);
5419      IRTemp bias = newTemp(Ity_V128);
5420      assign(bias,
5421             mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
5422      IRTemp biased_indices = newTemp(Ity_V128);
5423      assign(biased_indices,
5424             binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
5425      IRTemp valid_mask = newTemp(Ity_V128);
5426      assign(valid_mask,
5427             binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
5428      IRTemp safe_biased_indices = newTemp(Ity_V128);
5429      assign(safe_biased_indices,
5430             binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
5431      IRTemp results_or_junk = newTemp(Ity_V128);
5432      assign(results_or_junk,
5433             binop(Iop_Perm8x16, mkexpr(tab[tabent]),
5434                                 mkexpr(safe_biased_indices)));
5435      IRTemp results_or_zero = newTemp(Ity_V128);
5436      assign(results_or_zero,
5437             binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
5438      /* And OR that into the running result. */
5439      IRTemp tmp = newTemp(Ity_V128);
5440      assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
5441                        mkexpr(running_result)));
5442      running_result = tmp;
5443   }
5444
5445   /* So now running_result holds the overall result where the indices
5446      are in range, and zero in out-of-range lanes.  Now we need to
5447      compute an overall validity mask and use this to copy in the
5448      lanes in the oor_values for out of range indices.  This is
5449      unnecessary for TBL but will get folded out by iropt, so we lean
5450      on that and generate the same code for TBL and TBX here. */
5451   IRTemp overall_valid_mask = newTemp(Ity_V128);
5452   assign(overall_valid_mask,
5453          binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
5454   IRTemp result = newTemp(Ity_V128);
5455   assign(result,
5456          binop(Iop_OrV128,
5457                mkexpr(running_result),
5458                binop(Iop_AndV128,
5459                      mkexpr(oor_values),
5460                      unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
5461   return result;
5462}
5463
5464
5465static
5466Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
5467{
5468#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
5469
5470   /* ---------------- FMOV (general) ---------------- */
5471   /* case   30       23   20 18  15     9 4
5472       (1) 0 00 11110 00 1 00 111 000000 n d     FMOV Sd,      Wn
5473       (2) 1 00 11110 01 1 00 111 000000 n d     FMOV Dd,      Xn
5474       (3) 1 00 11110 10 1 01 111 000000 n d     FMOV Vd.D[1], Xn
5475
5476       (4) 0 00 11110 00 1 00 110 000000 n d     FMOV Wd, Sn
5477       (5) 1 00 11110 01 1 00 110 000000 n d     FMOV Xd, Dn
5478       (6) 1 00 11110 10 1 01 110 000000 n d     FMOV Xd, Vn.D[1]
5479   */
5480   if (INSN(30,24) == BITS7(0,0,1,1,1,1,0)
5481       && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5482      UInt sf = INSN(31,31);
5483      UInt ty = INSN(23,22); // type
5484      UInt rm = INSN(20,19); // rmode
5485      UInt op = INSN(18,16); // opcode
5486      UInt nn = INSN(9,5);
5487      UInt dd = INSN(4,0);
5488      UInt ix = 0; // case
5489      if (sf == 0) {
5490         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
5491            ix = 1;
5492         else
5493         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
5494            ix = 4;
5495      } else {
5496         vassert(sf == 1);
5497         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
5498            ix = 2;
5499         else
5500         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
5501            ix = 5;
5502         else
5503         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
5504            ix = 3;
5505         else
5506         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
5507            ix = 6;
5508      }
5509      if (ix > 0) {
5510         switch (ix) {
5511            case 1:
5512               putQReg128(dd, mkV128(0));
5513               putQRegLO(dd, getIReg32orZR(nn));
5514               DIP("fmov s%u, w%u\n", dd, nn);
5515               break;
5516            case 2:
5517               putQReg128(dd, mkV128(0));
5518               putQRegLO(dd, getIReg64orZR(nn));
5519               DIP("fmov d%u, x%u\n", dd, nn);
5520               break;
5521            case 3:
5522               putQRegHI64(dd, getIReg64orZR(nn));
5523               DIP("fmov v%u.d[1], x%u\n", dd, nn);
5524               break;
5525            case 4:
5526               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
5527               DIP("fmov w%u, s%u\n", dd, nn);
5528               break;
5529            case 5:
5530               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
5531               DIP("fmov x%u, d%u\n", dd, nn);
5532               break;
5533            case 6:
5534               putIReg64orZR(dd, getQRegHI64(nn));
5535               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
5536               break;
5537            default:
5538               vassert(0);
5539         }
5540         return True;
5541      }
5542      /* undecodable; fall through */
5543   }
5544
5545   /* -------------- FMOV (scalar, immediate) -------------- */
5546   /* 31  28    23   20   12  9     4
5547      000 11110 00 1 imm8 100 00000 d  FMOV Sd, #imm
5548      000 11110 01 1 imm8 100 00000 d  FMOV Dd, #imm
5549   */
5550   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
5551       && INSN(21,21) == 1 && INSN(12,5) == BITS8(1,0,0,0,0,0,0,0)) {
5552      Bool  isD  = INSN(22,22) == 1;
5553      UInt  imm8 = INSN(20,13);
5554      UInt  dd   = INSN(4,0);
5555      ULong imm  = VFPExpandImm(imm8, isD ? 64 : 32);
5556      if (!isD) {
5557         vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
5558      }
5559      putQReg128(dd, mkV128(0));
5560      putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
5561      DIP("fmov %s, #0x%llx\n",
5562          nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
5563      return True;
5564   }
5565
5566   /* -------------- {FMOV,MOVI} (vector, immediate) -------------- */
5567   /* 31    28          18  15    11 9     4
5568      0q op 01111 00000 abc cmode 01 defgh d  MOV Dd,   #imm (q=0)
5569                                              MOV Vd.2d #imm (q=1)
5570      Allowable op:cmode
5571         FMOV = 1:1111
5572         MOVI = 0:xx00, 1:0x00, 1:10x0, 1:110x, x:1110
5573   */
5574   if (INSN(31,31) == 0
5575       && INSN(28,19) == BITS10(0,1,1,1,1,0,0,0,0,0)
5576       && INSN(11,10) == BITS2(0,1)) {
5577      UInt  bitQ     = INSN(30,30);
5578      UInt  bitOP    = INSN(29,29);
5579      UInt  cmode    = INSN(15,12);
5580      UInt  imm8     = (INSN(18,16) << 5) | INSN(9,5);
5581      UInt  dd       = INSN(4,0);
5582      ULong imm64lo  = 0;
5583      UInt  op_cmode = (bitOP << 4) | cmode;
5584      Bool  ok       = False;
5585      switch (op_cmode) {
5586         case BITS5(1,1,1,1,1): // 1:1111
5587         case BITS5(0,0,0,0,0): case BITS5(0,0,1,0,0):
5588         case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:xx00
5589         case BITS5(1,0,0,0,0): case BITS5(1,0,1,0,0): // 1:0x00
5590         case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
5591         case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
5592         case BITS5(1,1,1,1,0): case BITS5(0,1,1,1,0): // x:1110
5593            ok = True; break;
5594         default:
5595           break;
5596      }
5597      if (ok) {
5598         ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, imm8);
5599      }
5600      if (ok) {
5601         ULong imm64hi = (bitQ == 0 && bitOP == 0)  ? 0  : imm64lo;
5602         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo)));
5603         DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
5604         return True;
5605      }
5606      /* else fall through */
5607   }
5608
   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* 31  28    23 21 20 18  15     9 4                  ix
      000 11110 00 1  00 010 000000 n d  SCVTF Sd, Wn    0
      000 11110 01 1  00 010 000000 n d  SCVTF Dd, Wn    1
      100 11110 00 1  00 010 000000 n d  SCVTF Sd, Xn    2
      100 11110 01 1  00 010 000000 n d  SCVTF Dd, Xn    3

      000 11110 00 1  00 011 000000 n d  UCVTF Sd, Wn    4
      000 11110 01 1  00 011 000000 n d  UCVTF Dd, Wn    5
      100 11110 00 1  00 011 000000 n d  UCVTF Sd, Xn    6
      100 11110 01 1  00 011 000000 n d  UCVTF Dd, Xn    7

      These are signed/unsigned conversion from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR.
   */
   if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,17) == BITS5(1,0,0,0,1)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
      Bool isI64 = INSN(31,31) == 1;   /* source is Xn rather than Wn */
      Bool isF64 = INSN(22,22) == 1;   /* destination is Dd rather than Sd */
      Bool isU   = INSN(16,16) == 1;   /* unsigned (UCVTF) rather than signed */
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      /* ix indexes ops[] below: +4 unsigned, +2 64-bit int source,
         +1 64-bit FP destination -- matching the table above. */
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
        = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
            Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      /* I32->F64 conversion is always exact, so VEX models those two
         ops (ix == 1 and 5) as unary, with no rounding mode; all the
         others can round and take the FPCR mode as first argument. */
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src);
      /* A scalar write zeroes the rest of the vector register. */
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }
5646
   /* ------------ F{ADD,SUB,MUL,DIV,NMUL} (scalar) ------------ */
   /* 31        23  20 15   11 9 4
      ---------------- 0000 ------   FMUL  --------
      000 11110 001 m  0001 10 n d   FDIV  Sd,Sn,Sm
      000 11110 011 m  0001 10 n d   FDIV  Dd,Dn,Dm
      ---------------- 0010 ------   FADD  --------
      ---------------- 0011 ------   FSUB  --------
      ---------------- 1000 ------   FNMUL --------
   */
   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      Bool   isD = INSN(22,22) == 1;   /* D registers rather than S */
      UInt   mm  = INSN(20,16);
      UInt   op  = INSN(15,12);        /* selects which arithmetic op */
      UInt   nn  = INSN(9,5);
      UInt   dd  = INSN(4,0);
      IROp   iop = Iop_INVALID;
      IRType ty  = isD ? Ity_F64 : Ity_F32;
      Bool   neg = False;              /* negate result?  (FNMUL only) */
      const HChar* nm = "???";
      switch (op) {
         case BITS4(0,0,0,0): nm = "fmul";  iop = mkMULF(ty); break;
         case BITS4(0,0,0,1): nm = "fdiv";  iop = mkDIVF(ty); break;
         case BITS4(0,0,1,0): nm = "fadd";  iop = mkADDF(ty); break;
         case BITS4(0,0,1,1): nm = "fsub";  iop = mkSUBF(ty); break;
         /* FNMUL is just -(n * m). */
         case BITS4(1,0,0,0): nm = "fnmul"; iop = mkMULF(ty);
                              neg = True; break;
         default:             return False;   /* undecodable op field */
      }
      vassert(iop != Iop_INVALID);
      /* All of these round per the FPCR rounding mode. */
      IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()),
                           getQRegLO(nn, ty), getQRegLO(mm, ty));
      IRTemp res = newTemp(ty);
      assign(res, neg ? unop(mkNEGF(ty),resE) : resE);
      /* A scalar write zeroes the rest of the vector register. */
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, mkexpr(res));
      DIP("%s %s, %s, %s\n",
          nm, nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
      return True;
   }
5687
   /* ------------ F{MOV,ABS,NEG,SQRT} D/D or S/S ------------ */
   /* 31        23 21    16 14    9 4
      000 11110 00 10000 00 10000 n d  FMOV Sd, Sn
      000 11110 01 10000 00 10000 n d  FMOV Dd, Dn
      ------------------ 01 ---------  FABS ------
      ------------------ 10 ---------  FNEG ------
      ------------------ 11 ---------  FSQRT -----
   */
   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
       && INSN(21,17) == BITS5(1,0,0,0,0)
       && INSN(14,10) == BITS5(1,0,0,0,0)) {
      Bool   isD = INSN(22,22) == 1;   /* D registers rather than S */
      UInt   opc = INSN(16,15);        /* 00=MOV 01=ABS 10=NEG 11=SQRT */
      UInt   nn  = INSN(9,5);
      UInt   dd  = INSN(4,0);
      IRType ty  = isD ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ty);
      if (opc == BITS2(0,0)) {
         /* FMOV: plain copy of the low lane; upper bits of Qd are
            zeroed, as for all scalar FP writes here. */
         assign(res, getQRegLO(nn, ty));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("fmov %s, %s\n",
             nameQRegLO(dd, ty), nameQRegLO(nn, ty));
         return True;
      }
      if (opc == BITS2(1,0) || opc == BITS2(0,1)) {
         /* FABS/FNEG: sign-bit-only operations, so no rounding mode
            is involved. */
         Bool isAbs = opc == BITS2(0,1);
         IROp op    = isAbs ? mkABSF(ty) : mkNEGF(ty);
         assign(res, unop(op, getQRegLO(nn, ty)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("%s %s, %s\n", isAbs ? "fabs" : "fneg",
             nameQRegLO(dd, ty), nameQRegLO(nn, ty));
         return True;
      }
      if (opc == BITS2(1,1)) {
         /* FSQRT rounds per the FPCR rounding mode. */
         assign(res,
                binop(mkSQRTF(ty),
                      mkexpr(mk_get_IR_rounding_mode()), getQRegLO(nn, ty)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("fsqrt %s, %s\n", nameQRegLO(dd, ty), nameQRegLO(nn, ty));
         return True;
      }
      /* else fall through; other cases are ATC */
   }
5734
   /* ---------------- F{ABS,NEG} (vector) ---------------- */
   /* 31  28      22 21    16       9 4
      0q0 01110 1 sz 10000 01111 10 n d  FABS Vd.T, Vn.T
      0q1 01110 1 sz 10000 01111 10 n d  FNEG Vd.T, Vn.T
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,1)
       && INSN(21,17) == BITS5(1,0,0,0,0)
       && INSN(16,10) == BITS7(0,1,1,1,1,1,0)) {
      UInt bitQ   = INSN(30,30);
      UInt bitSZ  = INSN(22,22);
      Bool isFNEG = INSN(29,29) == 1;
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* ar = "??";
      IRType tyF    = Ity_INVALID;
      Bool   zeroHI = False;
      /* Q:SZ selects the laneage; zeroHI is set when the upper half
         of Vd must be cleared (64-bit destination forms), and ok is
         False for illegal Q:SZ combinations. */
      Bool   ok     = getLaneInfo_Q_SZ(NULL, &tyF, NULL, &zeroHI, &ar,
                                       (Bool)bitQ, (Bool)bitSZ);
      if (ok) {
         vassert(tyF == Ity_F64 || tyF == Ity_F32);
         /* Sign-bit-only ops: no rounding mode needed. */
         IROp op = (tyF == Ity_F64) ? (isFNEG ? Iop_Neg64Fx2 : Iop_Abs64Fx2)
                                    : (isFNEG ? Iop_Neg32Fx4 : Iop_Abs32Fx4);
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(op, getQReg128(nn)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
                               : mkexpr(res));
         DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
             nameQReg128(dd), ar, nameQReg128(nn), ar);
         return True;
      }
      /* else fall through */
   }
5767
   /* -------------------- FCMP,FCMPE -------------------- */
   /* 31        23   20    15      9 4
      000 11110 01 1     m 00 1000 n 10 000  FCMPE Dn, Dm
      000 11110 01 1 00000 00 1000 n 11 000  FCMPE Dn, #0.0
      000 11110 01 1     m 00 1000 n 00 000  FCMP  Dn, Dm
      000 11110 01 1 00000 00 1000 n 01 000  FCMP  Dn, #0.0

      000 11110 00 1     m 00 1000 n 10 000  FCMPE Sn, Sm
      000 11110 00 1 00000 00 1000 n 11 000  FCMPE Sn, #0.0
      000 11110 00 1     m 00 1000 n 00 000  FCMP  Sn, Sm
      000 11110 00 1 00000 00 1000 n 01 000  FCMP  Sn, #0.0

      FCMPE generates Invalid Operation exn if either arg is any kind
      of NaN.  FCMP generates Invalid Operation exn if either arg is a
      signalling NaN.  We ignore this detail here and produce the same
      IR for both.
   */
   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) && INSN(21,21) == 1
       && INSN(15,10) == BITS6(0,0,1,0,0,0) && INSN(2,0) == BITS3(0,0,0)) {
      Bool   isD     = INSN(22,22) == 1;   /* D registers rather than S */
      UInt   mm      = INSN(20,16);
      UInt   nn      = INSN(9,5);
      Bool   isCMPE  = INSN(4,4) == 1;     /* only affects disassembly here */
      Bool   cmpZero = INSN(3,3) == 1;     /* compare against #0.0 */
      IRType ty      = isD ? Ity_F64 : Ity_F32;
      Bool   valid   = True;
      /* The #0.0 forms require the m field to be all zeroes. */
      if (cmpZero && mm != 0) valid = False;
      if (valid) {
         IRTemp argL  = newTemp(ty);
         IRTemp argR  = newTemp(ty);
         IRTemp irRes = newTemp(Ity_I32);
         assign(argL, getQRegLO(nn, ty));
         assign(argR,
                cmpZero
                   ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
                   : getQRegLO(mm, ty));
         assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
                             mkexpr(argL), mkexpr(argR)));
         /* Convert the IRCmpF64Result to an NZCV nibble, shift it
            into bits 31:28, and install it via the flags thunk. */
         IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
         IRTemp nzcv_28x0 = newTemp(Ity_I64);
         assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
         setFlags_COPY(nzcv_28x0);
         DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ty),
             cmpZero ? "#0.0" : nameQRegLO(mm, ty));
         return True;
      }
   }
5815
   /* -------------------- F{N}M{ADD,SUB} -------------------- */
   /* 31          22   20 15 14 9 4   ix
      000 11111 0 sz 0 m  0  a  n d   0   FMADD  Fd,Fn,Fm,Fa
      000 11111 0 sz 0 m  1  a  n d   1   FMSUB  Fd,Fn,Fm,Fa
      000 11111 0 sz 1 m  0  a  n d   2   FNMADD Fd,Fn,Fm,Fa
      000 11111 0 sz 1 m  1  a  n d   3   FNMSUB Fd,Fn,Fm,Fa
      where Fx=Dx when sz=1, Fx=Sx when sz=0

               -----SPEC------    ----IMPL----
      fmadd       a +    n * m    a + n * m
      fmsub       a + (-n) * m    a - n * m
      fnmadd   (-a) + (-n) * m    -(a + n * m)
      fnmsub   (-a) +    n * m    -(a - n * m)

      Note: the IMPL column uses a separately-rounded multiply and
      add/sub, whereas the real instructions are fused (one rounding),
      so the result can differ from hardware in the last ulp.
   */
   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,1,0)) {
      Bool    isD   = INSN(22,22) == 1;
      UInt    mm    = INSN(20,16);
      UInt    aa    = INSN(14,10);
      UInt    nn    = INSN(9,5);
      UInt    dd    = INSN(4,0);
      /* ix = bit21:bit15, giving the row in the table above. */
      UInt    ix    = (INSN(21,21) << 1) | INSN(15,15);
      IRType  ty    = isD ? Ity_F64 : Ity_F32;
      IROp    opADD = mkADDF(ty);
      IROp    opSUB = mkSUBF(ty);
      IROp    opMUL = mkMULF(ty);
      IROp    opNEG = mkNEGF(ty);
      IRTemp  res   = newTemp(ty);
      IRExpr* eA    = getQRegLO(aa, ty);
      IRExpr* eN    = getQRegLO(nn, ty);
      IRExpr* eM    = getQRegLO(mm, ty);
      IRExpr* rm    = mkexpr(mk_get_IR_rounding_mode());
      IRExpr* eNxM  = triop(opMUL, rm, eN, eM);
      switch (ix) {
         case 0:  assign(res, triop(opADD, rm, eA, eNxM)); break;
         case 1:  assign(res, triop(opSUB, rm, eA, eNxM)); break;
         case 2:  assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
         case 3:  assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
         default: vassert(0);
      }
      /* A scalar write zeroes the rest of the vector register. */
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
      DIP("%s %s, %s, %s, %s\n",
          names[ix], nameQRegLO(dd, ty), nameQRegLO(nn, ty),
                     nameQRegLO(mm, ty), nameQRegLO(aa, ty));
      return True;
   }
5863
   /* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */
   /*    30       23   20 18  15     9 4
      sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
      sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
      ---------------- 01 --------------  FCVTP-------- (round to +inf)
      ---------------- 10 --------------  FCVTM-------- (round to -inf)
      ---------------- 11 --------------  FCVTZ-------- (round to zero)

      Rd is Xd when sf==1, Wd when sf==0
      Fn is Dn when x==1, Sn when x==0
      20:19 carry the rounding mode, using the same encoding as FPCR
   */
   if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,21) == 1
       && INSN(18,17) == BITS2(0,0) && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
      Bool isI64 = INSN(31,31) == 1;   /* destination is Xd, not Wd */
      Bool isF64 = INSN(22,22) == 1;   /* source is Dn, not Sn */
      UInt rm    = INSN(20,19);        /* rounding mode, FPCR encoding */
      Bool isU   = INSN(16,16) == 1;   /* unsigned destination */
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      /* Decide on the IR rounding mode to use. */
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      switch (rm) {
         case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
         case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
         case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
         case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
         default: vassert(0);
      }
      vassert(irrm != 8);
      /* Decide on the conversion primop, based on the source size,
         dest size and signedness (8 possibilities).  Case coding:
            F32 ->s I32   0
            F32 ->u I32   1
            F32 ->s I64   2
            F32 ->u I64   3
            F64 ->s I32   4
            F64 ->u I32   5
            F64 ->s I64   6
            F64 ->u I64   7
      */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
      vassert(ix < 8);
      const IROp ops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp op = ops[ix];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      // (op, rounding-mode) pairs not in this whitelist make the whole
      // decode fail, rather than risk generating untested IR.
      if (/* F32toI32S */
             (op == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (op == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (op == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          /* F32toI32U */
          || (op == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (op == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          /* F32toI64S */
          || (op == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          /* F32toI64U */
          || (op == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          || (op == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
          /* F64toI32S */
          || (op == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (op == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (op == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          /* F64toI32U */
          || (op == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (op == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          /* F64toI64S */
          || (op == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (op == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (op == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          /* F64toI64U */
          || (op == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (op == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
         ) {
        /* validated */
      } else {
        return False;
      }
      IRType srcTy  = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy  = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src    = newTemp(srcTy);
      IRTemp dst    = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }
5956
5957   /* -------- FCVTAS (KLUDGED) (scalar, integer) -------- */
5958   /*   30       23   20 18  15     9 4
5959      1 00 11110 0x 1 00 100 000000 n d  FCVTAS Xd, Fn
5960      0 00 11110 0x 1 00 100 000000 n d  FCVTAS Wd, Fn
5961      Fn is Dn when x==1, Sn when x==0
5962   */
5963   if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0)
5964       && INSN(21,16) == BITS6(1,0,0,1,0,0)
5965       && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5966      Bool isI64 = INSN(31,31) == 1;
5967      Bool isF64 = INSN(22,22) == 1;
5968      UInt nn    = INSN(9,5);
5969      UInt dd    = INSN(4,0);
5970      /* Decide on the IR rounding mode to use. */
5971      /* KLUDGE: should be Irrm_NEAREST_TIE_AWAY_0 */
5972      IRRoundingMode irrm = Irrm_NEAREST;
5973      /* Decide on the conversion primop. */
5974      IROp   op    = isI64 ? (isF64 ? Iop_F64toI64S :  Iop_F32toI64S)
5975                           : (isF64 ? Iop_F64toI32S :  Iop_F32toI32S);
5976      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
5977      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
5978      IRTemp src   = newTemp(srcTy);
5979      IRTemp dst   = newTemp(dstTy);
5980      assign(src, getQRegLO(nn, srcTy));
5981      assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
5982      putIRegOrZR(isI64, dd, mkexpr(dst));
5983      DIP("fcvtas %s, %s (KLUDGED)\n",
5984          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
5985      return True;
5986   }
5987
   /* ---------------- FRINT{I,M,P,Z} (scalar) ---------------- */
   /* 31        23 21   17  14    9 4
      000 11110 0x 1001 111 10000 n d  FRINTI Fd, Fm (round per FPCR)
                        rm
      x==0 => S-registers, x==1 => D-registers
      rm (17:15) encodings:
         111 per FPCR  (FRINTI)
         001 +inf      (FRINTP)
         010 -inf      (FRINTM)
         011 zero      (FRINTZ)
         000 tieeven
         100 tieaway   (FRINTA) -- !! FIXME KLUDGED !!
         110 per FPCR + "exact = TRUE"
         101 unallocated
   */
   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
       && INSN(21,18) == BITS4(1,0,0,1) && INSN(14,10) == BITS5(1,0,0,0,0)) {
      Bool    isD   = INSN(22,22) == 1;
      UInt    rm    = INSN(17,15);
      UInt    nn    = INSN(9,5);
      UInt    dd    = INSN(4,0);
      IRType  ty    = isD ? Ity_F64 : Ity_F32;
      IRExpr* irrmE = NULL;   /* stays NULL for rm encodings not handled */
      UChar   ch    = '?';
      switch (rm) {
         case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
         case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
         case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
         case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
         default: break;
      }
      if (irrmE) {
         IRTemp src = newTemp(ty);
         IRTemp dst = newTemp(ty);
         assign(src, getQRegLO(nn, ty));
         /* Round to an integral-valued FP number in the chosen mode. */
         assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
                           irrmE, mkexpr(src)));
         /* A scalar write zeroes the rest of the vector register. */
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(dst));
         DIP("frint%c %s, %s\n",
             ch, nameQRegLO(dd, ty), nameQRegLO(nn, ty));
         return True;
      }
      /* else unhandled rounding mode case -- fall through */
   }
6034
6035   /* ------------------ FCVT (scalar) ------------------ */
6036   /* 31        23 21    16 14    9 4
6037      000 11110 11 10001 00 10000 n d   FCVT Sd, Hn (unimp)
6038      --------- 11 ----- 01 ---------   FCVT Dd, Hn (unimp)
6039      --------- 00 ----- 11 ---------   FCVT Hd, Sn (unimp)
6040      --------- 00 ----- 01 ---------   FCVT Dd, Sn
6041      --------- 01 ----- 11 ---------   FCVT Hd, Dn (unimp)
6042      --------- 01 ----- 00 ---------   FCVT Sd, Dn
6043      Rounding, when dst is smaller than src, is per the FPCR.
6044   */
6045   if (INSN(31,24) == BITS8(0,0,0,1,1,1,1,0)
6046       && INSN(21,17) == BITS5(1,0,0,0,1)
6047       && INSN(14,10) == BITS5(1,0,0,0,0)) {
6048      UInt b2322 = INSN(23,22);
6049      UInt b1615 = INSN(16,15);
6050      UInt nn    = INSN(9,5);
6051      UInt dd    = INSN(4,0);
6052      if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
6053         /* Convert S to D */
6054         IRTemp res = newTemp(Ity_F64);
6055         assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32)));
6056         putQReg128(dd, mkV128(0x0000));
6057         putQRegLO(dd, mkexpr(res));
6058         DIP("fcvt %s, %s\n",
6059             nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32));
6060         return True;
6061      }
6062      if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
6063         /* Convert D to S */
6064         IRTemp res = newTemp(Ity_F32);
6065         assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
6066                                         getQRegLO(nn, Ity_F64)));
6067         putQReg128(dd, mkV128(0x0000));
6068         putQRegLO(dd, mkexpr(res));
6069         DIP("fcvt %s, %s\n",
6070             nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64));
6071         return True;
6072      }
6073      /* else unhandled */
6074   }
6075
6076   /* ------------------ FABD (scalar) ------------------ */
6077   /* 31        23  20 15     9 4
6078      011 11110 111 m  110101 n d  FABD  Dd, Dn, Dm
6079      011 11110 101 m  110101 n d  FABD  Sd, Sn, Sm
6080   */
6081   if (INSN(31,23) == BITS9(0,1,1,1,1,1,1,0,1) && INSN(21,21) == 1
6082       && INSN(15,10) == BITS6(1,1,0,1,0,1)) {
6083      Bool   isD = INSN(22,22) == 1;
6084      UInt   mm  = INSN(20,16);
6085      UInt   nn  = INSN(9,5);
6086      UInt   dd  = INSN(4,0);
6087      IRType ty  = isD ? Ity_F64 : Ity_F32;
6088      IRTemp res = newTemp(ty);
6089      assign(res, unop(mkABSF(ty),
6090                       triop(mkSUBF(ty),
6091                             mkexpr(mk_get_IR_rounding_mode()),
6092                             getQRegLO(nn,ty), getQRegLO(mm,ty))));
6093      putQReg128(dd, mkV128(0x0000));
6094      putQRegLO(dd, mkexpr(res));
6095      DIP("fabd %s, %s, %s\n",
6096          nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
6097      return True;
6098   }
6099
   /* -------------- {S,U}CVTF (vector, integer) -------------- */
   /* 31  28      22 21       15     9 4
      0q0 01110 0 sz 1  00001 110110 n d  SCVTF Vd, Vn
      0q1 01110 0 sz 1  00001 110110 n d  UCVTF Vd, Vn
      with laneage:
      case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,0)
       && INSN(21,16) == BITS6(1,0,0,0,0,1)
       && INSN(15,10) == BITS6(1,1,0,1,1,0)) {
      Bool isQ   = INSN(30,30) == 1;
      Bool isU   = INSN(29,29) == 1;   /* UCVTF rather than SCVTF */
      Bool isF64 = INSN(22,22) == 1;
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      /* Reject the illegal sz:Q == 10 case (64-bit lanes, 64-bit reg). */
      if (isQ || !isF64) {
         IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
         UInt   nLanes = 0;
         Bool   zeroHI = False;
         const HChar* arrSpec = NULL;
         Bool   ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
                                      isQ, isF64 );
         IROp   op = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
                         : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
         IRTemp rm = mk_get_IR_rounding_mode();
         UInt   i;
         vassert(ok); /* the 'if' above should ensure this */
         /* Convert lane by lane, each rounded per the FPCR. */
         for (i = 0; i < nLanes; i++) {
            putQRegLane(dd, i,
                        binop(op, mkexpr(rm), getQRegLane(nn, i, tyI)));
         }
         /* 2S case: clear the upper 64 bits of Vd. */
         if (zeroHI) {
            putQRegLane(dd, 1, mkU64(0));
         }
         DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
             nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }
6140
   /* ---------- F{ADD,SUB,MUL,DIV,MLA,MLS} (vector) ---------- */
   /* 31  28      22 21 20 15     9 4                  case
      0q0 01110 0 sz 1  m  110101 n d  FADD Vd,Vn,Vm   1
      0q0 01110 1 sz 1  m  110101 n d  FSUB Vd,Vn,Vm   2
      0q1 01110 0 sz 1  m  110111 n d  FMUL Vd,Vn,Vm   3
      0q1 01110 0 sz 1  m  111111 n d  FDIV Vd,Vn,Vm   4
      0q0 01110 0 sz 1  m  110011 n d  FMLA Vd,Vn,Vm   5
      0q0 01110 1 sz 1  m  110011 n d  FMLS Vd,Vn,Vm   6
      0q1 01110 1 sz 1  m  110101 n d  FABD Vd,Vn,Vm   7
   */
   if (INSN(31,31) == 0
       && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) {
      Bool isQ   = INSN(30,30) == 1;
      UInt b29   = INSN(29,29);
      UInt b23   = INSN(23,23);
      Bool isF64 = INSN(22,22) == 1;
      UInt mm    = INSN(20,16);
      UInt b1510 = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      UInt ix    = 0;   /* 0 == no match; otherwise the 'case' column */
      /**/ if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,1,0,1)) ix = 1;
      else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 2;
      else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,0,1,1,1)) ix = 3;
      else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,1,1,1,1)) ix = 4;
      else if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,0,1,1)) ix = 5;
      else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,0,1,1)) ix = 6;
      else if (b29 == 1 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 7;
      IRType laneTy = Ity_INVALID;
      Bool   zeroHI = False;
      const HChar* arr = "??";
      Bool ok
         = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
      /* The simple 3-operand cases: FADD/FSUB/FMUL/FDIV. */
      if (ok && ix >= 1 && ix <= 4) {
         const IROp ops64[4]
            = { Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2 };
         const IROp ops32[4]
            = { Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4 };
         const HChar* names[4]
            = { "fadd", "fsub", "fmul", "fdiv" };
         IROp   op = laneTy==Ity_F64 ? ops64[ix-1] : ops32[ix-1];
         IRTemp rm = mk_get_IR_rounding_mode();
         IRTemp t1 = newTemp(Ity_V128);
         IRTemp t2 = newTemp(Ity_V128);
         assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
         assign(t2, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t1))
                           : mkexpr(t1));
         putQReg128(dd, mkexpr(t2));
         DIP("%s %s.%s, %s.%s, %s.%s\n", names[ix-1],
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      /* FMLA/FMLS: Vd = Vd +/- (Vn * Vm). */
      if (ok && ix >= 5 && ix <= 6) {
         IROp opADD = laneTy==Ity_F64 ? Iop_Add64Fx2 : Iop_Add32Fx4;
         IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
         IROp opMUL = laneTy==Ity_F64 ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
         IRTemp rm = mk_get_IR_rounding_mode();
         IRTemp t1 = newTemp(Ity_V128);
         IRTemp t2 = newTemp(Ity_V128);
         // FIXME: double rounding; use FMA primops instead
         assign(t1, triop(opMUL,
                          mkexpr(rm), getQReg128(nn), getQReg128(mm)));
         assign(t2, triop(ix == 5 ? opADD : opSUB,
                          mkexpr(rm), getQReg128(dd), mkexpr(t1)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
                               : mkexpr(t2));
         DIP("%s %s.%s, %s.%s, %s.%s\n", ix == 5 ? "fmla" : "fmls",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      /* FABD: Vd = |Vn - Vm| lane-wise. */
      if (ok && ix == 7) {
         IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
         IROp opABS = laneTy==Ity_F64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
         IRTemp rm = mk_get_IR_rounding_mode();
         IRTemp t1 = newTemp(Ity_V128);
         IRTemp t2 = newTemp(Ity_V128);
         // FIXME: use Abd primop instead?
         assign(t1, triop(opSUB,
                          mkexpr(rm), getQReg128(nn), getQReg128(mm)));
         assign(t2, unop(opABS, mkexpr(t1)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
                               : mkexpr(t2));
         DIP("fabd %s.%s, %s.%s, %s.%s\n",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
   }
6229
6230   /* ------------ FCM{EQ,GE,GT}, FAC{GE,GT} (vector) ------------ */
6231   /* 31  28      22   20 15     9 4                  case
6232      0q1 01110 0 sz 1 m  111011 n d  FACGE Vd, Vn, Vm
6233      0q1 01110 1 sz 1 m  111011 n d  FACGT Vd, Vn, Vm
6234      0q0 01110 0 sz 1 m  111001 n d  FCMEQ Vd, Vn, Vm
6235      0q1 01110 0 sz 1 m  111001 n d  FCMGE Vd, Vn, Vm
6236      0q1 01110 1 sz 1 m  111001 n d  FCMGT Vd, Vn, Vm
6237   */
6238   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1
6239       && INSN(15,12) == BITS4(1,1,1,0) && INSN(10,10) == 1) {
6240      Bool isQ   = INSN(30,30) == 1;
6241      UInt U     = INSN(29,29);
6242      UInt E     = INSN(23,23);
6243      Bool isF64 = INSN(22,22) == 1;
6244      UInt ac    = INSN(11,11);
6245      UInt mm    = INSN(20,16);
6246      UInt nn    = INSN(9,5);
6247      UInt dd    = INSN(4,0);
6248      /* */
6249      UInt   EUac   = (E << 2) | (U << 1) | ac;
6250      IROp   opABS  = Iop_INVALID;
6251      IROp   opCMP  = Iop_INVALID;
6252      IRType laneTy = Ity_INVALID;
6253      Bool   zeroHI = False;
6254      Bool   swap   = True;
6255      const HChar* arr = "??";
6256      const HChar* nm  = "??";
6257      Bool ok
6258         = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
6259      if (ok) {
6260         vassert((isF64 && laneTy == Ity_F64) || (!isF64 && laneTy == Ity_F32));
6261         switch (EUac) {
6262            case BITS3(0,0,0):
6263               nm    = "fcmeq";
6264               opCMP = isF64 ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
6265               swap  = False;
6266               break;
6267            case BITS3(0,1,0):
6268               nm    = "fcmge";
6269               opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6270               break;
6271            case BITS3(0,1,1):
6272               nm    = "facge";
6273               opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6274               opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
6275               break;
6276            case BITS3(1,1,0):
6277               nm    = "fcmgt";
6278               opCMP = isF64 ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
6279               break;
6280            case BITS3(1,1,1):
6281               nm    = "fcagt";
6282               opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6283               opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
6284               break;
6285            default:
6286               break;
6287         }
6288      }
6289      if (opCMP != Iop_INVALID) {
6290         IRExpr* argN = getQReg128(nn);
6291         IRExpr* argM = getQReg128(mm);
6292         if (opABS != Iop_INVALID) {
6293            argN = unop(opABS, argN);
6294            argM = unop(opABS, argM);
6295         }
6296         IRExpr* res = swap ? binop(opCMP, argM, argN)
6297                            : binop(opCMP, argN, argM);
6298         if (zeroHI) {
6299            res = unop(Iop_ZeroHI64ofV128, res);
6300         }
6301         putQReg128(dd, res);
6302         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
6303             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
6304         return True;
6305      }
6306      /* else fall through */
6307   }
6308
   /* -------------------- FCVTN -------------------- */
   /* 31  28    23  20    15     9 4
      0q0 01110 0s1 00001 011010 n d  FCVTN Vd, Vn
      where case q:s of 00: 16Fx4(lo) <- 32Fx4
                        01: 32Fx2(lo) <- 64Fx2
                        10: 16Fx4(hi) <- 32Fx4
                        11: 32Fx2(hi) <- 64Fx2
      Only deals with the 32Fx2 <- 64Fx2 version (s==1)
   */
   if (INSN(31,31) == 0 && INSN(29,23) == BITS7(0,0,1,1,1,0,0)
       && INSN(21,10) == BITS12(1,0,0,0,0,1,0,1,1,0,1,0)) {
      UInt bQ = INSN(30,30);
      UInt bS = INSN(22,22);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      if (bS == 1) {
         /* Narrow both F64 lanes of Vn to F32, using the rounding mode
            currently in FPCR, placing the results in the lower half
            (FCVTN, q==0) or the upper half (FCVTN2, q==1) of Vd. */
         IRTemp  rm    = mk_get_IR_rounding_mode();
         IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
         IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
         /* F32 lanes 2*bQ and 2*bQ+1 select the destination half. */
         putQRegLane(dd, 2 * bQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
         putQRegLane(dd, 2 * bQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
         if (bQ == 0) {
            /* FCVTN (not FCVTN2) zeroes the upper 64 bits of Vd. */
            putQRegLane(dd, 1, mkU64(0));
         }
         DIP("fcvtn%s %s.%s, %s.2d\n", bQ ? "2" : "",
             nameQReg128(dd), bQ ? "4s" : "2s", nameQReg128(nn));
         return True;
      }
      /* else fall through */
   }
6339
   /* ---------------- ADD/SUB (vector) ---------------- */
   /* 31  28    23   21 20 15     9 4
      0q0 01110 size 1  m  100001 n d  ADD Vd.T, Vn.T, Vm.T
      0q1 01110 size 1  m  100001 n d  SUB Vd.T, Vn.T, Vm.T
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,21) == 1 && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
      Bool isQ    = INSN(30,30) == 1;
      UInt szBlg2 = INSN(23,22);
      Bool isSUB  = INSN(29,29) == 1;
      UInt mm     = INSN(20,16);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      /* Reject the invalid 1d (64-bit lane, q==0) arrangement. */
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
      if (ok) {
         /* Lane-size-indexed op tables; szBlg2 is log2(lane bytes). */
         const IROp opsADD[4]
            = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
         const IROp opsSUB[4]
            = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
         vassert(szBlg2 < 4);
         IROp   op = isSUB ? opsSUB[szBlg2] : opsADD[szBlg2];
         IRTemp t  = newTemp(Ity_V128);
         assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
         /* 64-bit variant: upper half of Vd reads as zero. */
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
                               : mkexpr(t));
         const HChar* nm = isSUB ? "sub" : "add";
         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
             nameQReg128(dd), arrSpec,
             nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         return True;
      }
      /* else fall through */
   }
6375
6376   /* ---------------- ADD/SUB (scalar) ---------------- */
6377   /* 31  28    23 21 20 15     9 4
6378      010 11110 11 1  m  100001 n d  ADD Dd, Dn, Dm
6379      011 11110 11 1  m  100001 n d  SUB Dd, Dn, Dm
6380   */
6381   if (INSN(31,30) == BITS2(0,1) && INSN(28,21) == BITS8(1,1,1,1,0,1,1,1)
6382       && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
6383      Bool isSUB = INSN(29,29) == 1;
6384      UInt mm    = INSN(20,16);
6385      UInt nn    = INSN(9,5);
6386      UInt dd    = INSN(4,0);
6387      IRTemp res = newTemp(Ity_I64);
6388      assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
6389                        getQRegLane(nn, 0, Ity_I64),
6390                        getQRegLane(mm, 0, Ity_I64)));
6391      putQRegLane(dd, 0, mkexpr(res));
6392      putQRegLane(dd, 1, mkU64(0));
6393      DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
6394          nameQRegLO(dd, Ity_I64),
6395          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
6396      return True;
6397   }
6398
   /* ------------ MUL/PMUL/MLA/MLS (vector) ------------ */
   /* 31  28    23   21 20 15     9 4
      0q0 01110 size 1  m  100111 n d  MUL  Vd.T, Vn.T, Vm.T  B/H/S only
      0q1 01110 size 1  m  100111 n d  PMUL Vd.T, Vn.T, Vm.T  B only
      0q0 01110 size 1  m  100101 n d  MLA  Vd.T, Vn.T, Vm.T  B/H/S only
      0q1 01110 size 1  m  100101 n d  MLS  Vd.T, Vn.T, Vm.T  B/H/S only
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,21) == 1
       && (INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(1,0,0,1,0,1)) {
      Bool isQ    = INSN(30,30) == 1;
      UInt szBlg2 = INSN(23,22);
      UInt bit29  = INSN(29,29);
      UInt mm     = INSN(20,16);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      /* Bit 11 == 0 selects the accumulating forms (MLA/MLS). */
      Bool isMLAS = INSN(11,11) == 0;
      /* Iop_INVALID entries reject the lane sizes the architecture does
         not provide (no 64-bit lanes; PMUL is bytes only). */
      const IROp opsADD[4]
         = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_INVALID };
      const IROp opsSUB[4]
         = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_INVALID };
      const IROp opsMUL[4]
         = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      /* Set opMUL and, if necessary, opACC.  A result value of
         Iop_INVALID for opMUL indicates that the instruction is
         invalid. */
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
      vassert(szBlg2 < 4);
      IROp opACC = Iop_INVALID;
      IROp opMUL = Iop_INVALID;
      if (ok) {
         /* bit29: for non-accumulating forms selects PMUL vs MUL; for
            accumulating forms selects MLS (subtract) vs MLA (add). */
         opMUL = (bit29 == 1 && !isMLAS) ? opsPMUL[szBlg2]
                                         : opsMUL[szBlg2];
         opACC = isMLAS ? (bit29 == 1 ? opsSUB[szBlg2] : opsADD[szBlg2])
                        : Iop_INVALID;
      }
      if (ok && opMUL != Iop_INVALID) {
         IRTemp t1 = newTemp(Ity_V128);
         assign(t1, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         IRTemp t2 = newTemp(Ity_V128);
         /* MLA/MLS fold the product into the existing value of Vd. */
         assign(t2, opACC == Iop_INVALID
                       ? mkexpr(t1)
                       : binop(opACC, getQReg128(dd), mkexpr(t1)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
                               : mkexpr(t2));
         const HChar* nm = isMLAS ? (bit29 == 1 ? "mls" : "mla")
                                  : (bit29 == 1 ? "pmul" : "mul");
         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
             nameQReg128(dd), arrSpec,
             nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         return True;
      }
      /* else fall through */
   }
6457
   /* ---------------- {S,U}{MIN,MAX} (vector) ---------------- */
   /* 31  28    23   21 20 15     9 4
      0q0 01110 size 1  m  011011 n d  SMIN Vd.T, Vn.T, Vm.T
      0q1 01110 size 1  m  011011 n d  UMIN Vd.T, Vn.T, Vm.T
      0q0 01110 size 1  m  011001 n d  SMAX Vd.T, Vn.T, Vm.T
      0q1 01110 size 1  m  011001 n d  UMAX Vd.T, Vn.T, Vm.T
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,21) == 1
       && ((INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(0,1,1,0,0,1))) {
      Bool isQ    = INSN(30,30) == 1;
      Bool isU    = INSN(29,29) == 1;
      UInt szBlg2 = INSN(23,22);
      /* Bit 11 == 0 selects MAX, == 1 selects MIN. */
      Bool isMAX  = INSN(11,11) == 0;
      UInt mm     = INSN(20,16);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      /* NOTE(review): the op tables below include 64-bit-lane entries,
         but the architecture defines these insns for B/H/S lanes only;
         presumably getLaneInfo_SIMPLE or callers exclude size==3 --
         verify. */
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
      if (ok) {
         const IROp opMINS[4]
            = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
         const IROp opMINU[4]
            = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
         const IROp opMAXS[4]
            = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
         const IROp opMAXU[4]
            = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
         vassert(szBlg2 < 4);
         IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
                         : (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
         IRTemp t = newTemp(Ity_V128);
         assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
         /* 64-bit variant: upper half of Vd reads as zero. */
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
                               : mkexpr(t));
         const HChar* nm = isMAX ? (isU ? "umax" : "smax")
                                 : (isU ? "umin" : "smin");
         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
             nameQReg128(dd), arrSpec,
             nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         return True;
      }
      /* else fall through */
   }
6503
   /* -------------------- {S,U}{MIN,MAX}V -------------------- */
   /* 31  28    23   21    16 15     9 4
      0q0 01110 size 11000 1  101010 n d  SMINV Vd, Vn.T
      0q1 01110 size 11000 1  101010 n d  UMINV Vd, Vn.T
      0q0 01110 size 11000 0  101010 n d  SMAXV Vd, Vn.T
      0q1 01110 size 11000 0  101010 n d  UMAXV Vd, Vn.T
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,17) == BITS5(1,1,0,0,0)
       && INSN(15,10) == BITS6(1,0,1,0,1,0)) {
      Bool isQ    = INSN(30,30) == 1;
      Bool isU    = INSN(29,29) == 1;
      UInt szBlg2 = INSN(23,22);
      Bool isMAX  = INSN(16,16) == 0;
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2);
      if (ok) {
         /* Architecturally invalid arrangements: no 64-bit lanes at
            all, and 32-bit lanes only in the 128-bit (q==1) form. */
         if (szBlg2 == 3)         ok = False;
         if (szBlg2 == 2 && !isQ) ok = False;
      }
      if (ok) {
         const IROp opMINS[3]
            = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
         const IROp opMINU[3]
            = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
         const IROp opMAXS[3]
            = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
         const IROp opMAXU[3]
            = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
         vassert(szBlg2 < 3);
         IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
                         : (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
         IRTemp tN1 = newTemp(Ity_V128);
         assign(tN1, getQReg128(nn));
         /* If Q == 0, we're just folding lanes in the lower half of
            the value.  In which case, copy the lower half of the
            source into the upper half, so we can then treat it the
            same as the full width case. */
         IRTemp tN2 = newTemp(Ity_V128);
         assign(tN2, zeroHI ? mk_CatEvenLanes64x2(tN1,tN1) : mkexpr(tN1));
         /* Fold all lanes down to a single lane with op. */
         IRTemp res = math_MINMAXV(tN2, op);
         if (res == IRTemp_INVALID)
            return False; /* means math_MINMAXV
                             doesn't handle this case yet */
         putQReg128(dd, mkexpr(res));
         const HChar* nm = isMAX ? (isU ? "umaxv" : "smaxv")
                                 : (isU ? "uminv" : "sminv");
         const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
         IRType laneTy = tys[szBlg2];
         DIP("%s %s, %s.%s\n", nm,
             nameQRegLO(dd, laneTy), nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }
6562
6563   /* ------------ {AND,BIC,ORR,ORN} (vector) ------------ */
6564   /* 31  28    23  20 15     9 4
6565      0q0 01110 001 m  000111 n d  AND Vd.T, Vn.T, Vm.T
6566      0q0 01110 011 m  000111 n d  BIC Vd.T, Vn.T, Vm.T
6567      0q0 01110 101 m  000111 n d  ORR Vd.T, Vn.T, Vm.T
6568      0q0 01110 111 m  000111 n d  ORN Vd.T, Vn.T, Vm.T
6569      T is 16b when q==1, 8b when q==0
6570   */
6571   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
6572       && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
6573      Bool   isQ    = INSN(30,30) == 1;
6574      Bool   isORR  = INSN(23,23) == 1;
6575      Bool   invert = INSN(22,22) == 1;
6576      UInt   mm     = INSN(20,16);
6577      UInt   nn     = INSN(9,5);
6578      UInt   dd     = INSN(4,0);
6579      IRTemp res    = newTemp(Ity_V128);
6580      assign(res, binop(isORR ? Iop_OrV128 : Iop_AndV128,
6581                        getQReg128(nn),
6582                        invert ? unop(Iop_NotV128, getQReg128(mm))
6583                               : getQReg128(mm)));
6584      putQReg128(dd, isQ ? mkexpr(res)
6585                         : unop(Iop_ZeroHI64ofV128, mkexpr(res)));
6586      const HChar* names[4] = { "and", "bic", "orr", "orn" };
6587      const HChar* ar = isQ ? "16b" : "8b";
6588      DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
6589          nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
6590      return True;
6591   }
6592
   /* ---------- CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector) ---------- */
   /* 31  28    23   21     15     9 4                          ix
      0q1 01110 size 1  m   100011 n d  CMEQ  Vd.T, Vn.T, Vm.T  (1) ==
      0q0 01110 size 1  m   100011 n d  CMTST Vd.T, Vn.T, Vm.T  (2) &, != 0

      0q1 01110 size 1  m   001101 n d  CMHI Vd.T, Vn.T, Vm.T   (3) >u
      0q0 01110 size 1  m   001101 n d  CMGT Vd.T, Vn.T, Vm.T   (4) >s

      0q1 01110 size 1  m   001111 n d  CMHS Vd.T, Vn.T, Vm.T   (5) >=u
      0q0 01110 size 1  m   001111 n d  CMGE Vd.T, Vn.T, Vm.T   (6) >=s

      0q1 01110 size 100000 100010 n d  CMGE Vd.T, Vn.T, #0     (7) >=s 0
      0q0 01110 size 100000 100010 n d  CMGT Vd.T, Vn.T, #0     (8) >s 0

      0q1 01110 size 100000 100110 n d  CMLE Vd.T, Vn.T, #0     (9) <=s 0
      0q0 01110 size 100000 100110 n d  CMEQ Vd.T, Vn.T, #0     (10) == 0

      0q0 01110 size 100000 101010 n d  CMLT Vd.T, Vn.T, #0     (11) <s 0
   */
   if (INSN(31,31) == 0
       && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) {
      Bool isQ    = INSN(30,30) == 1;
      UInt bit29  = INSN(29,29);
      UInt szBlg2 = INSN(23,22);
      UInt mm     = INSN(20,16);
      UInt b1510  = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const IROp opsEQ[4]
         = { Iop_CmpEQ8x16,  Iop_CmpEQ16x8,  Iop_CmpEQ32x4,  Iop_CmpEQ64x2 };
      const IROp opsGTS[4]
         = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
      const IROp opsGTU[4]
         = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
      Bool zeroHI = False;
      const HChar* arrSpec = "??";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2);
      /* ix identifies which of the 11 cases in the table above this
         is; 0 means "no valid case matched". */
      UInt ix = 0;
      if (ok) {
         switch (b1510) {
            case BITS6(1,0,0,0,1,1): ix = bit29 ? 1 : 2; break;
            case BITS6(0,0,1,1,0,1): ix = bit29 ? 3 : 4; break;
            case BITS6(0,0,1,1,1,1): ix = bit29 ? 5 : 6; break;
            case BITS6(1,0,0,0,1,0):
               if (mm == 0) { ix = bit29 ? 7 : 8; }; break;
            case BITS6(1,0,0,1,1,0):
               if (mm == 0) { ix = bit29 ? 9 : 10; }; break;
            case BITS6(1,0,1,0,1,0):
               if (mm == 0 && bit29 == 0) { ix = 11; }; break;
            default: break;
         }
      }
      if (ix != 0) {
         vassert(ok && szBlg2 < 4);
         IRExpr* argL = getQReg128(nn);
         /* Cases 7..11 compare against an all-zeroes vector. */
         IRExpr* argR = (ix <= 6) ? getQReg128(mm) : mkV128(0x0000);
         IRExpr* res  = NULL;
         /* Some useful identities:
               x >  y   can be expressed directly
               x <  y   ==   y > x
               x <= y   ==   not (x > y)
               x >= y   ==   not (y > x)
         */
         switch (ix) {
            case 1: res = binop(opsEQ[szBlg2], argL, argR); break;
            /* CMTST: lane is all-ones iff (L & R) != 0. */
            case 2: res = unop(Iop_NotV128, binop(opsEQ[szBlg2],
                                            binop(Iop_AndV128, argL, argR),
                                                  mkV128(0x0000)));
                    break;
            case 3: res = binop(opsGTU[szBlg2], argL, argR); break;
            case 4: res = binop(opsGTS[szBlg2], argL, argR); break;
            case 5: res = unop(Iop_NotV128, binop(opsGTU[szBlg2], argR, argL));
                    break;
            case 6: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
                    break;
            case 7: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
                    break;
            case 8: res = binop(opsGTS[szBlg2], argL, argR); break;
            case 9: res = unop(Iop_NotV128,
                               binop(opsGTS[szBlg2], argL, argR));
                    break;
            case 10: res = binop(opsEQ[szBlg2],  argL, argR); break;
            case 11: res = binop(opsGTS[szBlg2], argR, argL); break;
            default: vassert(0);
         }
         vassert(res);
         /* 64-bit variant: upper half of Vd reads as zero. */
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, res) : res);
         const HChar* nms[11] = { "eq", "tst", "hi", "gt", "hs", "ge",
                                  "ge", "gt", "le", "eq", "lt" };
         if (ix <= 6) {
            DIP("cm%s %s.%s, %s.%s, %s.%s\n", nms[ix-1],
                nameQReg128(dd), arrSpec,
                nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         } else {
            DIP("cm%s %s.%s, %s.%s, #0\n", nms[ix-1],
                nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         }
         return True;
      }
      /* else fall through */
   }
6694
   /* -------------- {EOR,BSL,BIT,BIF} (vector) -------------- */
   /* 31  28    23   20 15     9 4
      0q1 01110 00 1 m  000111 n d  EOR Vd.T, Vm.T, Vn.T
      0q1 01110 01 1 m  000111 n d  BSL Vd.T, Vm.T, Vn.T
      0q1 01110 10 1 m  000111 n d  BIT Vd.T, Vm.T, Vn.T
      0q1 01110 11 1 m  000111 n d  BIF Vd.T, Vm.T, Vn.T
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
       && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
      Bool   isQ  = INSN(30,30) == 1;
      UInt   op   = INSN(23,22);
      UInt   mm   = INSN(20,16);
      UInt   nn   = INSN(9,5);
      UInt   dd   = INSN(4,0);
      IRTemp argD = newTemp(Ity_V128);
      IRTemp argN = newTemp(Ity_V128);
      IRTemp argM = newTemp(Ity_V128);
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
      IRExpr* res = NULL;
      /* BSL/BIT/BIF are built from the bit-select identity
         sel(mask, a, b) = b ^ ((b ^ a) & mask), which picks bits of
         a where mask is 1 and bits of b where mask is 0. */
      switch (op) {
         case BITS2(0,0): /* EOR */
            res = binop(opXOR, mkexpr(argM), mkexpr(argN));
            break;
         case BITS2(0,1): /* BSL: D is the selector mask. */
            res = binop(opXOR, mkexpr(argM),
                               binop(opAND,
                                     binop(opXOR, mkexpr(argM), mkexpr(argN)),
                                     mkexpr(argD)));
            break;
         case BITS2(1,0): /* BIT: insert N bits where M is 1. */
            res = binop(opXOR, mkexpr(argD),
                               binop(opAND,
                                     binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                     mkexpr(argM)));
            break;
         case BITS2(1,1): /* BIF: insert N bits where M is 0. */
            res = binop(opXOR, mkexpr(argD),
                               binop(opAND,
                                     binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                     unop(opNOT, mkexpr(argM))));
            break;
         default:
            vassert(0);
      }
      vassert(res);
      /* 64-bit variant: upper half of Vd reads as zero. */
      putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr = isQ ? "16b" : "8b";
      vassert(op < 4);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[op],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
6753
   /* ------------ {USHR,SSHR,SHL} (vector, immediate) ------------ */
   /* 31  28     22   18   15     9 4
      0q1 011110 immh immb 000001 n d  USHR Vd.T, Vn.T, #shift (1)
      0q0 011110 immh immb 000001 n d  SSHR Vd.T, Vn.T, #shift (2)
      0q0 011110 immh immb 010101 n d  SHL  Vd.T, Vn.T, #shift (3)
      laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx,    SHL:xxx
                         001x:xxx -> H, SHR:16-xxxx   SHL:xxxx
                         01xx:xxx -> S, SHR:32-xxxxx  SHL:xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx SHL:xxxxxx
                         other    -> invalid
      As usual the case laneTy==D && q==0 is not allowed.
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
       && INSN(10,10) == 1) {
      /* ix: 1 = USHR, 2 = SSHR, 3 = SHL; 0 = no match. */
      UInt ix = 0;
      /**/ if (INSN(29,29) == 1 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 1;
      else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 2;
      else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,1,0,1,0)) ix = 3;
      if (ix > 0) {
         Bool isQ  = INSN(30,30) == 1;
         UInt immh = INSN(22,19);
         UInt immb = INSN(18,16);
         UInt nn   = INSN(9,5);
         UInt dd   = INSN(4,0);
         const IROp opsSHRN[4]
            = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
         const IROp opsSARN[4]
            = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
         const IROp opsSHLN[4]
            = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
         UInt szBlg2 = 0;
         UInt shift  = 0;
         /* Decode immh:immb into the lane size and (right-)shift
            amount, per the table in the header comment. */
         Bool ok     = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb);
         if (ix == 3) {
            /* The shift encoding has opposite sign for the leftwards
               case.  Adjust shift to compensate. */
            shift = (8 << szBlg2) - shift;
         }
         if (ok && szBlg2 < 4 && shift > 0 && shift < (8 << szBlg2)
             && !(szBlg2 == 3/*64bit*/ && !isQ)) {
            IROp op = Iop_INVALID;
            const HChar* nm = NULL;
            switch (ix) {
               case 1: op = opsSHRN[szBlg2]; nm = "ushr"; break;
               case 2: op = opsSARN[szBlg2]; nm = "sshr"; break;
               case 3: op = opsSHLN[szBlg2]; nm = "shl";  break;
               default: vassert(0);
            }
            IRExpr* src = getQReg128(nn);
            IRExpr* res = binop(op, src, mkU8(shift));
            /* 64-bit variant: upper half of Vd reads as zero. */
            putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
            HChar laneCh = "bhsd"[szBlg2];
            UInt  nLanes = (isQ ? 128 : 64) / (8 << szBlg2);
            DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
                nameQReg128(dd), nLanes, laneCh,
                nameQReg128(nn), nLanes, laneCh, shift);
            return True;
         }
         /* else fall through */
      }
   }
6816
   /* -------------------- {U,S}SHLL{,2} -------------------- */
   /* 31  28     22   18   15     9 4
      0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
      0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
      where Ta,Tb,sh
        = case immh of 1xxx -> invalid
                       01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
                       001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
                       0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
                       0000 -> AdvSIMD modified immediate (???)
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
       && INSN(15,10) == BITS6(1,0,1,0,0,1)) {
      Bool isQ   = INSN(30,30) == 1;
      Bool isU   = INSN(29,29) == 1;
      UInt immh  = INSN(22,19);
      UInt immb  = INSN(18,16);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      UInt immhb = (immh << 3) | immb;
      IRTemp  src  = newTemp(Ity_V128);
      IRTemp  zero = newTemp(Ity_V128);
      IRExpr* res  = NULL;
      UInt    sh   = 0;
      const HChar* ta = "??";
      const HChar* tb = "??";
      assign(src, getQReg128(nn));
      assign(zero, mkV128(0x0000));
      /* Widen-then-shift scheme: interleaving the source lanes with
         zeroes places each source lane in the *top* half of a
         double-width lane; a subsequent right shift by (laneBits - sh)
         -- logical for USHLL, arithmetic for SSHLL -- yields the
         zero-/sign-extended value shifted left by sh.  isQ selects
         the high (SHLL2) or low (SHLL) half of the source. */
      if (immh & 8) {
         /* invalid; don't assign to res */
      }
      else if (immh & 4) {
         sh = immhb - 32;
         vassert(sh < 32); /* so 32-sh is 1..32 */
         ta = "2d";
         tb = isQ ? "4s" : "2s";
         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
                           : mk_InterleaveLO32x4(src, zero);
         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
      }
      else if (immh & 2) {
         sh = immhb - 16;
         vassert(sh < 16); /* so 16-sh is 1..16 */
         ta = "4s";
         tb = isQ ? "8h" : "4h";
         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
                           : mk_InterleaveLO16x8(src, zero);
         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
      }
      else if (immh & 1) {
         sh = immhb - 8;
         vassert(sh < 8); /* so 8-sh is 1..8 */
         ta = "8h";
         tb = isQ ? "16b" : "8b";
         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
                           : mk_InterleaveLO8x16(src, zero);
         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
      } else {
         vassert(immh == 0);
         /* invalid; don't assign to res */
      }
      /* */
      if (res) {
         putQReg128(dd, res);
         DIP("%cshll%s %s.%s, %s.%s, #%d\n",
             isU ? 'u' : 's', isQ ? "2" : "",
             nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
         return True;
      }
      /* else fall through */
   }
6888
   /* -------------------- XTN{,2} -------------------- */
   /* 31  28    23   21     15     9 4  XTN{,2} Vd.Tb, Vn.Ta
      0q0 01110 size 100001 001010 n d
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
       && INSN(21,16) == BITS6(1,0,0,0,0,1)
       && INSN(15,10) == BITS6(0,0,1,0,1,0)) {
      Bool isQ  = INSN(30,30) == 1;
      UInt size = INSN(23,22);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      IROp op   = Iop_INVALID;
      const HChar* tb = NULL;
      const HChar* ta = NULL;
      /* size:q selects the narrowing width and the destination half;
         size == 3 (cases 6/7) is invalid. */
      switch ((size << 1) | (isQ ? 1 : 0)) {
         case 0: tb = "8b";  ta = "8h"; op = Iop_NarrowUn16to8x8;  break;
         case 1: tb = "16b"; ta = "8h"; op = Iop_NarrowUn16to8x8;  break;
         case 2: tb = "4h";  ta = "4s"; op = Iop_NarrowUn32to16x4; break;
         case 3: tb = "8h";  ta = "4s"; op = Iop_NarrowUn32to16x4; break;
         case 4: tb = "2s";  ta = "2d"; op = Iop_NarrowUn64to32x2; break;
         case 5: tb = "4s";  ta = "2d"; op = Iop_NarrowUn64to32x2; break;
         case 6: break;
         case 7: break;
         default: vassert(0);
      }
      if (op != Iop_INVALID) {
         if (!isQ) {
            /* XTN zeroes the upper half of Vd; XTN2 preserves it. */
            putQRegLane(dd, 1, mkU64(0));
         }
         /* The narrowed result goes in the upper (XTN2) or lower
            (XTN) 64-bit half of Vd. */
         putQRegLane(dd, isQ ? 1 : 0, unop(op, getQReg128(nn)));
         DIP("xtn%s %s.%s, %s.%s\n", isQ ? "2" : "",
             nameQReg128(dd), tb, nameQReg128(nn), ta);
         return True;
      }
      /* else fall through */
   }
6925
6926   /* ---------------- CNT (vector) ---------------- */
6927   /* 31 29     23 21           9 4
6928      0q 001110 00 100000010110 n d  CNT Vd.T, Vn.T
6929   */
6930
6931  if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
6932      && INSN(23,22) == BITS2(0,0)
6933      && INSN(21,10) == BITS12(1,0,0,0,0,0,0,1,0,1,1,0) ) {
6934     Bool isQ = INSN(30,30) == 1;
6935     UInt nn  = INSN(9,5);
6936     UInt dd  = INSN(4,0);
6937     const HChar* name = isQ ? "16b" : "8b";
6938
6939     IRExpr* res = unop(Iop_Cnt8x16, getQReg128(nn));
6940     putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
6941
6942     DIP("cnt %s.%s, %s.%s\n", nameQReg128(dd), name, nameQReg128(nn), name);
6943     return True;
6944  }
6945
6946
   /* ---------------- DUP (element, vector) ---------------- */
   /* 31  28       20   15     9 4
      0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   */
   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
       && INSN(15,10) == BITS6(0,0,0,0,0,1)) {
      Bool   isQ  = INSN(30,30) == 1;
      UInt   imm5 = INSN(20,16);
      UInt   nn   = INSN(9,5);
      UInt   dd   = INSN(4,0);
      /* w0 holds the selected source lane, zero-extended to 64 bits. */
      IRTemp w0   = newTemp(Ity_I64);
      const HChar* arT  = "??";
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt   laneNo = 16; /* invalid */
      /* The position of the lowest set bit of imm5 gives the lane
         size; the bits above it give the lane index. */
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         arTs   = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         arTs   = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         arTs   = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if ((imm5 & 8) && isQ) {
         /* 64-bit lanes are only valid in the 128-bit (q==1) form. */
         arT  = "2d";
         arTs = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         /* Replicate the lane across 64 bits, then across 128 (q==1)
            or write zeroes in the upper half (q==0). */
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s.%s[%u]\n",
             nameQReg128(dd), arT, nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }
7005
7006   /* ---------------- DUP (general, vector) ---------------- */
7007   /* 31  28    23  20   15     9 4
7008      0q0 01110 000 imm5 000011 n d  DUP Vd.T, Rn
7009      Q=0 writes 64, Q=1 writes 128
7010      imm5: xxxx1  8B(q=0)      or 16b(q=1),     R=W
7011            xxx10  4H(q=0)      or 8H(q=1),      R=W
7012            xx100  2S(q=0)      or 4S(q=1),      R=W
7013            x1000  Invalid(q=0) or 2D(q=1),      R=X
7014            x0000  Invalid(q=0) or Invalid(q=1)
7015   */
7016   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
7017       && INSN(15,10) == BITS6(0,0,0,0,1,1)) {
7018      Bool   isQ  = INSN(30,30) == 1;
7019      UInt   imm5 = INSN(20,16);
7020      UInt   nn   = INSN(9,5);
7021      UInt   dd   = INSN(4,0);
7022      IRTemp w0   = newTemp(Ity_I64);
7023      const HChar* arT = "??";
7024      IRType laneTy = Ity_INVALID;
7025      if (imm5 & 1) {
7026         arT    = isQ ? "16b" : "8b";
7027         laneTy = Ity_I8;
7028         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
7029      }
7030      else if (imm5 & 2) {
7031         arT    = isQ ? "8h" : "4h";
7032         laneTy = Ity_I16;
7033         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
7034      }
7035      else if (imm5 & 4) {
7036         arT    = isQ ? "4s" : "2s";
7037         laneTy = Ity_I32;
7038         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
7039      }
7040      else if ((imm5 & 8) && isQ) {
7041         arT    = "2d";
7042         laneTy = Ity_I64;
7043         assign(w0, getIReg64orZR(nn));
7044      }
7045      else {
7046         /* invalid; leave laneTy unchanged. */
7047      }
7048      /* */
7049      if (laneTy != Ity_INVALID) {
7050         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
7051         putQReg128(dd, binop(Iop_64HLtoV128,
7052                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
7053         DIP("dup %s.%s, %s\n",
7054             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
7055         return True;
7056      }
7057      /* else fall through */
7058   }
7059
7060   /* ---------------------- {S,U}MOV ---------------------- */
7061   /* 31  28        20   15     9 4
7062      0q0 01110 000 imm5 001111 n d  UMOV Xd/Wd, Vn.Ts[index]
7063      0q0 01110 000 imm5 001011 n d  SMOV Xd/Wd, Vn.Ts[index]
7064      dest is Xd when q==1, Wd when q==0
7065      UMOV:
7066         Ts,index,ops = case q:imm5 of
7067                          0:xxxx1 -> B, xxxx, 8Uto64
7068                          1:xxxx1 -> invalid
7069                          0:xxx10 -> H, xxx,  16Uto64
7070                          1:xxx10 -> invalid
7071                          0:xx100 -> S, xx,   32Uto64
7072                          1:xx100 -> invalid
7073                          1:x1000 -> D, x,    copy64
7074                          other   -> invalid
7075      SMOV:
7076         Ts,index,ops = case q:imm5 of
7077                          0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
7078                          1:xxxx1 -> B, xxxx, 8Sto64
7079                          0:xxx10 -> H, xxx,  (32Uto64 . 16Sto32)
7080                          1:xxx10 -> H, xxx,  16Sto64
7081                          0:xx100 -> invalid
7082                          1:xx100 -> S, xx,   32Sto64
7083                          1:x1000 -> invalid
7084                          other   -> invalid
7085   */
7086   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
7087       && (INSN(15,10) & BITS6(1,1,1,0,1,1)) == BITS6(0,0,1,0,1,1)) {
7088      UInt bitQ = INSN(30,30) == 1;
7089      UInt imm5 = INSN(20,16);
7090      UInt nn   = INSN(9,5);
7091      UInt dd   = INSN(4,0);
7092      Bool isU  = INSN(12,12) == 1;
7093      const HChar* arTs = "??";
7094      UInt    laneNo = 16; /* invalid */
7095      // Setting 'res' to non-NULL determines valid/invalid
7096      IRExpr* res    = NULL;
7097      if (!bitQ && (imm5 & 1)) { // 0:xxxx1
7098         laneNo = (imm5 >> 1) & 15;
7099         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
7100         res = isU ? unop(Iop_8Uto64, lane)
7101                   : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
7102         arTs = "b";
7103      }
7104      else if (bitQ && (imm5 & 1)) { // 1:xxxx1
7105         laneNo = (imm5 >> 1) & 15;
7106         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
7107         res = isU ? NULL
7108                   : unop(Iop_8Sto64, lane);
7109         arTs = "b";
7110      }
7111      else if (!bitQ && (imm5 & 2)) { // 0:xxx10
7112         laneNo = (imm5 >> 2) & 7;
7113         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
7114         res = isU ? unop(Iop_16Uto64, lane)
7115                   : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
7116         arTs = "h";
7117      }
7118      else if (bitQ && (imm5 & 2)) { // 1:xxx10
7119         laneNo = (imm5 >> 2) & 7;
7120         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
7121         res = isU ? NULL
7122                   : unop(Iop_16Sto64, lane);
7123         arTs = "h";
7124      }
7125      else if (!bitQ && (imm5 & 4)) { // 0:xx100
7126         laneNo = (imm5 >> 3) & 3;
7127         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
7128         res = isU ? unop(Iop_32Uto64, lane)
7129                   : NULL;
7130         arTs = "s";
7131      }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
7133         laneNo = (imm5 >> 3) & 3;
7134         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
7135         res = isU ? NULL
7136                   : unop(Iop_32Sto64, lane);
7137         arTs = "s";
7138      }
7139      else if (bitQ && (imm5 & 8)) { // 1:x1000
7140         laneNo = (imm5 >> 4) & 1;
7141         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
7142         res = isU ? lane
7143                   : NULL;
7144         arTs = "d";
7145      }
7146      /* */
7147      if (res) {
7148         vassert(laneNo < 16);
7149         putIReg64orZR(dd, res);
7150         DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
7151             nameIRegOrZR(bitQ == 1, dd),
7152             nameQReg128(nn), arTs, laneNo);
7153         return True;
7154      }
7155      /* else fall through */
7156   }
7157
7158   /* -------------------- INS (general) -------------------- */
7159   /* 31  28       20   15     9 4
7160      010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
7161      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
7162                                 xxx10 -> H, xxx
7163                                 xx100 -> S, xx
7164                                 x1000 -> D, x
7165   */
7166   if (INSN(31,21) == BITS11(0,1,0,0,1,1,1,0,0,0,0)
7167       && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
7168      UInt    imm5   = INSN(20,16);
7169      UInt    nn     = INSN(9,5);
7170      UInt    dd     = INSN(4,0);
7171      HChar   ts     = '?';
7172      UInt    laneNo = 16;
7173      IRExpr* src    = NULL;
7174      if (imm5 & 1) {
7175         src    = unop(Iop_64to8, getIReg64orZR(nn));
7176         laneNo = (imm5 >> 1) & 15;
7177         ts     = 'b';
7178      }
7179      else if (imm5 & 2) {
7180         src    = unop(Iop_64to16, getIReg64orZR(nn));
7181         laneNo = (imm5 >> 2) & 7;
7182         ts     = 'h';
7183      }
7184      else if (imm5 & 4) {
7185         src    = unop(Iop_64to32, getIReg64orZR(nn));
7186         laneNo = (imm5 >> 3) & 3;
7187         ts     = 's';
7188      }
7189      else if (imm5 & 8) {
7190         src    = getIReg64orZR(nn);
7191         laneNo = (imm5 >> 4) & 1;
7192         ts     = 'd';
7193      }
7194      /* */
7195      if (src) {
7196         vassert(laneNo < 16);
7197         putQRegLane(dd, laneNo, src);
7198         DIP("ins %s.%c[%u], %s\n",
7199             nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
7200         return True;
7201      }
7202      /* else invalid; fall through */
7203   }
7204
7205   /* -------------------- NEG (vector) -------------------- */
7206   /* 31  28    23 21    16      9 4
7207      0q1 01110 sz 10000 0101110 n d  NEG Vd, Vn
7208      sz is laneSz, q:sz == 011 is disallowed, as usual
7209   */
7210   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
7211       && INSN(21,10) == BITS12(1,0,0,0,0,0,1,0,1,1,1,0)) {
7212      Bool isQ    = INSN(30,30) == 1;
7213      UInt szBlg2 = INSN(23,22);
7214      UInt nn     = INSN(9,5);
7215      UInt dd     = INSN(4,0);
7216      Bool zeroHI = False;
7217      const HChar* arrSpec = "";
7218      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
7219      if (ok) {
7220         const IROp opSUB[4]
7221            = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
7222         IRTemp res = newTemp(Ity_V128);
7223         vassert(szBlg2 < 4);
7224         assign(res, binop(opSUB[szBlg2], mkV128(0x0000), getQReg128(nn)));
7225         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
7226                               : mkexpr(res));
7227         DIP("neg %s.%s, %s.%s\n",
7228             nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
7229         return True;
7230      }
7231      /* else fall through */
7232   }
7233
7234   /* -------------------- TBL, TBX -------------------- */
7235   /* 31  28        20 15 14  12  9 4
7236      0q0 01110 000 m  0  len 000 n d  TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
7237      0q0 01110 000 m  0  len 100 n d  TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
7238      where Ta = 16b(q=1) or 8b(q=0)
7239   */
7240   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
7241       && INSN(15,15) == 0 && INSN(11,10) == BITS2(0,0)) {
7242      Bool isQ   = INSN(30,30) == 1;
7243      Bool isTBX = INSN(12,12) == 1;
7244      UInt mm    = INSN(20,16);
7245      UInt len   = INSN(14,13);
7246      UInt nn    = INSN(9,5);
7247      UInt dd    = INSN(4,0);
7248      /* The out-of-range values to use. */
7249      IRTemp oor_values = newTemp(Ity_V128);
7250      assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
7251      /* src value */
7252      IRTemp src = newTemp(Ity_V128);
7253      assign(src, getQReg128(mm));
7254      /* The table values */
7255      IRTemp tab[4];
7256      UInt   i;
7257      for (i = 0; i <= len; i++) {
7258         vassert(i < 4);
7259         tab[i] = newTemp(Ity_V128);
7260         assign(tab[i], getQReg128((nn + i) % 32));
7261      }
7262      IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
7263      putQReg128(dd, isQ ? mkexpr(res)
7264                         : unop(Iop_ZeroHI64ofV128, mkexpr(res)) );
7265      const HChar* Ta = isQ ? "16b" : "8b";
7266      const HChar* nm = isTBX ? "tbx" : "tbl";
7267      DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n",
7268          nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
7269      return True;
7270   }
7271   /* FIXME Temporary hacks to get through ld.so FIXME */
7272
7273   /* ------------------ movi vD.4s, #0x0 ------------------ */
7274   /* 0x4F 0x00 0x04 000 vD */
7275   if ((insn & 0xFFFFFFE0) == 0x4F000400) {
7276      UInt vD = INSN(4,0);
7277      putQReg128(vD, mkV128(0x0000));
7278      DIP("movi v%u.4s, #0x0\n", vD);
7279      return True;
7280   }
7281
7282   /* ---------------- MOV vD.16b, vN.16b ---------------- */
7283   /* 31        23  20 15     9 4
7284      010 01110 101 m  000111 n d   ORR vD.16b, vN.16b, vM.16b
7285      This only handles the N == M case.
7286   */
7287   if (INSN(31,24) == BITS8(0,1,0,0,1,1,1,0)
7288       && INSN(23,21) == BITS3(1,0,1) && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
7289      UInt mm = INSN(20,16);
7290      UInt nn = INSN(9,5);
7291      UInt dd = INSN(4,0);
7292      if (mm == nn) {
7293         putQReg128(dd, getQReg128(nn));
7294         DIP("mov v%u.16b, v%u.16b\n", dd, nn);
7295         return True;
7296      }
7297      /* else it's really an ORR; fall through. */
7298   }
7299
7300   /* ---------------- CMEQ_d_d_#0 ---------------- */
7301   /*
7302      010 11110 11 10000 0100 110 n d   CMEQ Dd, Dn, #0
7303   */
7304   if ((INSN(31,0) & 0xFFFFFC00) == 0x5EE09800) {
7305      UInt nn = INSN(9,5);
7306      UInt dd = INSN(4,0);
7307      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
7308                          binop(Iop_CmpEQ64x2, getQReg128(nn),
7309                                mkV128(0x0000))));
7310      DIP("cmeq d%u, d%u, #0\n", dd, nn);
7311      return True;
7312   }
7313
7314   /* ---------------- SHL_d_d_#imm ---------------- */
7315   /* 31         22 21  18 15     9 4
7316      010 111110 1  ih3 ib 010101 n d  SHL Dd, Dn, #(ih3:ib)
7317   */
7318   if (INSN(31,22) == BITS10(0,1,0,1,1,1,1,1,0,1)
7319       && INSN(15,10) == BITS6(0,1,0,1,0,1)) {
7320      UInt nn = INSN(9,5);
7321      UInt dd = INSN(4,0);
7322      UInt sh = INSN(21,16);
7323      vassert(sh < 64);
7324      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
7325                          binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
7326      DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
7327      return True;
7328   }
7329
7330   vex_printf("ARM64 front end: simd_and_fp\n");
7331   return False;
7332#  undef INSN
7333}
7334
7335
7336/*------------------------------------------------------------*/
7337/*--- Disassemble a single ARM64 instruction               ---*/
7338/*------------------------------------------------------------*/
7339
7340/* Disassemble a single ARM64 instruction into IR.  The instruction
7341   has is located at |guest_instr| and has guest IP of
7342   |guest_PC_curr_instr|, which will have been set before the call
7343   here.  Returns True iff the instruction was decoded, in which case
7344   *dres will be set accordingly, or False, in which case *dres should
7345   be ignored by the caller. */
7346
static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        UChar*       guest_instr,
        VexArchInfo* archinfo,
        VexAbiInfo*  abiinfo
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults: "decoded OK, 4 bytes long, keep going".
      The sub-decoders leave these alone unless they handle a
      control-flow insn; on failure the caller asserts they are
      unchanged (see end of this function). */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      UChar* code = (UChar*)guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it?
            The marker insn following the preamble selects the request;
            each complete sequence is 20 bytes (preamble + marker). */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            /* Resume past the whole 20-byte sequence.  dres->len stays
               at 4, which is harmless: the caller only consults len in
               the Dis_Continue case. */
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            /* Here we do continue normally, so len must cover the whole
               20-byte sequence. */
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /*  branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            /* LR (x30) gets the return address past the sequence. */
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is.  Treat the whole preamble as
            undecodable. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}
7519
7520
7521/*------------------------------------------------------------*/
7522/*--- Top-level fn                                         ---*/
7523/*------------------------------------------------------------*/
7524
7525/* Disassemble a single instruction into IR.  The instruction
7526   is located in host memory at &guest_code[delta]. */
7527
7528DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
7529                           Bool         (*resteerOkFn) ( void*, Addr64 ),
7530                           Bool         resteerCisOk,
7531                           void*        callback_opaque,
7532                           UChar*       guest_code_IN,
7533                           Long         delta_IN,
7534                           Addr64       guest_IP,
7535                           VexArch      guest_arch,
7536                           VexArchInfo* archinfo,
7537                           VexAbiInfo*  abiinfo,
7538                           Bool         host_bigendian_IN,
7539                           Bool         sigill_diag_IN )
7540{
7541   DisResult dres;
7542   vex_bzero(&dres, sizeof(dres));
7543
7544   /* Set globals (see top of this file) */
7545   vassert(guest_arch == VexArchARM64);
7546
7547   irsb                = irsb_IN;
7548   host_is_bigendian   = host_bigendian_IN;
7549   guest_PC_curr_instr = (Addr64)guest_IP;
7550
7551   /* Sanity checks */
7552   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
7553   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
7554   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
7555
7556   /* Try to decode */
7557   Bool ok = disInstr_ARM64_WRK( &dres,
7558                                 resteerOkFn, resteerCisOk, callback_opaque,
7559                                 (UChar*)&guest_code_IN[delta_IN],
7560                                 archinfo, abiinfo );
7561   if (ok) {
7562      /* All decode successes end up here. */
7563      vassert(dres.len == 4 || dres.len == 20);
7564      switch (dres.whatNext) {
7565         case Dis_Continue:
7566            putPC( mkU64(dres.len + guest_PC_curr_instr) );
7567            break;
7568         case Dis_ResteerU:
7569         case Dis_ResteerC:
7570            putPC(mkU64(dres.continueAt));
7571            break;
7572         case Dis_StopHere:
7573            break;
7574         default:
7575            vassert(0);
7576      }
7577      DIP("\n");
7578   } else {
7579      /* All decode failures end up here. */
7580      if (sigill_diag_IN) {
7581         Int   i, j;
7582         UChar buf[64];
7583         UInt  insn
7584                  = getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] );
7585         vex_bzero(buf, sizeof(buf));
7586         for (i = j = 0; i < 32; i++) {
7587            if (i > 0) {
7588              if ((i & 7) == 0) buf[j++] = ' ';
7589              else if ((i & 3) == 0) buf[j++] = '\'';
7590            }
7591            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
7592         }
7593         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
7594         vex_printf("disInstr(arm64): %s\n", buf);
7595      }
7596
7597      /* Tell the dispatcher that this insn cannot be decoded, and so
7598         has not been executed, and (is currently) the next to be
7599         executed.  PC should be up-to-date since it is made so at the
7600         start of each insn, but nevertheless be paranoid and update
7601         it again right now. */
7602      putPC( mkU64(guest_PC_curr_instr) );
7603      dres.whatNext    = Dis_StopHere;
7604      dres.len         = 0;
7605      dres.continueAt  = 0;
7606      dres.jk_StopHere = Ijk_NoDecode;
7607   }
7608   return dres;
7609}
7610
7611////////////////////////////////////////////////////////////////////////
7612////////////////////////////////////////////////////////////////////////
7613
7614/* Spare code for doing reference implementations of various 128-bit
7615   SIMD interleaves/deinterleaves/concatenation ops.  For 64-bit
7616   equivalents see the end of guest_arm_toIR.c. */
7617
7618////////////////////////////////////////////////////////////////
7619// 64x2 operations
7620//
7621static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 )
7622{
7623  // returns a0 b0
7624  return binop(Iop_64HLtoV128, unop(Iop_V128to64, mkexpr(a10)),
7625                               unop(Iop_V128to64, mkexpr(b10)));
7626}
7627
7628static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 )
7629{
7630  // returns a1 b1
7631  return binop(Iop_64HLtoV128, unop(Iop_V128HIto64, mkexpr(a10)),
7632                               unop(Iop_V128HIto64, mkexpr(b10)));
7633}
7634
7635
7636////////////////////////////////////////////////////////////////
7637// 32x4 operations
7638//
7639
7640// Split a 128 bit value into 4 32 bit ones, in 64-bit IRTemps with
7641// the top halves guaranteed to be zero.
7642static void breakV128to32s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
7643                             IRTemp* out0, IRTemp v128 )
7644{
7645  if (out3) *out3 = newTemp(Ity_I64);
7646  if (out2) *out2 = newTemp(Ity_I64);
7647  if (out1) *out1 = newTemp(Ity_I64);
7648  if (out0) *out0 = newTemp(Ity_I64);
7649  IRTemp hi64 = newTemp(Ity_I64);
7650  IRTemp lo64 = newTemp(Ity_I64);
7651  assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7652  assign(lo64, unop(Iop_V128to64,   mkexpr(v128)) );
7653  if (out3) assign(*out3, binop(Iop_Shr64, mkexpr(hi64), mkU8(32)));
7654  if (out2) assign(*out2, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFFFFFF)));
7655  if (out1) assign(*out1, binop(Iop_Shr64, mkexpr(lo64), mkU8(32)));
7656  if (out0) assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFFFFFF)));
7657}
7658
7659// Make a V128 bit value from 4 32 bit ones, each of which is in a 64 bit
7660// IRTemp.
7661static IRTemp mkV128from32s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7662{
7663  IRTemp hi64 = newTemp(Ity_I64);
7664  IRTemp lo64 = newTemp(Ity_I64);
7665  assign(hi64,
7666         binop(Iop_Or64,
7667               binop(Iop_Shl64, mkexpr(in3), mkU8(32)),
7668               binop(Iop_And64, mkexpr(in2), mkU64(0xFFFFFFFF))));
7669  assign(lo64,
7670         binop(Iop_Or64,
7671               binop(Iop_Shl64, mkexpr(in1), mkU8(32)),
7672               binop(Iop_And64, mkexpr(in0), mkU64(0xFFFFFFFF))));
7673  IRTemp res = newTemp(Ity_V128);
7674  assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
7675  return res;
7676}
7677
7678static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 )
7679{
7680  // returns a2 a0 b2 b0
7681  IRTemp a2, a0, b2, b0;
7682  breakV128to32s(NULL, &a2, NULL, &a0, a3210);
7683  breakV128to32s(NULL, &b2, NULL, &b0, b3210);
7684  return mkexpr(mkV128from32s(a2, a0, b2, b0));
7685}
7686
7687static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 )
7688{
7689  // returns a3 a1 b3 b1
7690  IRTemp a3, a1, b3, b1;
7691  breakV128to32s(&a3, NULL, &a1, NULL, a3210);
7692  breakV128to32s(&b3, NULL, &b1, NULL, b3210);
7693  return mkexpr(mkV128from32s(a3, a1, b3, b1));
7694}
7695
7696static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 )
7697{
7698  // returns a1 b1 a0 b0
7699  IRTemp a1, a0, b1, b0;
7700  breakV128to32s(NULL, NULL, &a1, &a0, a3210);
7701  breakV128to32s(NULL, NULL, &b1, &b0, b3210);
7702  return mkexpr(mkV128from32s(a1, b1, a0, b0));
7703}
7704
7705static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 )
7706{
7707  // returns a3 b3 a2 b2
7708  IRTemp a3, a2, b3, b2;
7709  breakV128to32s(&a3, &a2, NULL, NULL, a3210);
7710  breakV128to32s(&b3, &b2, NULL, NULL, b3210);
7711  return mkexpr(mkV128from32s(a3, b3, a2, b2));
7712}
7713
7714////////////////////////////////////////////////////////////////
7715// 16x8 operations
7716//
7717
7718static void breakV128to16s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
7719                             IRTemp* out4, IRTemp* out3, IRTemp* out2,
7720                             IRTemp* out1,IRTemp* out0, IRTemp v128 )
7721{
7722  if (out7) *out7 = newTemp(Ity_I64);
7723  if (out6) *out6 = newTemp(Ity_I64);
7724  if (out5) *out5 = newTemp(Ity_I64);
7725  if (out4) *out4 = newTemp(Ity_I64);
7726  if (out3) *out3 = newTemp(Ity_I64);
7727  if (out2) *out2 = newTemp(Ity_I64);
7728  if (out1) *out1 = newTemp(Ity_I64);
7729  if (out0) *out0 = newTemp(Ity_I64);
7730  IRTemp hi64 = newTemp(Ity_I64);
7731  IRTemp lo64 = newTemp(Ity_I64);
7732  assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7733  assign(lo64, unop(Iop_V128to64,   mkexpr(v128)) );
7734  if (out7)
7735    assign(*out7, binop(Iop_And64,
7736                        binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
7737                        mkU64(0xFFFF)));
7738  if (out6)
7739    assign(*out6, binop(Iop_And64,
7740                        binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
7741                        mkU64(0xFFFF)));
7742  if (out5)
7743    assign(*out5, binop(Iop_And64,
7744                        binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
7745                        mkU64(0xFFFF)));
7746  if (out4)
7747    assign(*out4, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFF)));
7748  if (out3)
7749    assign(*out3, binop(Iop_And64,
7750                        binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
7751                        mkU64(0xFFFF)));
7752  if (out2)
7753    assign(*out2, binop(Iop_And64,
7754                        binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
7755                        mkU64(0xFFFF)));
7756  if (out1)
7757    assign(*out1, binop(Iop_And64,
7758                        binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
7759                        mkU64(0xFFFF)));
7760  if (out0)
7761    assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFF)));
7762}
7763
7764static IRTemp mkV128from16s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
7765                              IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7766{
7767  IRTemp hi64 = newTemp(Ity_I64);
7768  IRTemp lo64 = newTemp(Ity_I64);
7769  assign(hi64,
7770         binop(Iop_Or64,
7771               binop(Iop_Or64,
7772                     binop(Iop_Shl64,
7773                           binop(Iop_And64, mkexpr(in7), mkU64(0xFFFF)),
7774                           mkU8(48)),
7775                     binop(Iop_Shl64,
7776                           binop(Iop_And64, mkexpr(in6), mkU64(0xFFFF)),
7777                           mkU8(32))),
7778               binop(Iop_Or64,
7779                     binop(Iop_Shl64,
7780                           binop(Iop_And64, mkexpr(in5), mkU64(0xFFFF)),
7781                           mkU8(16)),
7782                     binop(Iop_And64,
7783                           mkexpr(in4), mkU64(0xFFFF)))));
7784  assign(lo64,
7785         binop(Iop_Or64,
7786               binop(Iop_Or64,
7787                     binop(Iop_Shl64,
7788                           binop(Iop_And64, mkexpr(in3), mkU64(0xFFFF)),
7789                           mkU8(48)),
7790                     binop(Iop_Shl64,
7791                           binop(Iop_And64, mkexpr(in2), mkU64(0xFFFF)),
7792                           mkU8(32))),
7793               binop(Iop_Or64,
7794                     binop(Iop_Shl64,
7795                           binop(Iop_And64, mkexpr(in1), mkU64(0xFFFF)),
7796                           mkU8(16)),
7797                     binop(Iop_And64,
7798                           mkexpr(in0), mkU64(0xFFFF)))));
7799  IRTemp res = newTemp(Ity_V128);
7800  assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
7801  return res;
7802}
7803
7804static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
7805{
7806  // returns a6 a4 a2 a0 b6 b4 b2 b0
7807  IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
7808  breakV128to16s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
7809  breakV128to16s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
7810  return mkexpr(mkV128from16s(a6, a4, a2, a0, b6, b4, b2, b0));
7811}
7812
7813static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
7814{
7815  // returns a7 a5 a3 a1 b7 b5 b3 b1
7816  IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
7817  breakV128to16s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
7818  breakV128to16s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
7819  return mkexpr(mkV128from16s(a7, a5, a3, a1, b7, b5, b3, b1));
7820}
7821
7822static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 )
7823{
7824  // returns a3 b3 a2 b2 a1 b1 a0 b0
7825  IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
7826  breakV128to16s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
7827  breakV128to16s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
7828  return mkexpr(mkV128from16s(a3, b3, a2, b2, a1, b1, a0, b0));
7829}
7830
7831static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 )
7832{
7833  // returns a7 b7 a6 b6 a5 b5 a4 b4
7834  IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
7835  breakV128to16s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
7836  breakV128to16s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
7837  return mkexpr(mkV128from16s(a7, b7, a6, b6, a5, b5, a4, b4));
7838}
7839
7840////////////////////////////////////////////////////////////////
7841// 8x16 operations
7842//
7843
7844static void breakV128to8s ( IRTemp* outF, IRTemp* outE, IRTemp* outD,
7845                            IRTemp* outC, IRTemp* outB, IRTemp* outA,
7846                            IRTemp* out9, IRTemp* out8,
7847                            IRTemp* out7, IRTemp* out6, IRTemp* out5,
7848                            IRTemp* out4, IRTemp* out3, IRTemp* out2,
7849                            IRTemp* out1,IRTemp* out0, IRTemp v128 )
7850{
7851  if (outF) *outF = newTemp(Ity_I64);
7852  if (outE) *outE = newTemp(Ity_I64);
7853  if (outD) *outD = newTemp(Ity_I64);
7854  if (outC) *outC = newTemp(Ity_I64);
7855  if (outB) *outB = newTemp(Ity_I64);
7856  if (outA) *outA = newTemp(Ity_I64);
7857  if (out9) *out9 = newTemp(Ity_I64);
7858  if (out8) *out8 = newTemp(Ity_I64);
7859  if (out7) *out7 = newTemp(Ity_I64);
7860  if (out6) *out6 = newTemp(Ity_I64);
7861  if (out5) *out5 = newTemp(Ity_I64);
7862  if (out4) *out4 = newTemp(Ity_I64);
7863  if (out3) *out3 = newTemp(Ity_I64);
7864  if (out2) *out2 = newTemp(Ity_I64);
7865  if (out1) *out1 = newTemp(Ity_I64);
7866  if (out0) *out0 = newTemp(Ity_I64);
7867  IRTemp hi64 = newTemp(Ity_I64);
7868  IRTemp lo64 = newTemp(Ity_I64);
7869  assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7870  assign(lo64, unop(Iop_V128to64,   mkexpr(v128)) );
7871  if (outF)
7872    assign(*outF, binop(Iop_And64,
7873                        binop(Iop_Shr64, mkexpr(hi64), mkU8(56)),
7874                        mkU64(0xFF)));
7875  if (outE)
7876    assign(*outE, binop(Iop_And64,
7877                        binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
7878                        mkU64(0xFF)));
7879  if (outD)
7880    assign(*outD, binop(Iop_And64,
7881                        binop(Iop_Shr64, mkexpr(hi64), mkU8(40)),
7882                        mkU64(0xFF)));
7883  if (outC)
7884    assign(*outC, binop(Iop_And64,
7885                        binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
7886                        mkU64(0xFF)));
7887  if (outB)
7888    assign(*outB, binop(Iop_And64,
7889                        binop(Iop_Shr64, mkexpr(hi64), mkU8(24)),
7890                        mkU64(0xFF)));
7891  if (outA)
7892    assign(*outA, binop(Iop_And64,
7893                        binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
7894                        mkU64(0xFF)));
7895  if (out9)
7896    assign(*out9, binop(Iop_And64,
7897                        binop(Iop_Shr64, mkexpr(hi64), mkU8(8)),
7898                        mkU64(0xFF)));
7899  if (out8)
7900    assign(*out8, binop(Iop_And64,
7901                        binop(Iop_Shr64, mkexpr(hi64), mkU8(0)),
7902                        mkU64(0xFF)));
7903  if (out7)
7904    assign(*out7, binop(Iop_And64,
7905                        binop(Iop_Shr64, mkexpr(lo64), mkU8(56)),
7906                        mkU64(0xFF)));
7907  if (out6)
7908    assign(*out6, binop(Iop_And64,
7909                        binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
7910                        mkU64(0xFF)));
7911  if (out5)
7912    assign(*out5, binop(Iop_And64,
7913                        binop(Iop_Shr64, mkexpr(lo64), mkU8(40)),
7914                        mkU64(0xFF)));
7915  if (out4)
7916    assign(*out4, binop(Iop_And64,
7917                        binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
7918                        mkU64(0xFF)));
7919  if (out3)
7920    assign(*out3, binop(Iop_And64,
7921                        binop(Iop_Shr64, mkexpr(lo64), mkU8(24)),
7922                        mkU64(0xFF)));
7923  if (out2)
7924    assign(*out2, binop(Iop_And64,
7925                        binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
7926                        mkU64(0xFF)));
7927  if (out1)
7928    assign(*out1, binop(Iop_And64,
7929                        binop(Iop_Shr64, mkexpr(lo64), mkU8(8)),
7930                        mkU64(0xFF)));
7931  if (out0)
7932    assign(*out0, binop(Iop_And64,
7933                        binop(Iop_Shr64, mkexpr(lo64), mkU8(0)),
7934                        mkU64(0xFF)));
7935}
7936
7937static IRTemp mkV128from8s ( IRTemp inF, IRTemp inE, IRTemp inD, IRTemp inC,
7938                             IRTemp inB, IRTemp inA, IRTemp in9, IRTemp in8,
7939                             IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
7940                             IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7941{
7942  IRTemp vFE = newTemp(Ity_I64);
7943  IRTemp vDC = newTemp(Ity_I64);
7944  IRTemp vBA = newTemp(Ity_I64);
7945  IRTemp v98 = newTemp(Ity_I64);
7946  IRTemp v76 = newTemp(Ity_I64);
7947  IRTemp v54 = newTemp(Ity_I64);
7948  IRTemp v32 = newTemp(Ity_I64);
7949  IRTemp v10 = newTemp(Ity_I64);
7950  assign(vFE, binop(Iop_Or64,
7951                    binop(Iop_Shl64,
7952                          binop(Iop_And64, mkexpr(inF), mkU64(0xFF)), mkU8(8)),
7953                    binop(Iop_And64, mkexpr(inE), mkU64(0xFF))));
7954  assign(vDC, binop(Iop_Or64,
7955                    binop(Iop_Shl64,
7956                          binop(Iop_And64, mkexpr(inD), mkU64(0xFF)), mkU8(8)),
7957                    binop(Iop_And64, mkexpr(inC), mkU64(0xFF))));
7958  assign(vBA, binop(Iop_Or64,
7959                    binop(Iop_Shl64,
7960                          binop(Iop_And64, mkexpr(inB), mkU64(0xFF)), mkU8(8)),
7961                    binop(Iop_And64, mkexpr(inA), mkU64(0xFF))));
7962  assign(v98, binop(Iop_Or64,
7963                    binop(Iop_Shl64,
7964                          binop(Iop_And64, mkexpr(in9), mkU64(0xFF)), mkU8(8)),
7965                    binop(Iop_And64, mkexpr(in8), mkU64(0xFF))));
7966  assign(v76, binop(Iop_Or64,
7967                    binop(Iop_Shl64,
7968                          binop(Iop_And64, mkexpr(in7), mkU64(0xFF)), mkU8(8)),
7969                    binop(Iop_And64, mkexpr(in6), mkU64(0xFF))));
7970  assign(v54, binop(Iop_Or64,
7971                    binop(Iop_Shl64,
7972                          binop(Iop_And64, mkexpr(in5), mkU64(0xFF)), mkU8(8)),
7973                    binop(Iop_And64, mkexpr(in4), mkU64(0xFF))));
7974  assign(v32, binop(Iop_Or64,
7975                    binop(Iop_Shl64,
7976                          binop(Iop_And64, mkexpr(in3), mkU64(0xFF)), mkU8(8)),
7977                    binop(Iop_And64, mkexpr(in2), mkU64(0xFF))));
7978  assign(v10, binop(Iop_Or64,
7979                    binop(Iop_Shl64,
7980                          binop(Iop_And64, mkexpr(in1), mkU64(0xFF)), mkU8(8)),
7981                    binop(Iop_And64, mkexpr(in0), mkU64(0xFF))));
7982  return mkV128from16s(vFE, vDC, vBA, v98, v76, v54, v32, v10);
7983}
7984
7985static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
7986                                     IRTemp bFEDCBA9876543210 )
7987{
7988  // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
7989  IRTemp aE, aC, aA, a8, a6, a4, a2, a0, bE, bC, bA, b8, b6, b4, b2, b0;
7990  breakV128to8s(NULL, &aE, NULL, &aC, NULL, &aA, NULL, &a8,
7991                NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0,
7992                aFEDCBA9876543210);
7993  breakV128to8s(NULL, &bE, NULL, &bC, NULL, &bA, NULL, &b8,
7994                NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0,
7995                bFEDCBA9876543210);
7996  return mkexpr(mkV128from8s(aE, aC, aA, a8, a6, a4, a2, a0,
7997                             bE, bC, bA, b8, b6, b4, b2, b0));
7998}
7999
8000static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
8001                                    IRTemp bFEDCBA9876543210 )
8002{
8003  // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
8004  IRTemp aF, aD, aB, a9, a7, a5, a3, a1, bF, bD, bB, b9, b7, b5, b3, b1;
8005  breakV128to8s(&aF, NULL, &aD, NULL, &aB, NULL, &a9, NULL,
8006                &a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL,
8007                aFEDCBA9876543210);
8008
8009  breakV128to8s(&bF, NULL, &bD, NULL, &bB, NULL, &b9, NULL,
8010                &b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL,
8011                aFEDCBA9876543210);
8012
8013  return mkexpr(mkV128from8s(aF, aD, aB, a9, a7, a5, a3, a1,
8014                             bF, bD, bB, b9, b7, b5, b3, b1));
8015}
8016
8017static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
8018                                     IRTemp bFEDCBA9876543210 )
8019{
8020  // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
8021  IRTemp a7, b7, a6, b6, a5, b5, a4, b4, a3, b3, a2, b2, a1, b1, a0, b0;
8022  breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
8023                &a7,  &a6,  &a5,  &a4,  &a3,  &a2,  &a1,  &a0,
8024                aFEDCBA9876543210);
8025  breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
8026                &b7,  &b6,  &b5,  &b4,  &b3,  &b2,  &b1,  &b0,
8027                bFEDCBA9876543210);
8028  return mkexpr(mkV128from8s(a7, b7, a6, b6, a5, b5, a4, b4,
8029                             a3, b3, a2, b2, a1, b1, a0, b0));
8030}
8031
8032static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
8033                                     IRTemp bFEDCBA9876543210 )
8034{
8035  // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
8036  IRTemp aF, bF, aE, bE, aD, bD, aC, bC, aB, bB, aA, bA, a9, b9, a8, b8;
8037  breakV128to8s(&aF,  &aE,  &aD,  &aC,  &aB,  &aA,  &a9,  &a8,
8038                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
8039                aFEDCBA9876543210);
8040  breakV128to8s(&bF,  &bE,  &bD,  &bC,  &bB,  &bA,  &b9,  &b8,
8041                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
8042                bFEDCBA9876543210);
8043  return mkexpr(mkV128from8s(aF, bF, aE, bE, aD, bD, aC, bC,
8044                             aB, bB, aA, bA, a9, b9, a8, b8));
8045}
8046
8047/*--------------------------------------------------------------------*/
8048/*--- end                                       guest_arm64_toIR.c ---*/
8049/*--------------------------------------------------------------------*/
8050