1/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*--------------------------------------------------------------------*/
4/*--- begin                                     guest_arm64_toIR.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2013-2013 OpenWorks
12      info@open-works.net
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27   02110-1301, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32//ZZ /* XXXX thumb to check:
33//ZZ    that all cases where putIRegT writes r15, we generate a jump.
34//ZZ
35//ZZ    All uses of newTemp assign to an IRTemp and not a UInt
36//ZZ
37//ZZ    For all thumb loads and stores, including VFP ones, new-ITSTATE is
38//ZZ    backed out before the memory op, and restored afterwards.  This
39//ZZ    needs to happen even after we go uncond.  (and for sure it doesn't
40//ZZ    happen for VFP loads/stores right now).
41//ZZ
42//ZZ    VFP on thumb: check that we exclude all r13/r15 cases that we
43//ZZ    should.
44//ZZ
45//ZZ    XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
46//ZZ    taking into account the number of insns guarded by an IT.
47//ZZ
48//ZZ    remove the nasty hack, in the spechelper, of looking for Or32(...,
49//ZZ    0xE0) in as the first arg to armg_calculate_condition, and instead
50//ZZ    use Slice44 as specified in comments in the spechelper.
51//ZZ
52//ZZ    add specialisations for armg_calculate_flag_c and _v, as they
53//ZZ    are moderately often needed in Thumb code.
54//ZZ
55//ZZ    Correctness: ITSTATE handling in Thumb SVCs is wrong.
56//ZZ
57//ZZ    Correctness (obscure): in m_transtab, when invalidating code
58//ZZ    address ranges, invalidate up to 18 bytes after the end of the
59//ZZ    range.  This is because the ITSTATE optimisation at the top of
60//ZZ    _THUMB_WRK below analyses up to 18 bytes before the start of any
61//ZZ    given instruction, and so might depend on the invalidated area.
62//ZZ */
63//ZZ
64//ZZ /* Limitations, etc
65//ZZ
66//ZZ    - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
67//ZZ      These instructions are non-restartable in the case where the
68//ZZ      transfer(s) fault.
69//ZZ
70//ZZ    - SWP: the restart jump back is Ijk_Boring; it should be
71//ZZ      Ijk_NoRedir but that's expensive.  See comments on casLE() in
72//ZZ      guest_x86_toIR.c.
73//ZZ */
74
75/* "Special" instructions.
76
77   This instruction decoder can decode four special instructions
78   which mean nothing natively (are no-ops as far as regs/mem are
79   concerned) but have meaning for supporting Valgrind.  A special
80   instruction is flagged by a 16-byte preamble:
81
82      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
83      (ror x12, x12, #3;   ror x12, x12, #13
84       ror x12, x12, #51;  ror x12, x12, #61)
85
   Following that, one of the following 4 is allowed
87   (standard interpretation in parentheses):
88
89      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
90      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
91      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
92      AA090129 (orr x9,x9,x9)      IR injection
93
94   Any other bytes following the 16-byte preamble are illegal and
95   constitute a failure in instruction decoding.  This all assumes
96   that the preamble will never occur except in specific code
97   fragments designed for Valgrind to catch.
98*/
99
100/* Translates ARM64 code to IR. */
101
102#include "libvex_basictypes.h"
103#include "libvex_ir.h"
104#include "libvex.h"
105#include "libvex_guest_arm64.h"
106
107#include "main_util.h"
108#include "main_globals.h"
109#include "guest_generic_bb_to_IR.h"
110#include "guest_arm64_defs.h"
111
112
113/*------------------------------------------------------------*/
114/*--- Globals                                              ---*/
115/*------------------------------------------------------------*/
116
/* These are set at the start of the translation of an instruction, so
118   that we don't have to pass them around endlessly.  CONST means does
119   not change during translation of the instruction.
120*/
121
122/* CONST: is the host bigendian?  We need to know this in order to do
123   sub-register accesses to the SIMD/FP registers correctly. */
124static Bool host_is_bigendian;
125
126/* CONST: The guest address for the instruction currently being
127   translated.  */
128static Addr64 guest_PC_curr_instr;
129
130/* MOD: The IRSB* into which we're generating code. */
131static IRSB* irsb;
132
133
134/*------------------------------------------------------------*/
135/*--- Debugging output                                     ---*/
136/*------------------------------------------------------------*/
137
138#define DIP(format, args...)           \
139   if (vex_traceflags & VEX_TRACE_FE)  \
140      vex_printf(format, ## args)
141
142#define DIS(buf, format, args...)      \
143   if (vex_traceflags & VEX_TRACE_FE)  \
144      vex_sprintf(buf, format, ## args)
145
146
147/*------------------------------------------------------------*/
148/*--- Helper bits and pieces for deconstructing the        ---*/
149/*--- arm insn stream.                                     ---*/
150/*------------------------------------------------------------*/
151
152/* Do a little-endian load of a 32-bit word, regardless of the
153   endianness of the underlying host. */
154static inline UInt getUIntLittleEndianly ( UChar* p )
155{
156   UInt w = 0;
157   w = (w << 8) | p[3];
158   w = (w << 8) | p[2];
159   w = (w << 8) | p[1];
160   w = (w << 8) | p[0];
161   return w;
162}
163
164/* Sign extend a N-bit value up to 64 bits, by copying
165   bit N-1 into all higher positions. */
166static ULong sx_to_64 ( ULong x, UInt n )
167{
168   vassert(n > 1 && n < 64);
169   Long r = (Long)x;
170   r = (r << (64-n)) >> (64-n);
171   return (ULong)r;
172}
173
174//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
175//ZZ    endianness of the underlying host. */
176//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
177//ZZ {
178//ZZ    UShort w = 0;
179//ZZ    w = (w << 8) | p[1];
180//ZZ    w = (w << 8) | p[0];
181//ZZ    return w;
182//ZZ }
183//ZZ
184//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
185//ZZ    vassert(sh >= 0 && sh < 32);
186//ZZ    if (sh == 0)
187//ZZ       return x;
188//ZZ    else
189//ZZ       return (x << (32-sh)) | (x >> sh);
190//ZZ }
191//ZZ
192//ZZ static Int popcount32 ( UInt x )
193//ZZ {
194//ZZ    Int res = 0, i;
195//ZZ    for (i = 0; i < 32; i++) {
196//ZZ       res += (x & 1);
197//ZZ       x >>= 1;
198//ZZ    }
199//ZZ    return res;
200//ZZ }
201//ZZ
202//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
203//ZZ {
204//ZZ    UInt mask = 1 << ix;
205//ZZ    x &= ~mask;
206//ZZ    x |= ((b << ix) & mask);
207//ZZ    return x;
208//ZZ }
209
210#define BITS2(_b1,_b0)  \
211   (((_b1) << 1) | (_b0))
212
213#define BITS3(_b2,_b1,_b0)  \
214  (((_b2) << 2) | ((_b1) << 1) | (_b0))
215
216#define BITS4(_b3,_b2,_b1,_b0)  \
217   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
218
219#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
220   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)  \
221    | BITS4((_b3),(_b2),(_b1),(_b0)))
222
223#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
224   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
225#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
226   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
227#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
228   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
229
230#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
231   (((_b8) << 8)  \
232    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
233
234#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
235   (((_b9) << 9) | ((_b8) << 8)  \
236    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
237
238#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
239   (((_b10) << 10)  \
240    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
241
242#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
243   (((_b11) << 11)  \
244    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
245
246// produces _uint[_bMax:_bMin]
247#define SLICE_UInt(_uint,_bMax,_bMin)  \
248   (( ((UInt)(_uint)) >> (_bMin))  \
249    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
250
251
252/*------------------------------------------------------------*/
253/*--- Helper bits and pieces for creating IR fragments.    ---*/
254/*------------------------------------------------------------*/
255
256static IRExpr* mkV128 ( UShort w )
257{
258   return IRExpr_Const(IRConst_V128(w));
259}
260
261static IRExpr* mkU64 ( ULong i )
262{
263   return IRExpr_Const(IRConst_U64(i));
264}
265
266static IRExpr* mkU32 ( UInt i )
267{
268   return IRExpr_Const(IRConst_U32(i));
269}
270
271static IRExpr* mkU8 ( UInt i )
272{
273   vassert(i < 256);
274   return IRExpr_Const(IRConst_U8( (UChar)i ));
275}
276
277static IRExpr* mkexpr ( IRTemp tmp )
278{
279   return IRExpr_RdTmp(tmp);
280}
281
282static IRExpr* unop ( IROp op, IRExpr* a )
283{
284   return IRExpr_Unop(op, a);
285}
286
287static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
288{
289   return IRExpr_Binop(op, a1, a2);
290}
291
292static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
293{
294   return IRExpr_Triop(op, a1, a2, a3);
295}
296
297static IRExpr* loadLE ( IRType ty, IRExpr* addr )
298{
299   return IRExpr_Load(Iend_LE, ty, addr);
300}
301
302/* Add a statement to the list held by "irbb". */
303static void stmt ( IRStmt* st )
304{
305   addStmtToIRSB( irsb, st );
306}
307
308static void assign ( IRTemp dst, IRExpr* e )
309{
310   stmt( IRStmt_WrTmp(dst, e) );
311}
312
313static void storeLE ( IRExpr* addr, IRExpr* data )
314{
315   stmt( IRStmt_Store(Iend_LE, addr, data) );
316}
317
318//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
319//ZZ {
320//ZZ    if (guardT == IRTemp_INVALID) {
321//ZZ       /* unconditional */
322//ZZ       storeLE(addr, data);
323//ZZ    } else {
324//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
325//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
326//ZZ    }
327//ZZ }
328//ZZ
329//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
330//ZZ                             IRExpr* addr, IRExpr* alt,
331//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
332//ZZ {
333//ZZ    if (guardT == IRTemp_INVALID) {
334//ZZ       /* unconditional */
335//ZZ       IRExpr* loaded = NULL;
336//ZZ       switch (cvt) {
337//ZZ          case ILGop_Ident32:
338//ZZ             loaded = loadLE(Ity_I32, addr); break;
339//ZZ          case ILGop_8Uto32:
340//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
341//ZZ          case ILGop_8Sto32:
342//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
343//ZZ          case ILGop_16Uto32:
344//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
345//ZZ          case ILGop_16Sto32:
346//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
347//ZZ          default:
348//ZZ             vassert(0);
349//ZZ       }
350//ZZ       vassert(loaded != NULL);
351//ZZ       assign(dst, loaded);
352//ZZ    } else {
353//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
354//ZZ          loaded data before putting the data in 'dst'.  If the load
355//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
356//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
357//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
358//ZZ    }
359//ZZ }
360
361/* Generate a new temporary of the given type. */
362static IRTemp newTemp ( IRType ty )
363{
364   vassert(isPlausibleIRType(ty));
365   return newIRTemp( irsb->tyenv, ty );
366}
367
368//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
369//ZZ    IRRoundingMode. */
370//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
371//ZZ {
372//ZZ    return mkU32(Irrm_NEAREST);
373//ZZ }
374//ZZ
375//ZZ /* Generate an expression for SRC rotated right by ROT. */
376//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
377//ZZ {
378//ZZ    vassert(rot >= 0 && rot < 32);
379//ZZ    if (rot == 0)
380//ZZ       return mkexpr(src);
381//ZZ    return
382//ZZ       binop(Iop_Or32,
383//ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
384//ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
385//ZZ }
386//ZZ
387//ZZ static IRExpr* mkU128 ( ULong i )
388//ZZ {
389//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
390//ZZ }
391//ZZ
392//ZZ /* Generate a 4-aligned version of the given expression if
393//ZZ    the given condition is true.  Else return it unchanged. */
394//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
395//ZZ {
396//ZZ    if (b)
397//ZZ       return binop(Iop_And32, e, mkU32(~3));
398//ZZ    else
399//ZZ       return e;
400//ZZ }
401
402/* Other IR construction helpers. */
403static IROp mkAND ( IRType ty ) {
404   switch (ty) {
405      case Ity_I32: return Iop_And32;
406      case Ity_I64: return Iop_And64;
407      default: vpanic("mkAND");
408   }
409}
410
411static IROp mkOR ( IRType ty ) {
412   switch (ty) {
413      case Ity_I32: return Iop_Or32;
414      case Ity_I64: return Iop_Or64;
415      default: vpanic("mkOR");
416   }
417}
418
419static IROp mkXOR ( IRType ty ) {
420   switch (ty) {
421      case Ity_I32: return Iop_Xor32;
422      case Ity_I64: return Iop_Xor64;
423      default: vpanic("mkXOR");
424   }
425}
426
427static IROp mkSHL ( IRType ty ) {
428   switch (ty) {
429      case Ity_I32: return Iop_Shl32;
430      case Ity_I64: return Iop_Shl64;
431      default: vpanic("mkSHL");
432   }
433}
434
435static IROp mkSHR ( IRType ty ) {
436   switch (ty) {
437      case Ity_I32: return Iop_Shr32;
438      case Ity_I64: return Iop_Shr64;
439      default: vpanic("mkSHR");
440   }
441}
442
443static IROp mkSAR ( IRType ty ) {
444   switch (ty) {
445      case Ity_I32: return Iop_Sar32;
446      case Ity_I64: return Iop_Sar64;
447      default: vpanic("mkSAR");
448   }
449}
450
451static IROp mkNOT ( IRType ty ) {
452   switch (ty) {
453      case Ity_I32: return Iop_Not32;
454      case Ity_I64: return Iop_Not64;
455      default: vpanic("mkNOT");
456   }
457}
458
459static IROp mkADD ( IRType ty ) {
460   switch (ty) {
461      case Ity_I32: return Iop_Add32;
462      case Ity_I64: return Iop_Add64;
463      default: vpanic("mkADD");
464   }
465}
466
467static IROp mkSUB ( IRType ty ) {
468   switch (ty) {
469      case Ity_I32: return Iop_Sub32;
470      case Ity_I64: return Iop_Sub64;
471      default: vpanic("mkSUB");
472   }
473}
474
475static IROp mkADDF ( IRType ty ) {
476   switch (ty) {
477      case Ity_F32: return Iop_AddF32;
478      case Ity_F64: return Iop_AddF64;
479      default: vpanic("mkADDF");
480   }
481}
482
483static IROp mkSUBF ( IRType ty ) {
484   switch (ty) {
485      case Ity_F32: return Iop_SubF32;
486      case Ity_F64: return Iop_SubF64;
487      default: vpanic("mkSUBF");
488   }
489}
490
491static IROp mkMULF ( IRType ty ) {
492   switch (ty) {
493      case Ity_F32: return Iop_MulF32;
494      case Ity_F64: return Iop_MulF64;
495      default: vpanic("mkMULF");
496   }
497}
498
499static IROp mkDIVF ( IRType ty ) {
500   switch (ty) {
501      case Ity_F32: return Iop_DivF32;
502      case Ity_F64: return Iop_DivF64;
503      default: vpanic("mkMULF");
504   }
505}
506
507static IROp mkNEGF ( IRType ty ) {
508   switch (ty) {
509      case Ity_F32: return Iop_NegF32;
510      case Ity_F64: return Iop_NegF64;
511      default: vpanic("mkNEGF");
512   }
513}
514
515static IROp mkABSF ( IRType ty ) {
516   switch (ty) {
517      case Ity_F32: return Iop_AbsF32;
518      case Ity_F64: return Iop_AbsF64;
519      default: vpanic("mkNEGF");
520   }
521}
522
523static IROp mkSQRTF ( IRType ty ) {
524   switch (ty) {
525      case Ity_F32: return Iop_SqrtF32;
526      case Ity_F64: return Iop_SqrtF64;
527      default: vpanic("mkNEGF");
528   }
529}
530
531static IRExpr* mkU ( IRType ty, ULong imm ) {
532   switch (ty) {
533      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
534      case Ity_I64: return mkU64(imm);
535      default: vpanic("mkU");
536   }
537}
538
539/* Generate IR to create 'arg rotated right by imm', for sane values
540   of 'ty' and 'imm'. */
541static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
542{
543   UInt w = 0;
544   if (ty == Ity_I64) {
545      w = 64;
546   } else {
547      vassert(ty == Ity_I32);
548      w = 32;
549   }
550   vassert(w != 0);
551   vassert(imm < w);
552   if (imm == 0) {
553      return arg;
554   }
555   IRTemp res = newTemp(ty);
556   assign(res, binop(mkOR(ty),
557                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
558                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
559   return res;
560}
561
562/* Generate IR to set the returned temp to either all-zeroes or
563   all ones, as a copy of arg<imm>. */
564static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
565{
566   UInt w = 0;
567   if (ty == Ity_I64) {
568      w = 64;
569   } else {
570      vassert(ty == Ity_I32);
571      w = 32;
572   }
573   vassert(w != 0);
574   vassert(imm < w);
575   IRTemp res = newTemp(ty);
576   assign(res, binop(mkSAR(ty),
577                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
578                     mkU8(w - 1)));
579   return res;
580}
581
582/* U-widen 8/16/32/64 bit int expr to 64. */
583static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
584{
585   switch (srcTy) {
586      case Ity_I64: return e;
587      case Ity_I32: return unop(Iop_32Uto64, e);
588      case Ity_I16: return unop(Iop_16Uto64, e);
589      case Ity_I8:  return unop(Iop_8Uto64, e);
590      default: vpanic("widenUto64(arm64)");
591   }
592}
593
594/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
595   of these combinations make sense. */
596static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
597{
598   switch (dstTy) {
599      case Ity_I64: return e;
600      case Ity_I32: return unop(Iop_64to32, e);
601      case Ity_I16: return unop(Iop_64to16, e);
602      case Ity_I8:  return unop(Iop_64to8, e);
603      default: vpanic("narrowFrom64(arm64)");
604   }
605}
606
607
608/*------------------------------------------------------------*/
609/*--- Helpers for accessing guest registers.               ---*/
610/*------------------------------------------------------------*/
611
612#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
613#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
614#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
615#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
616#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
617#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
618#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
619#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
620#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
621#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
622#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
623#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
624#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
625#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
626#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
627#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
628#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
629#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
630#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
631#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
632#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
633#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
634#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
635#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
636#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
637#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
638#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
639#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
640#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
641#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
642#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)
643
644#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
645#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)
646
647#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
648#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
649#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
650#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)
651
652#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
653#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)
654
655#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
656#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
657#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
658#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
659#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
660#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
661#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
662#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
663#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
664#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
665#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
666#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
667#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
668#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
669#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
670#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
671#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
672#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
673#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
674#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
675#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
676#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
677#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
678#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
679#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
680#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
681#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
682#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
683#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
684#define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
685#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
686#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)
687
688#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
689#define OFFB_FPSR     offsetof(VexGuestARM64State,guest_FPSR)
690//ZZ #define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
691//ZZ #define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
692//ZZ #define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
693//ZZ #define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
694//ZZ #define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
695//ZZ #define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
696//ZZ #define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)
697
698#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
699#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)
700
701
702/* ---------------- Integer registers ---------------- */
703
704static Int offsetIReg64 ( UInt iregNo )
705{
706   /* Do we care about endianness here?  We do if sub-parts of integer
707      registers are accessed. */
708   switch (iregNo) {
709      case 0:  return OFFB_X0;
710      case 1:  return OFFB_X1;
711      case 2:  return OFFB_X2;
712      case 3:  return OFFB_X3;
713      case 4:  return OFFB_X4;
714      case 5:  return OFFB_X5;
715      case 6:  return OFFB_X6;
716      case 7:  return OFFB_X7;
717      case 8:  return OFFB_X8;
718      case 9:  return OFFB_X9;
719      case 10: return OFFB_X10;
720      case 11: return OFFB_X11;
721      case 12: return OFFB_X12;
722      case 13: return OFFB_X13;
723      case 14: return OFFB_X14;
724      case 15: return OFFB_X15;
725      case 16: return OFFB_X16;
726      case 17: return OFFB_X17;
727      case 18: return OFFB_X18;
728      case 19: return OFFB_X19;
729      case 20: return OFFB_X20;
730      case 21: return OFFB_X21;
731      case 22: return OFFB_X22;
732      case 23: return OFFB_X23;
733      case 24: return OFFB_X24;
734      case 25: return OFFB_X25;
735      case 26: return OFFB_X26;
736      case 27: return OFFB_X27;
737      case 28: return OFFB_X28;
738      case 29: return OFFB_X29;
739      case 30: return OFFB_X30;
740      /* but not 31 */
741      default: vassert(0);
742   }
743}
744
745static Int offsetIReg64orSP ( UInt iregNo )
746{
747   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
748}
749
750static const HChar* nameIReg64orZR ( UInt iregNo )
751{
752   vassert(iregNo < 32);
753   static const HChar* names[32]
754      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
755          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
756          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
757          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
758   return names[iregNo];
759}
760
761static const HChar* nameIReg64orSP ( UInt iregNo )
762{
763   if (iregNo == 31) {
764      return "sp";
765   }
766   vassert(iregNo < 31);
767   return nameIReg64orZR(iregNo);
768}
769
770static IRExpr* getIReg64orSP ( UInt iregNo )
771{
772   vassert(iregNo < 32);
773   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
774}
775
776static IRExpr* getIReg64orZR ( UInt iregNo )
777{
778   if (iregNo == 31) {
779      return mkU64(0);
780   }
781   vassert(iregNo < 31);
782   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
783}
784
785static void putIReg64orSP ( UInt iregNo, IRExpr* e )
786{
787   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
788   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
789}
790
791static void putIReg64orZR ( UInt iregNo, IRExpr* e )
792{
793   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
794   if (iregNo == 31) {
795      return;
796   }
797   vassert(iregNo < 31);
798   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
799}
800
801static const HChar* nameIReg32orZR ( UInt iregNo )
802{
803   vassert(iregNo < 32);
804   static const HChar* names[32]
805      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
806          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
807          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
808          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
809   return names[iregNo];
810}
811
812static const HChar* nameIReg32orSP ( UInt iregNo )
813{
814   if (iregNo == 31) {
815      return "wsp";
816   }
817   vassert(iregNo < 31);
818   return nameIReg32orZR(iregNo);
819}
820
821static IRExpr* getIReg32orSP ( UInt iregNo )
822{
823   vassert(iregNo < 32);
824   return unop(Iop_64to32,
825               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
826}
827
828static IRExpr* getIReg32orZR ( UInt iregNo )
829{
830   if (iregNo == 31) {
831      return mkU32(0);
832   }
833   vassert(iregNo < 31);
834   return unop(Iop_64to32,
835               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
836}
837
838static void putIReg32orSP ( UInt iregNo, IRExpr* e )
839{
840   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
841   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
842}
843
844static void putIReg32orZR ( UInt iregNo, IRExpr* e )
845{
846   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
847   if (iregNo == 31) {
848      return;
849   }
850   vassert(iregNo < 31);
851   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
852}
853
854static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
855{
856   vassert(is64 == True || is64 == False);
857   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
858}
859
860static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
861{
862   vassert(is64 == True || is64 == False);
863   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
864}
865
866static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
867{
868   vassert(is64 == True || is64 == False);
869   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
870}
871
872static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
873{
874   vassert(is64 == True || is64 == False);
875   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
876}
877
878static void putPC ( IRExpr* e )
879{
880   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
881   stmt( IRStmt_Put(OFFB_PC, e) );
882}
883
884
885/* ---------------- Vector (Q) registers ---------------- */
886
887static Int offsetQReg128 ( UInt qregNo )
888{
889   /* We don't care about endianness at this point.  It only becomes
890      relevant when dealing with sections of these registers.*/
891   switch (qregNo) {
892      case 0:  return OFFB_Q0;
893      case 1:  return OFFB_Q1;
894      case 2:  return OFFB_Q2;
895      case 3:  return OFFB_Q3;
896      case 4:  return OFFB_Q4;
897      case 5:  return OFFB_Q5;
898      case 6:  return OFFB_Q6;
899      case 7:  return OFFB_Q7;
900      case 8:  return OFFB_Q8;
901      case 9:  return OFFB_Q9;
902      case 10: return OFFB_Q10;
903      case 11: return OFFB_Q11;
904      case 12: return OFFB_Q12;
905      case 13: return OFFB_Q13;
906      case 14: return OFFB_Q14;
907      case 15: return OFFB_Q15;
908      case 16: return OFFB_Q16;
909      case 17: return OFFB_Q17;
910      case 18: return OFFB_Q18;
911      case 19: return OFFB_Q19;
912      case 20: return OFFB_Q20;
913      case 21: return OFFB_Q21;
914      case 22: return OFFB_Q22;
915      case 23: return OFFB_Q23;
916      case 24: return OFFB_Q24;
917      case 25: return OFFB_Q25;
918      case 26: return OFFB_Q26;
919      case 27: return OFFB_Q27;
920      case 28: return OFFB_Q28;
921      case 29: return OFFB_Q29;
922      case 30: return OFFB_Q30;
923      case 31: return OFFB_Q31;
924      default: vassert(0);
925   }
926}
927
928/* Write to a complete Qreg. */
929static void putQReg128 ( UInt qregNo, IRExpr* e )
930{
931   vassert(qregNo < 32);
932   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
933   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
934}
935
936/* Read a complete Qreg. */
937static IRExpr* getQReg128 ( UInt qregNo )
938{
939   vassert(qregNo < 32);
940   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
941}
942
943/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
944   bit sub-parts we can choose either integer or float types, and
945   choose float on the basis that that is the common use case and so
946   will give least interference with Put-to-Get forwarding later
947   on. */
948static IRType preferredVectorSubTypeFromSize ( UInt szB )
949{
950   switch (szB) {
951      case 1:  return Ity_I8;
952      case 2:  return Ity_I16;
953      case 4:  return Ity_I32; //Ity_F32;
954      case 8:  return Ity_F64;
955      case 16: return Ity_V128;
956      default: vassert(0);
957   }
958}
959
960/* Find the offset of the laneNo'th lane of type laneTy in the given
961   Qreg.  Since the host is little-endian, the least significant lane
962   has the lowest offset. */
963static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
964{
965   vassert(!host_is_bigendian);
966   Int base = offsetQReg128(qregNo);
967   /* Since the host is little-endian, the least significant lane
968      will be at the lowest address. */
969   /* Restrict this to known types, so as to avoid silently accepting
970      stupid types. */
971   UInt laneSzB = 0;
972   switch (laneTy) {
973      case Ity_I8:                 laneSzB = 1;  break;
974      case Ity_I16:                laneSzB = 2;  break;
975      case Ity_F32: case Ity_I32:  laneSzB = 4;  break;
976      case Ity_F64: case Ity_I64:  laneSzB = 8;  break;
977      case Ity_V128:               laneSzB = 16; break;
978      default: break;
979   }
980   vassert(laneSzB > 0);
981   UInt minOff = laneNo * laneSzB;
982   UInt maxOff = minOff + laneSzB - 1;
983   vassert(maxOff < 16);
984   return base + minOff;
985}
986
987/* Put to the least significant lane of a Qreg. */
988static void putQRegLO ( UInt qregNo, IRExpr* e )
989{
990   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
991   Int    off = offsetQRegLane(qregNo, ty, 0);
992   switch (ty) {
993      case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
994      case Ity_F32: case Ity_F64: case Ity_V128:
995         break;
996      default:
997         vassert(0); // Other cases are probably invalid
998   }
999   stmt(IRStmt_Put(off, e));
1000}
1001
1002/* Get from the least significant lane of a Qreg. */
1003static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
1004{
1005   Int off = offsetQRegLane(qregNo, ty, 0);
1006   switch (ty) {
1007      case Ity_I8:
1008      case Ity_I16:
1009      case Ity_I32: case Ity_I64:
1010      case Ity_F32: case Ity_F64: case Ity_V128:
1011         break;
1012      default:
1013         vassert(0); // Other cases are ATC
1014   }
1015   return IRExpr_Get(off, ty);
1016}
1017
1018static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
1019{
1020   static const HChar* namesQ[32]
1021      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
1022          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
1023          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
1024          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
1025   static const HChar* namesD[32]
1026      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
1027          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
1028          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
1029          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
1030   static const HChar* namesS[32]
1031      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
1032          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
1033          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
1034          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
1035   static const HChar* namesH[32]
1036      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
1037          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
1038          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
1039          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
1040   static const HChar* namesB[32]
1041      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
1042          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
1043          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
1044          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
1045   vassert(qregNo < 32);
1046   switch (sizeofIRType(laneTy)) {
1047      case 1:  return namesB[qregNo];
1048      case 2:  return namesH[qregNo];
1049      case 4:  return namesS[qregNo];
1050      case 8:  return namesD[qregNo];
1051      case 16: return namesQ[qregNo];
1052      default: vassert(0);
1053   }
1054   /*NOTREACHED*/
1055}
1056
/* Return the asm name ("q0" .. "q31") for a complete Qreg. */
static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}
1061
/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host.  On a
   little-endian host, the upper half is lane 1 of the two I64
   lanes. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}
1068
1069static IRExpr* getQRegHI64 ( UInt qregNo )
1070{
1071   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
1072}
1073
1074static void putQRegHI64 ( UInt qregNo, IRExpr* e )
1075{
1076   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
1077   Int    off = offsetQRegHI64(qregNo);
1078   switch (ty) {
1079      case Ity_I64: case Ity_F64:
1080         break;
1081      default:
1082         vassert(0); // Other cases are plain wrong
1083   }
1084   stmt(IRStmt_Put(off, e));
1085}
1086
1087/* Put to a specified lane of a Qreg. */
1088static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1089{
1090   IRType laneTy  = typeOfIRExpr(irsb->tyenv, e);
1091   Int    off     = offsetQRegLane(qregNo, laneTy, laneNo);
1092   switch (laneTy) {
1093      case Ity_F64: case Ity_I64:
1094      case Ity_I32: case Ity_F32:
1095      case Ity_I16:
1096      case Ity_I8:
1097         break;
1098      default:
1099         vassert(0); // Other cases are ATC
1100   }
1101   stmt(IRStmt_Put(off, e));
1102}
1103
1104/* Get from a specified lane of a Qreg. */
1105static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1106{
1107   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1108   switch (laneTy) {
1109      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
1110      case Ity_F64:
1111         break;
1112      default:
1113         vassert(0); // Other cases are ATC
1114   }
1115   return IRExpr_Get(off, laneTy);
1116}
1117
1118
1119//ZZ /* ---------------- Misc registers ---------------- */
1120//ZZ
1121//ZZ static void putMiscReg32 ( UInt    gsoffset,
1122//ZZ                            IRExpr* e, /* :: Ity_I32 */
1123//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
1124//ZZ {
1125//ZZ    switch (gsoffset) {
1126//ZZ       case OFFB_FPSCR:   break;
1127//ZZ       case OFFB_QFLAG32: break;
1128//ZZ       case OFFB_GEFLAG0: break;
1129//ZZ       case OFFB_GEFLAG1: break;
1130//ZZ       case OFFB_GEFLAG2: break;
1131//ZZ       case OFFB_GEFLAG3: break;
1132//ZZ       default: vassert(0); /* awaiting more cases */
1133//ZZ    }
1134//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1135//ZZ
1136//ZZ    if (guardT == IRTemp_INVALID) {
1137//ZZ       /* unconditional write */
1138//ZZ       stmt(IRStmt_Put(gsoffset, e));
1139//ZZ    } else {
1140//ZZ       stmt(IRStmt_Put(
1141//ZZ          gsoffset,
1142//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1143//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
1144//ZZ       ));
1145//ZZ    }
1146//ZZ }
1147//ZZ
1148//ZZ static IRTemp get_ITSTATE ( void )
1149//ZZ {
1150//ZZ    ASSERT_IS_THUMB;
1151//ZZ    IRTemp t = newTemp(Ity_I32);
1152//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1153//ZZ    return t;
1154//ZZ }
1155//ZZ
1156//ZZ static void put_ITSTATE ( IRTemp t )
1157//ZZ {
1158//ZZ    ASSERT_IS_THUMB;
1159//ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1160//ZZ }
1161//ZZ
1162//ZZ static IRTemp get_QFLAG32 ( void )
1163//ZZ {
1164//ZZ    IRTemp t = newTemp(Ity_I32);
1165//ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1166//ZZ    return t;
1167//ZZ }
1168//ZZ
1169//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1170//ZZ {
1171//ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1172//ZZ }
1173//ZZ
1174//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1175//ZZ    Status Register) to indicate that overflow or saturation occurred.
1176//ZZ    Nb: t must be zero to denote no saturation, and any nonzero
1177//ZZ    value to indicate saturation. */
1178//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1179//ZZ {
1180//ZZ    IRTemp old = get_QFLAG32();
1181//ZZ    IRTemp nyu = newTemp(Ity_I32);
1182//ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1183//ZZ    put_QFLAG32(nyu, condT);
1184//ZZ }
1185
1186
1187/* ---------------- FPCR stuff ---------------- */
1188
1189/* Generate IR to get hold of the rounding mode bits in FPCR, and
1190   convert them to IR format.  Bind the final result to the
1191   returned temp. */
1192static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1193{
1194   /* The ARMvfp encoding for rounding mode bits is:
1195         00  to nearest
1196         01  to +infinity
1197         10  to -infinity
1198         11  to zero
1199      We need to convert that to the IR encoding:
1200         00  to nearest (the default)
1201         10  to +infinity
1202         01  to -infinity
1203         11  to zero
1204      Which can be done by swapping bits 0 and 1.
1205      The rmode bits are at 23:22 in FPSCR.
1206   */
1207   IRTemp armEncd = newTemp(Ity_I32);
1208   IRTemp swapped = newTemp(Ity_I32);
1209   /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
1210      we don't zero out bits 24 and above, since the assignment to
1211      'swapped' will mask them out anyway. */
1212   assign(armEncd,
1213          binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1214   /* Now swap them. */
1215   assign(swapped,
1216          binop(Iop_Or32,
1217                binop(Iop_And32,
1218                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1219                      mkU32(2)),
1220                binop(Iop_And32,
1221                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1222                      mkU32(1))
1223         ));
1224   return swapped;
1225}
1226
1227
1228/*------------------------------------------------------------*/
1229/*--- Helpers for flag handling and conditional insns      ---*/
1230/*------------------------------------------------------------*/
1231
1232static const HChar* nameARM64Condcode ( ARM64Condcode cond )
1233{
1234   switch (cond) {
1235      case ARM64CondEQ:  return "eq";
1236      case ARM64CondNE:  return "ne";
1237      case ARM64CondCS:  return "cs";  // or 'hs'
1238      case ARM64CondCC:  return "cc";  // or 'lo'
1239      case ARM64CondMI:  return "mi";
1240      case ARM64CondPL:  return "pl";
1241      case ARM64CondVS:  return "vs";
1242      case ARM64CondVC:  return "vc";
1243      case ARM64CondHI:  return "hi";
1244      case ARM64CondLS:  return "ls";
1245      case ARM64CondGE:  return "ge";
1246      case ARM64CondLT:  return "lt";
1247      case ARM64CondGT:  return "gt";
1248      case ARM64CondLE:  return "le";
1249      case ARM64CondAL:  return "al";
1250      case ARM64CondNV:  return "nv";
1251      default: vpanic("name_ARM64Condcode");
1252   }
1253}
1254
/* and a handy shorthand for it */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}
1259
1260
1261/* Build IR to calculate some particular condition from stored
1262   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1263   Ity_I64, suitable for narrowing.  Although the return type is
1264   Ity_I64, the returned value is either 0 or 1.  'cond' must be
1265   :: Ity_I64 and must denote the condition to compute in
1266   bits 7:4, and be zero everywhere else.
1267*/
1268static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
1269{
1270   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
1271   /* And 'cond' had better produce a value in which only bits 7:4 are
1272      nonzero.  However, obviously we can't assert for that. */
1273
1274   /* So what we're constructing for the first argument is
1275      "(cond << 4) | stored-operation".
1276      However, as per comments above, 'cond' must be supplied
1277      pre-shifted to this function.
1278
1279      This pairing scheme requires that the ARM64_CC_OP_ values all fit
1280      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
1281      8 bits of the first argument. */
1282   IRExpr** args
1283      = mkIRExprVec_4(
1284           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
1285           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1286           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1287           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
1288        );
1289   IRExpr* call
1290      = mkIRExprCCall(
1291           Ity_I64,
1292           0/*regparm*/,
1293           "arm64g_calculate_condition", &arm64g_calculate_condition,
1294           args
1295        );
1296
1297   /* Exclude the requested condition, OP and NDEP from definedness
1298      checking.  We're only interested in DEP1 and DEP2. */
1299   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1300   return call;
1301}
1302
1303
1304/* Build IR to calculate some particular condition from stored
1305   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1306   Ity_I64, suitable for narrowing.  Although the return type is
1307   Ity_I64, the returned value is either 0 or 1.
1308*/
1309static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1310{
1311  /* First arg is "(cond << 4) | condition".  This requires that the
1312     ARM64_CC_OP_ values all fit in 4 bits.  Hence we are passing a
1313     (COND, OP) pair in the lowest 8 bits of the first argument. */
1314   vassert(cond >= 0 && cond <= 15);
1315   return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
1316}
1317
1318
1319/* Build IR to calculate just the carry flag from stored
1320   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1321   Ity_I64. */
1322static IRExpr* mk_arm64g_calculate_flag_c ( void )
1323{
1324   IRExpr** args
1325      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
1326                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1327                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1328                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1329   IRExpr* call
1330      = mkIRExprCCall(
1331           Ity_I64,
1332           0/*regparm*/,
1333           "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
1334           args
1335        );
1336   /* Exclude OP and NDEP from definedness checking.  We're only
1337      interested in DEP1 and DEP2. */
1338   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1339   return call;
1340}
1341
1342
1343//ZZ /* Build IR to calculate just the overflow flag from stored
1344//ZZ    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1345//ZZ    Ity_I32. */
1346//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1347//ZZ {
1348//ZZ    IRExpr** args
1349//ZZ       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1350//ZZ                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1351//ZZ                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1352//ZZ                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1353//ZZ    IRExpr* call
1354//ZZ       = mkIRExprCCall(
1355//ZZ            Ity_I32,
1356//ZZ            0/*regparm*/,
1357//ZZ            "armg_calculate_flag_v", &armg_calculate_flag_v,
1358//ZZ            args
1359//ZZ         );
1360//ZZ    /* Exclude OP and NDEP from definedness checking.  We're only
1361//ZZ       interested in DEP1 and DEP2. */
1362//ZZ    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1363//ZZ    return call;
1364//ZZ }
1365
1366
1367/* Build IR to calculate N Z C V in bits 31:28 of the
1368   returned word. */
1369static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1370{
1371   IRExpr** args
1372      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
1373                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1374                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1375                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1376   IRExpr* call
1377      = mkIRExprCCall(
1378           Ity_I64,
1379           0/*regparm*/,
1380           "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1381           args
1382        );
1383   /* Exclude OP and NDEP from definedness checking.  We're only
1384      interested in DEP1 and DEP2. */
1385   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1386   return call;
1387}
1388
1389
1390/* Build IR to set the flags thunk, in the most general case. */
1391static
1392void setFlags_D1_D2_ND ( UInt cc_op,
1393                         IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1394{
1395   vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I64));
1396   vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I64));
1397   vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I64));
1398   vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1399   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(cc_op) ));
1400   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1401   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1402   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1403}
1404
1405/* Build IR to set the flags thunk after ADD or SUB. */
1406static
1407void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1408{
1409   IRTemp argL64 = IRTemp_INVALID;
1410   IRTemp argR64 = IRTemp_INVALID;
1411   IRTemp z64    = newTemp(Ity_I64);
1412   if (is64) {
1413      argL64 = argL;
1414      argR64 = argR;
1415   } else {
1416      argL64 = newTemp(Ity_I64);
1417      argR64 = newTemp(Ity_I64);
1418      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1419      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1420   }
1421   assign(z64, mkU64(0));
1422   UInt cc_op = ARM64G_CC_OP_NUMBER;
1423   /**/ if ( isSUB &&  is64) { cc_op = ARM64G_CC_OP_SUB64; }
1424   else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1425   else if (!isSUB &&  is64) { cc_op = ARM64G_CC_OP_ADD64; }
1426   else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1427   else                      { vassert(0); }
1428   setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1429}
1430
1431static
1432void setFlags_ADC_SBC(Bool is64, Bool isSBC, IRTemp argL, IRTemp argR, IRTemp oldC)
1433{
1434   IRTemp argL64 = IRTemp_INVALID;
1435   IRTemp argR64 = IRTemp_INVALID;
1436   IRTemp oldC64 = IRTemp_INVALID;
1437   if (is64) {
1438      argL64 = argL;
1439      argR64 = argR;
1440      oldC64 = oldC;
1441   } else {
1442      argL64 = newTemp(Ity_I64);
1443      argR64 = newTemp(Ity_I64);
1444      oldC64 = newTemp(Ity_I64);
1445      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1446      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1447      assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1448   }
1449   UInt cc_op = ARM64G_CC_OP_NUMBER;
1450   /**/ if ( isSBC &&  is64) { cc_op = ARM64G_CC_OP_SBC64; }
1451   else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1452   else if (!isSBC &&  is64) { cc_op = ARM64G_CC_OP_ADC64; }
1453   else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1454   else                      { vassert(0); }
1455   setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1456}
1457
1458/* Build IR to set the flags thunk after ADD or SUB, if the given
1459   condition evaluates to True at run time.  If not, the flags are set
1460   to the specified NZCV value. */
1461static
1462void setFlags_ADD_SUB_conditionally (
1463        Bool is64, Bool isSUB,
1464        IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1465     )
1466{
1467   /* Generate IR as follows:
1468        CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1469        CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1470        CC_DEP2 = ITE(cond, argR64, 0)
1471        CC_NDEP = 0
1472   */
1473
1474   IRTemp z64 = newTemp(Ity_I64);
1475   assign(z64, mkU64(0));
1476
1477   /* Establish the operation and operands for the True case. */
1478   IRTemp t_dep1 = IRTemp_INVALID;
1479   IRTemp t_dep2 = IRTemp_INVALID;
1480   UInt   t_op   = ARM64G_CC_OP_NUMBER;
1481   /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
1482   else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1483   else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
1484   else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1485   else                      { vassert(0); }
1486   /* */
1487   if (is64) {
1488      t_dep1 = argL;
1489      t_dep2 = argR;
1490   } else {
1491      t_dep1 = newTemp(Ity_I64);
1492      t_dep2 = newTemp(Ity_I64);
1493      assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1494      assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1495   }
1496
1497   /* Establish the operation and operands for the False case. */
1498   IRTemp f_dep1 = newTemp(Ity_I64);
1499   IRTemp f_dep2 = z64;
1500   UInt   f_op   = ARM64G_CC_OP_COPY;
1501   assign(f_dep1, mkU64(nzcv << 28));
1502
1503   /* Final thunk values */
1504   IRTemp dep1 = newTemp(Ity_I64);
1505   IRTemp dep2 = newTemp(Ity_I64);
1506   IRTemp op   = newTemp(Ity_I64);
1507
1508   assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1509   assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1510   assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1511
1512   /* finally .. */
1513   stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
1514   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1515   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1516   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1517}
1518
1519/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1520static
1521void setFlags_LOGIC ( Bool is64, IRTemp res )
1522{
1523   IRTemp res64 = IRTemp_INVALID;
1524   IRTemp z64   = newTemp(Ity_I64);
1525   UInt   cc_op = ARM64G_CC_OP_NUMBER;
1526   if (is64) {
1527      res64 = res;
1528      cc_op = ARM64G_CC_OP_LOGIC64;
1529   } else {
1530      res64 = newTemp(Ity_I64);
1531      assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1532      cc_op = ARM64G_CC_OP_LOGIC32;
1533   }
1534   assign(z64, mkU64(0));
1535   setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1536}
1537
1538/* Build IR to set the flags thunk to a given NZCV value.  NZCV is
1539   located in bits 31:28 of the supplied value. */
1540static
1541void setFlags_COPY ( IRTemp nzcv_28x0 )
1542{
1543   IRTemp z64 = newTemp(Ity_I64);
1544   assign(z64, mkU64(0));
1545   setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
1546}
1547
1548
1549//ZZ /* Minor variant of the above that sets NDEP to zero (if it
1550//ZZ    sets it at all) */
1551//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1552//ZZ                              IRTemp t_dep2,
1553//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1554//ZZ {
1555//ZZ    IRTemp z32 = newTemp(Ity_I32);
1556//ZZ    assign( z32, mkU32(0) );
1557//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1558//ZZ }
1559//ZZ
1560//ZZ
1561//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
1562//ZZ    sets it at all) */
1563//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1564//ZZ                              IRTemp t_ndep,
1565//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1566//ZZ {
1567//ZZ    IRTemp z32 = newTemp(Ity_I32);
1568//ZZ    assign( z32, mkU32(0) );
1569//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1570//ZZ }
1571//ZZ
1572//ZZ
1573//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1574//ZZ    sets them at all) */
1575//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1576//ZZ                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1577//ZZ {
1578//ZZ    IRTemp z32 = newTemp(Ity_I32);
1579//ZZ    assign( z32, mkU32(0) );
1580//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1581//ZZ }
1582
1583
1584/*------------------------------------------------------------*/
1585/*--- Misc math helpers                                    ---*/
1586/*------------------------------------------------------------*/
1587
1588/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
1589static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
1590{
1591   IRTemp maskT = newTemp(Ity_I64);
1592   IRTemp res   = newTemp(Ity_I64);
1593   vassert(sh >= 1 && sh <= 63);
1594   assign(maskT, mkU64(mask));
1595   assign( res,
1596           binop(Iop_Or64,
1597                 binop(Iop_Shr64,
1598                       binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
1599                       mkU8(sh)),
1600                 binop(Iop_And64,
1601                       binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
1602                       mkexpr(maskT))
1603                 )
1604           );
1605   return res;
1606}
1607
1608/* Generates byte swaps within 32-bit lanes. */
1609static IRTemp math_UINTSWAP64 ( IRTemp src )
1610{
1611   IRTemp res;
1612   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
1613   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
1614   return res;
1615}
1616
1617/* Generates byte swaps within 16-bit lanes. */
1618static IRTemp math_USHORTSWAP64 ( IRTemp src )
1619{
1620   IRTemp res;
1621   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
1622   return res;
1623}
1624
1625/* Generates a 64-bit byte swap. */
1626static IRTemp math_BYTESWAP64 ( IRTemp src )
1627{
1628   IRTemp res;
1629   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
1630   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
1631   res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
1632   return res;
1633}
1634
1635/* Generates a 64-bit bit swap. */
1636static IRTemp math_BITSWAP64 ( IRTemp src )
1637{
1638   IRTemp res;
1639   res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
1640   res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
1641   res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
1642   return math_BYTESWAP64(res);
1643}
1644
1645/* Duplicates the bits at the bottom of the given word to fill the
1646   whole word.  src :: Ity_I64 is assumed to have zeroes everywhere
1647   except for the bottom bits. */
1648static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
1649{
1650   if (srcTy == Ity_I8) {
1651      IRTemp t16 = newTemp(Ity_I64);
1652      assign(t16, binop(Iop_Or64, mkexpr(src),
1653                                  binop(Iop_Shl64, mkexpr(src), mkU8(8))));
1654      IRTemp t32 = newTemp(Ity_I64);
1655      assign(t32, binop(Iop_Or64, mkexpr(t16),
1656                                  binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
1657      IRTemp t64 = newTemp(Ity_I64);
1658      assign(t64, binop(Iop_Or64, mkexpr(t32),
1659                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
1660      return t64;
1661   }
1662   if (srcTy == Ity_I16) {
1663      IRTemp t32 = newTemp(Ity_I64);
1664      assign(t32, binop(Iop_Or64, mkexpr(src),
1665                                  binop(Iop_Shl64, mkexpr(src), mkU8(16))));
1666      IRTemp t64 = newTemp(Ity_I64);
1667      assign(t64, binop(Iop_Or64, mkexpr(t32),
1668                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
1669      return t64;
1670   }
1671   if (srcTy == Ity_I32) {
1672      IRTemp t64 = newTemp(Ity_I64);
1673      assign(t64, binop(Iop_Or64, mkexpr(src),
1674                                  binop(Iop_Shl64, mkexpr(src), mkU8(32))));
1675      return t64;
1676   }
1677   if (srcTy == Ity_I64) {
1678      return src;
1679   }
1680   vassert(0);
1681}
1682
1683
1684/*------------------------------------------------------------*/
1685/*--- FP comparison helpers                                ---*/
1686/*------------------------------------------------------------*/
1687
/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix       = newTemp(Ity_I64);
   IRTemp termL    = newTemp(Ity_I64);
   IRTemp termR    = newTemp(Ity_I64);
   IRTemp nzcv     = newTemp(Ity_I64);
   IRTemp irRes    = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct value NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   /* Widen the I32 comparison result to I64 so all the bit fiddling
      below can be done at one width. */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
1764
1765
1766/*------------------------------------------------------------*/
1767/*--- Data processing (immediate)                          ---*/
1768/*------------------------------------------------------------*/
1769
1770/* Helper functions for supporting "DecodeBitMasks" */
1771
1772static ULong dbm_ROR ( Int width, ULong x, Int rot )
1773{
1774   vassert(width > 0 && width <= 64);
1775   vassert(rot >= 0 && rot < width);
1776   if (rot == 0) return x;
1777   ULong res = x >> rot;
1778   res |= (x << (width - rot));
1779   if (width < 64)
1780     res &= ((1ULL << width) - 1);
1781   return res;
1782}
1783
1784static ULong dbm_RepTo64( Int esize, ULong x )
1785{
1786   switch (esize) {
1787      case 64:
1788         return x;
1789      case 32:
1790         x &= 0xFFFFFFFF; x |= (x << 32);
1791         return x;
1792      case 16:
1793         x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
1794         return x;
1795      case 8:
1796         x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
1797         return x;
1798      case 4:
1799         x &= 0xF; x |= (x << 4); x |= (x << 8);
1800         x |= (x << 16); x |= (x << 32);
1801         return x;
1802      case 2:
1803         x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
1804         x |= (x << 16); x |= (x << 32);
1805         return x;
1806      default:
1807         break;
1808   }
1809   vpanic("dbm_RepTo64");
1810   /*NOTREACHED*/
1811   return 0;
1812}
1813
1814static Int dbm_highestSetBit ( ULong x )
1815{
1816   Int i;
1817   for (i = 63; i >= 0; i--) {
1818      if (x & (1ULL << i))
1819         return i;
1820   }
1821   vassert(x == 0);
1822   return -1;
1823}
1824
/* Implementation of the ARMv8 ARM pseudocode function
   DecodeBitMasks, used by the logical (immediate) and bitfield
   instruction groups.

   On success, returns True and writes the "wmask" and "tmask"
   results through |wmask| and |tmask| respectively; either pointer
   may be NULL if the caller does not need that value.  Returns
   False, with the outputs unwritten, if immN:imms:immr does not
   denote a valid mask.

   |immediate| should be True when decoding for a logical
   (immediate) instruction; that additionally rejects the
   "element is all ones" case.  |M| is the register width in bits,
   32 or 64. */
static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   /* The element size is encoded by the position of the highest set
      bit of immN:NOT(imms).  |len| is its log2; no set bit at all is
      an invalid encoding (so is len == 0, giving a 1-bit element). */
   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   /* For logical immediates, an all-ones element (which would make
      the entire mask all ones) is reserved. */
   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   /* S: number of ones in each element, minus one.
      R: rotate-right amount applied to each element. */
   ULong S = imms & levels;
   ULong R = immr & levels;
   Int   diff = S - R;
   diff &= 63;
   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   /* wmask: the rotated element, replicated out to the full width.
      tmask: the unrotated elem_d, likewise replicated. */
   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}
1884
1885
/* Decode and translate one instruction from the "Data Processing --
   Immediate" group.  Emits IR for the instruction and returns True
   if |insn| was recognised; returns False if not, in which case the
   caller will try other decoders. */
static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */

   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64   = INSN(31,31) == 1;
      Bool isSub  = INSN(30,30) == 1;
      Bool setCC  = INSN(29,29) == 1;
      UInt sh     = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through.  sh encodes the shift applied to
            the immediate: only LSL #0 (00) and LSL #12 (01) exist. */
      } else {
         vassert(sh <= 1);
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL  = newTemp(Ity_I64);
            IRTemp argR  = newTemp(Ity_I64);
            IRTemp res   = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               /* ADDS/SUBS: Rd names ZR (not SP), and NZCV is set. */
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               /* Non flag-setting form: Rd may be SP. */
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL  = newTemp(Ity_I32);
            IRTemp argR  = newTemp(Ity_I32);
            IRTemp res   = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }

   /* -------------------- ADR/ADRP -------------------- */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt  bP    = INSN(31,31);
      UInt  immLo = INSN(30,29);
      UInt  immHi = INSN(23,5);
      UInt  rD    = INSN(4,0);
      /* The 21-bit immediate immHi:immLo is signed. */
      ULong uimm  = (immHi << 2) | immLo;
      ULong simm  = sx_to_64(uimm, 21);
      ULong val;
      if (bP) {
         /* ADRP: the offset is in 4KB pages, applied to the page
            base of the current instruction's address. */
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
      return True;
   }

   /* -------------------- LOGIC(imm) -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
      /* 31 30 28     22 21   15   9  4
         sf op 100100 N  immr imms Rn Rd
           op=00: AND  Rd|SP, Rn, #imm
           op=01: ORR  Rd|SP, Rn, #imm
           op=10: EOR  Rd|SP, Rn, #imm
           op=11: ANDS Rd|ZR, Rn, #imm
      */
      Bool  is64 = INSN(31,31) == 1;
      UInt  op   = INSN(30,29);
      UInt  N    = INSN(22,22);
      UInt  immR = INSN(21,16);
      UInt  immS = INSN(15,10);
      UInt  nn   = INSN(9,5);
      UInt  dd   = INSN(4,0);
      ULong imm  = 0;
      Bool  ok;
      /* N must be zero in the 32-bit variant. */
      if (N == 1 && !is64)
         goto after_logic_imm; /* not allowed; fall through */
      ok = dbm_DecodeBitMasks(&imm, NULL,
                              N, immS, immR, True, is64 ? 64 : 32);
      if (!ok)
         goto after_logic_imm;

      const HChar* names[4] = { "and", "orr", "eor", "ands" };
      const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
      const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };

      vassert(op < 4);
      if (is64) {
         IRExpr* argL = getIReg64orZR(nn);
         IRExpr* argR = mkU64(imm);
         IRTemp  res  = newTemp(Ity_I64);
         assign(res, binop(ops64[op], argL, argR));
         if (op < 3) {
            /* AND/ORR/EOR: Rd may be SP. */
            putIReg64orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
         } else {
            /* ANDS: Rd names ZR, and NZCV is set from the result. */
            putIReg64orZR(dd, mkexpr(res));
            setFlags_LOGIC(True/*is64*/, res);
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
         }
      } else {
         IRExpr* argL = getIReg32orZR(nn);
         IRExpr* argR = mkU32((UInt)imm);
         IRTemp  res  = newTemp(Ity_I32);
         assign(res, binop(ops32[op], argL, argR));
         if (op < 3) {
            putIReg32orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
         } else {
            putIReg32orZR(dd, mkexpr(res));
            setFlags_LOGIC(False/*!is64*/, res);
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
         }
      }
      return True;
   }
   after_logic_imm:

   /* -------------------- MOV{Z,N,K} -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
      /* 31 30 28      22 20    4
         |  |  |       |  |     |
         sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
         sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
         sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
      */
      Bool is64   = INSN(31,31) == 1;
      UInt subopc = INSN(30,29);
      UInt hw     = INSN(22,21);
      UInt imm16  = INSN(20,5);
      UInt dd     = INSN(4,0);
      /* subopc 01 is unallocated; hw >= 2 needs the 64-bit variant. */
      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
         /* invalid; fall through */
      } else {
         ULong imm64 = ((ULong)imm16) << (16 * hw);
         if (!is64)
            vassert(imm64 < 0x100000000ULL);
         switch (subopc) {
            case BITS2(1,0): // MOVZ
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(0,0): // MOVN
               imm64 = ~imm64;
               if (!is64)
                  imm64 &= 0xFFFFFFFFULL;
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(1,1): // MOVK
               /* This is more complex.  We are inserting a slice into
                  the destination register, so we need to have the old
                  value of it. */
               if (is64) {
                  IRTemp old = newTemp(Ity_I64);
                  assign(old, getIReg64orZR(dd));
                  ULong mask = 0xFFFFULL << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or64,
                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
                             mkU64(imm64));
                  putIReg64orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg64orZR(dd), imm16, 16*hw);
               } else {
                  IRTemp old = newTemp(Ity_I32);
                  assign(old, getIReg32orZR(dd));
                  vassert(hw <= 1);
                  UInt mask = 0xFFFF << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or32,
                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
                             mkU32((UInt)imm64));
                  putIReg32orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg32orZR(dd), imm16, 16*hw);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

   /* -------------------- {U,S,}BFM -------------------- */
   /*    30 28     22 21   15   9  4

      sf 10 100110 N  immr imms nn dd
         UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         UBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 00 100110 N  immr imms nn dd
         SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         SBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 01 100110 N  immr imms nn dd
         BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   */
   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
      UInt sf     = INSN(31,31);
      UInt opc    = INSN(30,29);
      UInt N      = INSN(22,22);
      UInt immR   = INSN(21,16);
      UInt immS   = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      /* inZero: the untouched destination bits read as zero (UBFM/
         SBFM) rather than keeping the old Rd value (BFM).
         extend: the bits above the field are a sign extension. */
      Bool inZero = False;
      Bool extend = False;
      const HChar* nm = "???";
      /* skip invalid combinations */
      switch (opc) {
         case BITS2(0,0):
            inZero = True; extend = True; nm = "sbfm"; break;
         case BITS2(0,1):
            inZero = False; extend = False; nm = "bfm"; break;
         case BITS2(1,0):
            inZero = True; extend = False; nm = "ubfm"; break;
         case BITS2(1,1):
            goto after_bfm; /* invalid */
         default:
            vassert(0);
      }
      /* N must track sf; for 32-bit forms the top bits of immR and
         immS must also be zero. */
      if (sf == 1 && N != 1) goto after_bfm;
      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
                             || ((immS >> 5) & 1) != 0)) goto after_bfm;
      ULong wmask = 0, tmask = 0;
      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                   N, immS, immR, False, sf == 1 ? 64 : 32);
      if (!ok) goto after_bfm; /* hmmm */

      Bool   is64 = sf == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;

      IRTemp dst = newTemp(ty);
      IRTemp src = newTemp(ty);
      IRTemp bot = newTemp(ty);
      IRTemp top = newTemp(ty);
      IRTemp res = newTemp(ty);
      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
      assign(src, getIRegOrZR(is64, nn));
      /* perform bitfield move on low bits */
      assign(bot, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
                                         mkU(ty, wmask))));
      /* determine extension bits (sign, zero or dest register) */
      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
      /* combine extension bits and result bits */
      assign(res, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("%s %s, %s, immR=%u, immS=%u\n",
          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
      return True;
   }
   after_bfm:

   /* ---------------------- EXTR ---------------------- */
   /*   30 28     22 20 15   9 4
      1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
      0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   */
   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      UInt imm6  = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      Bool valid = True;
      /* bit 22 (N) must equal sf. */
      if (INSN(31,31) != INSN(22,22))
        valid = False;
      if (!is64 && imm6 >= 32)
        valid = False;
      if (!valid) goto after_extr;
      /* Result is the low half of Rn:Rm shifted right by imm6:
         srcHi supplies the bits above the extraction point, srcLo
         the bits below it. */
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
        assign(res, mkexpr(srcLo));
      } else {
        UInt szBits = 8 * sizeofIRType(ty);
        vassert(imm6 > 0 && imm6 < szBits);
        assign(res, binop(mkOR(ty),
                          binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                          binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
  after_extr:

   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
#  undef INSN
}
2227
2228
2229/*------------------------------------------------------------*/
2230/*--- Data processing (register) instructions              ---*/
2231/*------------------------------------------------------------*/
2232
2233static const HChar* nameSH ( UInt sh ) {
2234   switch (sh) {
2235      case 0: return "lsl";
2236      case 1: return "lsr";
2237      case 2: return "asr";
2238      case 3: return "ror";
2239      default: vassert(0);
2240   }
2241}
2242
2243/* Generate IR to get a register value, possibly shifted by an
2244   immediate.  Returns either a 32- or 64-bit temporary holding the
2245   result.  After the shift, the value can optionally be NOT-ed
2246   too.
2247
2248   sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
2249   in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
2250   isn't allowed, but it's the job of the caller to check that.
2251*/
2252static IRTemp getShiftedIRegOrZR ( Bool is64,
2253                                   UInt sh_how, UInt sh_amt, UInt regNo,
2254                                   Bool invert )
2255{
2256   vassert(sh_how < 4);
2257   vassert(sh_amt < (is64 ? 64 : 32));
2258   IRType ty = is64 ? Ity_I64 : Ity_I32;
2259   IRTemp t0 = newTemp(ty);
2260   assign(t0, getIRegOrZR(is64, regNo));
2261   IRTemp t1 = newTemp(ty);
2262   switch (sh_how) {
2263      case BITS2(0,0):
2264         assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2265         break;
2266      case BITS2(0,1):
2267         assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2268         break;
2269      case BITS2(1,0):
2270         assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2271         break;
2272      case BITS2(1,1):
2273         assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2274         break;
2275      default:
2276         vassert(0);
2277   }
2278   if (invert) {
2279      IRTemp t2 = newTemp(ty);
2280      assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2281      return t2;
2282   } else {
2283      return t1;
2284   }
2285}
2286
2287
2288static
2289Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2290                                        UInt insn)
2291{
2292#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
2293
2294   /* ------------------- ADD/SUB(reg) ------------------- */
2295   /* x==0 => 32 bit op      x==1 => 64 bit op
2296      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2297
2298      31 30 29 28    23 21 20 15   9  4
2299      |  |  |  |     |  |  |  |    |  |
2300      x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
2301      x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
2302      x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
2303      x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
2304   */
2305   if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2306      UInt   bX    = INSN(31,31);
2307      UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
2308      UInt   bS    = INSN(29, 29); /* set flags? */
2309      UInt   sh    = INSN(23,22);
2310      UInt   rM    = INSN(20,16);
2311      UInt   imm6  = INSN(15,10);
2312      UInt   rN    = INSN(9,5);
2313      UInt   rD    = INSN(4,0);
2314      Bool   isSUB = bOP == 1;
2315      Bool   is64  = bX == 1;
2316      IRType ty    = is64 ? Ity_I64 : Ity_I32;
2317      if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2318         /* invalid; fall through */
2319      } else {
2320         IRTemp argL = newTemp(ty);
2321         assign(argL, getIRegOrZR(is64, rN));
2322         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2323         IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
2324         IRTemp res  = newTemp(ty);
2325         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2326         if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2327         if (bS) {
2328            setFlags_ADD_SUB(is64, isSUB, argL, argR);
2329         }
2330         DIP("%s%s %s, %s, %s, %s #%u\n",
2331             bOP ? "sub" : "add", bS ? "s" : "",
2332             nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2333             nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2334         return True;
2335      }
2336   }
2337
2338   /* ------------------- ADC/SBC(reg) ------------------- */
2339   /* x==0 => 32 bit op      x==1 => 64 bit op
2340
2341      31 30 29 28    23 21 20 15     9  4
2342      |  |  |  |     |  |  |  |      |  |
2343      x  0  0  11010 00 0  Rm 000000 Rn Rd   ADC  Rd,Rn,Rm
2344      x  0  1  11010 00 0  Rm 000000 Rn Rd   ADCS Rd,Rn,Rm
2345      x  1  0  11010 00 0  Rm 000000 Rn Rd   SBC  Rd,Rn,Rm
2346      x  1  1  11010 00 0  Rm 000000 Rn Rd   SBCS Rd,Rn,Rm
2347   */
2348
2349   if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2350      UInt   bX    = INSN(31,31);
2351      UInt   bOP   = INSN(30,30); /* 0: ADC, 1: SBC */
2352      UInt   bS    = INSN(29,29); /* set flags */
2353      UInt   rM    = INSN(20,16);
2354      UInt   rN    = INSN(9,5);
2355      UInt   rD    = INSN(4,0);
2356
2357      Bool   isSUB = bOP == 1;
2358      Bool   is64  = bX == 1;
2359      IRType ty    = is64 ? Ity_I64 : Ity_I32;
2360
2361      IRTemp oldC = newTemp(ty);
2362      assign(oldC,
2363             is64 ? mk_arm64g_calculate_flag_c()
2364                  : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2365
2366      IRTemp argL = newTemp(ty);
2367      assign(argL, getIRegOrZR(is64, rN));
2368      IRTemp argR = newTemp(ty);
2369      assign(argR, getIRegOrZR(is64, rM));
2370
2371      IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
2372      IRTemp res  = newTemp(ty);
2373      if (isSUB) {
2374         IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2375         IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2376         assign(res,
2377                binop(op,
2378                      binop(op, mkexpr(argL), mkexpr(argR)),
2379                      binop(xorOp, mkexpr(oldC), one)));
2380      } else {
2381         assign(res,
2382                binop(op,
2383                      binop(op, mkexpr(argL), mkexpr(argR)),
2384                      mkexpr(oldC)));
2385      }
2386
2387      if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2388
2389      if (bS) {
2390         setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2391      }
2392
2393      DIP("%s%s %s, %s, %s\n",
2394          bOP ? "sbc" : "adc", bS ? "s" : "",
2395          nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2396          nameIRegOrZR(is64, rM));
2397      return True;
2398   }
2399
2400
2401
2402   /* -------------------- LOGIC(reg) -------------------- */
2403   /* x==0 => 32 bit op      x==1 => 64 bit op
2404      N==0 => inv? is no-op (no inversion)
2405      N==1 => inv? is NOT
2406      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2407
2408      31 30 28    23 21 20 15   9  4
2409      |  |  |     |  |  |  |    |  |
2410      x  00 01010 sh N  Rm imm6 Rn Rd  AND  Rd,Rn, inv?(sh(Rm,imm6))
2411      x  01 01010 sh N  Rm imm6 Rn Rd  ORR  Rd,Rn, inv?(sh(Rm,imm6))
2412      x  10 01010 sh N  Rm imm6 Rn Rd  EOR  Rd,Rn, inv?(sh(Rm,imm6))
2413      x  11 01010 sh N  Rm imm6 Rn Rd  ANDS Rd,Rn, inv?(sh(Rm,imm6))
2414      With N=1, the names are: BIC ORN EON BICS
2415   */
2416   if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2417      UInt   bX   = INSN(31,31);
2418      UInt   sh   = INSN(23,22);
2419      UInt   bN   = INSN(21,21);
2420      UInt   rM   = INSN(20,16);
2421      UInt   imm6 = INSN(15,10);
2422      UInt   rN   = INSN(9,5);
2423      UInt   rD   = INSN(4,0);
2424      Bool   is64 = bX == 1;
2425      IRType ty   = is64 ? Ity_I64 : Ity_I32;
2426      if (!is64 && imm6 > 31) {
         /* invalid; fall through */
2428      } else {
2429         IRTemp argL = newTemp(ty);
2430         assign(argL, getIRegOrZR(is64, rN));
2431         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2432         IROp   op   = Iop_INVALID;
2433         switch (INSN(30,29)) {
2434            case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2435            case BITS2(0,1):                  op = mkOR(ty);  break;
2436            case BITS2(1,0):                  op = mkXOR(ty); break;
2437            default: vassert(0);
2438         }
2439         IRTemp res = newTemp(ty);
2440         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2441         if (INSN(30,29) == BITS2(1,1)) {
2442            setFlags_LOGIC(is64, res);
2443         }
2444         putIRegOrZR(is64, rD, mkexpr(res));
2445
2446         static const HChar* names_op[8]
2447            = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2448         vassert(((bN << 2) | INSN(30,29)) < 8);
2449         const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2450         /* Special-case the printing of "MOV" */
2451         if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2452            DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2453                                nameIRegOrZR(is64, rM));
2454         } else {
2455            DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2456                nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2457                nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2458         }
2459         return True;
2460      }
2461   }
2462
2463   /* -------------------- {U,S}MULH -------------------- */
2464   /* 31       23 22 20 15     9   4
2465      10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
2466      10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
2467   */
2468   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
2469       && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
2470      Bool isU = INSN(23,23) == 1;
2471      UInt mm  = INSN(20,16);
2472      UInt nn  = INSN(9,5);
2473      UInt dd  = INSN(4,0);
2474      putIReg64orZR(dd, unop(Iop_128HIto64,
2475                             binop(isU ? Iop_MullU64 : Iop_MullS64,
2476                                   getIReg64orZR(nn), getIReg64orZR(mm))));
2477      DIP("%cmulh %s, %s, %s\n",
2478          isU ? 'u' : 's',
2479          nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2480      return True;
2481   }
2482
2483   /* -------------------- M{ADD,SUB} -------------------- */
2484   /* 31 30           20 15 14 9 4
2485      sf 00 11011 000 m  0  a  n r   MADD Rd,Rn,Rm,Ra  d = a+m*n
      sf 00 11011 000 m  1  a  n r   MSUB Rd,Rn,Rm,Ra  d = a-m*n
2487   */
2488   if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
2489      Bool is64  = INSN(31,31) == 1;
2490      UInt mm    = INSN(20,16);
2491      Bool isAdd = INSN(15,15) == 0;
2492      UInt aa    = INSN(14,10);
2493      UInt nn    = INSN(9,5);
2494      UInt dd    = INSN(4,0);
2495      if (is64) {
2496         putIReg64orZR(
2497            dd,
2498            binop(isAdd ? Iop_Add64 : Iop_Sub64,
2499                  getIReg64orZR(aa),
2500                  binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
2501      } else {
2502         putIReg32orZR(
2503            dd,
2504            binop(isAdd ? Iop_Add32 : Iop_Sub32,
2505                  getIReg32orZR(aa),
2506                  binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
2507      }
2508      DIP("%s %s, %s, %s, %s\n",
2509          isAdd ? "madd" : "msub",
2510          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
2511          nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
2512      return True;
2513   }
2514
2515   /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
2516   /* 31 30 28        20 15   11 9  4
2517      sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
2518      sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
2519      sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
2520      sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
2521      In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
2522   */
2523   if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
2524      Bool    is64 = INSN(31,31) == 1;
2525      UInt    b30  = INSN(30,30);
2526      UInt    mm   = INSN(20,16);
2527      UInt    cond = INSN(15,12);
2528      UInt    b10  = INSN(10,10);
2529      UInt    nn   = INSN(9,5);
2530      UInt    dd   = INSN(4,0);
2531      UInt    op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
2532      IRType  ty   = is64 ? Ity_I64 : Ity_I32;
2533      IRExpr* argL = getIRegOrZR(is64, nn);
2534      IRExpr* argR = getIRegOrZR(is64, mm);
2535      switch (op) {
2536         case BITS2(0,0):
2537            break;
2538         case BITS2(0,1):
2539            argR = binop(mkADD(ty), argR, mkU(ty,1));
2540            break;
2541         case BITS2(1,0):
2542            argR = unop(mkNOT(ty), argR);
2543            break;
2544         case BITS2(1,1):
2545            argR = binop(mkSUB(ty), mkU(ty,0), argR);
2546            break;
2547         default:
2548            vassert(0);
2549      }
2550      putIRegOrZR(
2551         is64, dd,
2552         IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
2553                    argL, argR)
2554      );
2555      const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
2556      DIP("%s %s, %s, %s, %s\n", op_nm[op],
2557          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
2558          nameIRegOrZR(is64, mm), nameCC(cond));
2559      return True;
2560   }
2561
2562   /* -------------- ADD/SUB(extended reg) -------------- */
2563   /*     28         20 15  12   9 4
2564      000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
2565      100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld
2566
2567      001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
2568      101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld
2569
2570      010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
2571      110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld
2572
2573      011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
2574      111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld
2575
2576      The 'm' operand is extended per opt, thusly:
2577
2578        000   Xm & 0xFF           UXTB
2579        001   Xm & 0xFFFF         UXTH
2580        010   Xm & (2^32)-1       UXTW
2581        011   Xm                  UXTX
2582
2583        100   Xm sx from bit 7    SXTB
2584        101   Xm sx from bit 15   SXTH
2585        110   Xm sx from bit 31   SXTW
2586        111   Xm                  SXTX
2587
2588      In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
2589      operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
2590      are the identity operation on Wm.
2591
2592      After extension, the value is shifted left by imm3 bits, which
2593      may only be in the range 0 .. 4 inclusive.
2594   */
2595   if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
2596      Bool is64  = INSN(31,31) == 1;
2597      Bool isSub = INSN(30,30) == 1;
2598      Bool setCC = INSN(29,29) == 1;
2599      UInt mm    = INSN(20,16);
2600      UInt opt   = INSN(15,13);
2601      UInt imm3  = INSN(12,10);
2602      UInt nn    = INSN(9,5);
2603      UInt dd    = INSN(4,0);
2604      const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
2605                                  "sxtb", "sxth", "sxtw", "sxtx" };
2606      /* Do almost the same thing in the 32- and 64-bit cases. */
2607      IRTemp xN = newTemp(Ity_I64);
2608      IRTemp xM = newTemp(Ity_I64);
2609      assign(xN, getIReg64orSP(nn));
2610      assign(xM, getIReg64orZR(mm));
2611      IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
2612      Int     shSX = 0;
2613      /* widen Xm .. */
2614      switch (opt) {
2615         case BITS3(0,0,0): // UXTB
2616            xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
2617         case BITS3(0,0,1): // UXTH
2618            xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
2619         case BITS3(0,1,0): // UXTW -- noop for the 32bit case
2620            if (is64) {
2621               xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
2622            }
2623            break;
2624         case BITS3(0,1,1): // UXTX -- always a noop
2625            break;
2626         case BITS3(1,0,0): // SXTB
2627            shSX = 56; goto sxTo64;
2628         case BITS3(1,0,1): // SXTH
2629            shSX = 48; goto sxTo64;
2630         case BITS3(1,1,0): // SXTW -- noop for the 32bit case
2631            if (is64) {
2632               shSX = 32; goto sxTo64;
2633            }
2634            break;
2635         case BITS3(1,1,1): // SXTX -- always a noop
2636            break;
2637         sxTo64:
2638            vassert(shSX >= 32);
2639            xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
2640                        mkU8(shSX));
2641            break;
2642         default:
2643            vassert(0);
2644      }
2645      /* and now shift */
2646      IRTemp argL = xN;
2647      IRTemp argR = newTemp(Ity_I64);
2648      assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
2649      IRTemp res = newTemp(Ity_I64);
2650      assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
2651                        mkexpr(argL), mkexpr(argR)));
2652      if (is64) {
2653         if (setCC) {
2654            putIReg64orZR(dd, mkexpr(res));
2655            setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
2656         } else {
2657            putIReg64orSP(dd, mkexpr(res));
2658         }
2659      } else {
2660         if (setCC) {
2661            IRTemp argL32 = newTemp(Ity_I32);
2662            IRTemp argR32 = newTemp(Ity_I32);
2663            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
2664            assign(argL32, unop(Iop_64to32, mkexpr(argL)));
2665            assign(argR32, unop(Iop_64to32, mkexpr(argR)));
2666            setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
2667         } else {
2668            putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
2669         }
2670      }
2671      DIP("%s%s %s, %s, %s %s lsl %u\n",
2672          isSub ? "sub" : "add", setCC ? "s" : "",
2673          setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
2674          nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
2675          nameExt[opt], imm3);
2676      return True;
2677   }
2678
2679   /* ---------------- CCMP/CCMN(imm) ---------------- */
2680   /* Bizarrely, these appear in the "data processing register"
2681      category, even though they are operations against an
2682      immediate. */
2683   /* 31   29        20   15   11 9    3
2684      sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
2685      sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond
2686
2687      Operation is:
2688         (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
2689         (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
2690   */
2691   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
2692       && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
2693      Bool is64  = INSN(31,31) == 1;
2694      Bool isSUB = INSN(30,30) == 1;
2695      UInt imm5  = INSN(20,16);
2696      UInt cond  = INSN(15,12);
2697      UInt nn    = INSN(9,5);
2698      UInt nzcv  = INSN(3,0);
2699
2700      IRTemp condT = newTemp(Ity_I1);
2701      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
2702
2703      IRType ty   = is64 ? Ity_I64 : Ity_I32;
2704      IRTemp argL = newTemp(ty);
2705      IRTemp argR = newTemp(ty);
2706
2707      if (is64) {
2708         assign(argL, getIReg64orZR(nn));
2709         assign(argR, mkU64(imm5));
2710      } else {
2711         assign(argL, getIReg32orZR(nn));
2712         assign(argR, mkU32(imm5));
2713      }
2714      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
2715
2716      DIP("ccm%c %s, #%u, #%u, %s\n",
2717          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
2718          imm5, nzcv, nameCC(cond));
2719      return True;
2720   }
2721
2722   /* ---------------- CCMP/CCMN(reg) ---------------- */
2723   /* 31   29        20 15   11 9    3
2724      sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
2725      sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond
2726      Operation is:
2727         (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
2728         (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
2729   */
2730   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
2731       && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
2732      Bool is64  = INSN(31,31) == 1;
2733      Bool isSUB = INSN(30,30) == 1;
2734      UInt mm    = INSN(20,16);
2735      UInt cond  = INSN(15,12);
2736      UInt nn    = INSN(9,5);
2737      UInt nzcv  = INSN(3,0);
2738
2739      IRTemp condT = newTemp(Ity_I1);
2740      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
2741
2742      IRType ty   = is64 ? Ity_I64 : Ity_I32;
2743      IRTemp argL = newTemp(ty);
2744      IRTemp argR = newTemp(ty);
2745
2746      if (is64) {
2747         assign(argL, getIReg64orZR(nn));
2748         assign(argR, getIReg64orZR(mm));
2749      } else {
2750         assign(argL, getIReg32orZR(nn));
2751         assign(argR, getIReg32orZR(mm));
2752      }
2753      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
2754
2755      DIP("ccm%c %s, %s, #%u, %s\n",
2756          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
2757          nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
2758      return True;
2759   }
2760
2761
2762   /* -------------- REV/REV16/REV32/RBIT -------------- */
2763   /* 31 30 28       20    15   11 9 4
2764
2765      1  10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
2766      0  10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn
2767
2768      1  10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
2769      0  10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn
2770
2771      1  10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
2772      0  10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn
2773
2774      1  10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
2775   */
2776   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
2777       && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
2778      UInt b31 = INSN(31,31);
2779      UInt opc = INSN(11,10);
2780
2781      UInt ix = 0;
2782      /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
2783      else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
2784      else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
2785      else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
2786      else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
2787      else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
2788      else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
2789      if (ix >= 1 && ix <= 7) {
2790         Bool   is64  = ix == 1 || ix == 3 || ix == 5 || ix == 7;
2791         UInt   nn    = INSN(9,5);
2792         UInt   dd    = INSN(4,0);
2793         IRTemp src   = newTemp(Ity_I64);
2794         IRTemp dst   = IRTemp_INVALID;
2795         IRTemp (*math)(IRTemp) = NULL;
2796         switch (ix) {
2797            case 1: case 2: math = math_BYTESWAP64;   break;
2798            case 3: case 4: math = math_BITSWAP64;    break;
2799            case 5: case 6: math = math_USHORTSWAP64; break;
2800            case 7:         math = math_UINTSWAP64;   break;
2801            default: vassert(0);
2802         }
2803         const HChar* names[7]
2804           = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
2805         const HChar* nm = names[ix-1];
2806         vassert(math);
2807         if (ix == 6) {
2808            /* This has to be special cased, since the logic below doesn't
2809               handle it correctly. */
2810            assign(src, getIReg64orZR(nn));
2811            dst = math(src);
2812            putIReg64orZR(dd,
2813                          unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
2814         } else if (is64) {
2815            assign(src, getIReg64orZR(nn));
2816            dst = math(src);
2817            putIReg64orZR(dd, mkexpr(dst));
2818         } else {
2819            assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
2820            dst = math(src);
2821            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
2822         }
2823         DIP("%s %s, %s\n", nm,
2824             nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
2825         return True;
2826      }
2827      /* else fall through */
2828   }
2829
2830   /* -------------------- CLZ/CLS -------------------- */
2831   /*    30 28   24   20    15      9 4
2832      sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
2833      sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
2834   */
2835   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
2836       && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
2837      Bool   is64  = INSN(31,31) == 1;
2838      Bool   isCLS = INSN(10,10) == 1;
2839      UInt   nn    = INSN(9,5);
2840      UInt   dd    = INSN(4,0);
2841      IRTemp src   = newTemp(Ity_I64);
2842      IRTemp dst   = newTemp(Ity_I64);
2843      if (!isCLS) { // CLS not yet supported
2844         if (is64) {
2845            assign(src, getIReg64orZR(nn));
2846            assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
2847                                   mkU64(64),
2848                                   unop(Iop_Clz64, mkexpr(src))));
2849            putIReg64orZR(dd, mkexpr(dst));
2850         } else {
2851            assign(src, binop(Iop_Shl64,
2852                              unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
2853            assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
2854                                   mkU64(32),
2855                                   unop(Iop_Clz64, mkexpr(src))));
2856            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
2857         }
2858         DIP("cl%c %s, %s\n",
2859             isCLS ? 's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
2860         return True;
2861      }
2862   }
2863
2864   /* -------------------- LSLV/LSRV/ASRV -------------------- */
2865   /*    30 28        20 15   11 9 4
2866      sf 00 1101 0110 m  0010 00 n d   LSLV Rd,Rn,Rm
2867      sf 00 1101 0110 m  0010 01 n d   LSRV Rd,Rn,Rm
2868      sf 00 1101 0110 m  0010 10 n d   ASRV Rd,Rn,Rm
2869   */
2870   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
2871       && INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) {
2872      Bool   is64 = INSN(31,31) == 1;
2873      UInt   mm   = INSN(20,16);
2874      UInt   op   = INSN(11,10);
2875      UInt   nn   = INSN(9,5);
2876      UInt   dd   = INSN(4,0);
2877      IRType ty   = is64 ? Ity_I64 : Ity_I32;
2878      IRTemp srcL = newTemp(ty);
2879      IRTemp srcR = newTemp(Ity_I8);
2880      IRTemp res  = newTemp(ty);
2881      IROp   iop  = Iop_INVALID;
2882      assign(srcL, getIRegOrZR(is64, nn));
2883      assign(srcR,
2884             unop(Iop_64to8,
2885                  binop(Iop_And64,
2886                        getIReg64orZR(mm), mkU64(is64 ? 63 : 31))));
2887      switch (op) {
2888         case BITS2(0,0): iop = mkSHL(ty); break;
2889         case BITS2(0,1): iop = mkSHR(ty); break;
2890         case BITS2(1,0): iop = mkSAR(ty); break;
2891         default: vassert(0);
2892      }
2893      assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR)));
2894      putIRegOrZR(is64, dd, mkexpr(res));
2895      vassert(op < 3);
2896      const HChar* names[3] = { "lslv", "lsrv", "asrv" };
2897      DIP("%s %s, %s, %s\n",
2898          names[op], nameIRegOrZR(is64,dd),
2899                     nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
2900      return True;
2901   }
2902
2903   /* -------------------- SDIV/UDIV -------------------- */
2904   /*    30 28        20 15    10 9 4
2905      sf 00 1101 0110 m  00001  1 n d  SDIV Rd,Rn,Rm
2906      sf 00 1101 0110 m  00001  0 n d  UDIV Rd,Rn,Rm
2907   */
2908   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
2909       && INSN(15,11) == BITS5(0,0,0,0,1)) {
2910      Bool is64 = INSN(31,31) == 1;
2911      UInt mm   = INSN(20,16);
2912      Bool isS  = INSN(10,10) == 1;
2913      UInt nn   = INSN(9,5);
2914      UInt dd   = INSN(4,0);
2915      if (isS) {
2916         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
2917                                     getIRegOrZR(is64, nn),
2918                                     getIRegOrZR(is64, mm)));
2919      } else {
2920         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
2921                                     getIRegOrZR(is64, nn),
2922                                     getIRegOrZR(is64, mm)));
2923      }
2924      DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
2925          nameIRegOrZR(is64, dd),
2926          nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
2927      return True;
2928   }
2929
2930   /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
2931   /* 31        23  20 15 14 9 4
2932      1001 1011 101 m  0  a  n d   UMADDL Xd,Wn,Wm,Xa
2933      1001 1011 001 m  0  a  n d   SMADDL Xd,Wn,Wm,Xa
2934      1001 1011 101 m  1  a  n d   UMSUBL Xd,Wn,Wm,Xa
2935      1001 1011 001 m  1  a  n d   SMSUBL Xd,Wn,Wm,Xa
2936      with operation
2937         Xd = Xa +/- (Wn *u/s Wm)
2938   */
2939   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
2940      Bool   isU   = INSN(23,23) == 1;
2941      UInt   mm    = INSN(20,16);
2942      Bool   isAdd = INSN(15,15) == 0;
2943      UInt   aa    = INSN(14,10);
2944      UInt   nn    = INSN(9,5);
2945      UInt   dd    = INSN(4,0);
2946      IRTemp wN    = newTemp(Ity_I32);
2947      IRTemp wM    = newTemp(Ity_I32);
2948      IRTemp xA    = newTemp(Ity_I64);
2949      IRTemp muld  = newTemp(Ity_I64);
2950      IRTemp res   = newTemp(Ity_I64);
2951      assign(wN, getIReg32orZR(nn));
2952      assign(wM, getIReg32orZR(mm));
2953      assign(xA, getIReg64orZR(aa));
2954      assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
2955                         mkexpr(wN), mkexpr(wM)));
2956      assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
2957                        mkexpr(xA), mkexpr(muld)));
2958      putIReg64orZR(dd, mkexpr(res));
2959      DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
2960          nameIReg64orZR(dd), nameIReg32orZR(nn),
2961          nameIReg32orZR(mm), nameIReg64orZR(aa));
2962      return True;
2963   }
2964   vex_printf("ARM64 front end: data_processing_register\n");
2965   return False;
2966#  undef INSN
2967}
2968
2969
2970/*------------------------------------------------------------*/
2971/*--- Load and Store instructions                          ---*/
2972/*------------------------------------------------------------*/
2973
2974/* Generate the EA for a "reg + reg" style amode.  This is done from
2975   parts of the insn, but for sanity checking sake it takes the whole
2976   insn.  This appears to depend on insn[15:12], with opt=insn[15:13]
2977   and S=insn[12]:
2978
2979   The possible forms, along with their opt:S values, are:
2980      011:0   Xn|SP + Xm
2981      111:0   Xn|SP + Xm
2982      011:1   Xn|SP + Xm * transfer_szB
2983      111:1   Xn|SP + Xm * transfer_szB
2984      010:0   Xn|SP + 32Uto64(Wm)
2985      010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
2986      110:0   Xn|SP + 32Sto64(Wm)
2987      110:1   Xn|SP + 32Sto64(Wm) * transfer_szB
2988
2989   Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
2990   the transfer size is insn[23,31,30].  For integer loads/stores,
2991   insn[23] is zero, hence szLg2 can be at most 3 in such cases.
2992
2993   If the decoding fails, it returns IRTemp_INVALID.
2994
2995   isInt is True iff this is decoding is for transfers to/from integer
2996   registers.  If False it is for transfers to/from vector registers.
2997*/
static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
{
   /* Decode the "reg + reg" amode fields.  See the block comment
      preceding this function for the opt:S forms and extension rules. */
   UInt    optS  = SLICE_UInt(insn, 15, 12);
   UInt    mm    = SLICE_UInt(insn, 20, 16);
   UInt    nn    = SLICE_UInt(insn, 9, 5);
   UInt    szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
                   | SLICE_UInt(insn, 31, 30); // Log2 of the size

   /* Make sure the disassembly buffer holds a valid (empty) string
      even on the failure paths. */
   buf[0] = 0;

   /* Sanity checks, that this really is a load/store insn. */
   if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
      goto fail;

   if (isInt
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
      goto fail;

   if (!isInt
       && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
      goto fail;

   /* Throw out non-verified but possibly valid cases. */
   switch (szLg2) {
      case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
      case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
      case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
      case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
      case BITS3(1,0,0): // can only ever be valid for the vector case
                         // NOTE(review): both arms below go to 'fail',
                         // so the 128-bit vector size is rejected too --
                         // presumably "not yet verified" rather than a
                         // typo; confirm before changing.
                         if (isInt) goto fail; else goto fail;
      case BITS3(1,0,1): // these sizes are never valid
      case BITS3(1,1,0):
      case BITS3(1,1,1): goto fail;

      default: vassert(0);
   }

   /* Build the offset expression (the extended/scaled Rm) and the
      matching disassembly text, per opt:S. */
   IRExpr* rhs  = NULL;
   switch (optS) {
      case BITS4(1,1,1,0): goto fail; //ATC
      case BITS4(0,1,1,0):
         /* Xn|SP + Xm */
         rhs = getIReg64orZR(mm);
         vex_sprintf(buf, "[%s, %s]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm));
         break;
      case BITS4(1,1,1,1): goto fail; //ATC
      case BITS4(0,1,1,1):
         /* Xn|SP + (Xm << log2(transfer size)) */
         rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s lsl %u]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
         break;
      case BITS4(0,1,0,0):
         /* Xn|SP + zero-extended Wm */
         rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s uxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(0,1,0,1):
         /* Xn|SP + (zero-extended Wm << log2(transfer size)) */
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      case BITS4(1,1,0,0):
         /* Xn|SP + sign-extended Wm */
         rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s sxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(1,1,0,1):
         /* Xn|SP + (sign-extended Wm << log2(transfer size)) */
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      default:
         /* The rest appear to be genuinely invalid */
         goto fail;
   }

   /* Final EA = Xn|SP + offset. */
   vassert(rhs);
   IRTemp res = newTemp(Ity_I64);
   assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
   return res;

  fail:
   vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
   return IRTemp_INVALID;
}
3088
3089
3090/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
3091   bits of DATAE :: Ity_I64. */
3092static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
3093{
3094   IRExpr* addrE = mkexpr(addr);
3095   switch (szB) {
3096      case 8:
3097         storeLE(addrE, dataE);
3098         break;
3099      case 4:
3100         storeLE(addrE, unop(Iop_64to32, dataE));
3101         break;
3102      case 2:
3103         storeLE(addrE, unop(Iop_64to16, dataE));
3104         break;
3105      case 1:
3106         storeLE(addrE, unop(Iop_64to8, dataE));
3107         break;
3108      default:
3109         vassert(0);
3110   }
3111}
3112
3113
3114/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
3115   placing the result in an Ity_I64 temporary. */
3116static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
3117{
3118   IRTemp  res   = newTemp(Ity_I64);
3119   IRExpr* addrE = mkexpr(addr);
3120   switch (szB) {
3121      case 8:
3122         assign(res, loadLE(Ity_I64,addrE));
3123         break;
3124      case 4:
3125         assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
3126         break;
3127      case 2:
3128         assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
3129         break;
3130      case 1:
3131         assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
3132         break;
3133      default:
3134         vassert(0);
3135   }
3136   return res;
3137}
3138
3139
3140static
3141Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
3142{
3143#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
3144
3145   /* ------------ LDR,STR (immediate, uimm12) ----------- */
3146   /* uimm12 is scaled by the transfer size
3147
3148      31 29  26    21    9  4
3149      |  |   |     |     |  |
3150      11 111 00100 imm12 nn tt    STR  Xt, [Xn|SP, #imm12 * 8]
3151      11 111 00101 imm12 nn tt    LDR  Xt, [Xn|SP, #imm12 * 8]
3152
3153      10 111 00100 imm12 nn tt    STR  Wt, [Xn|SP, #imm12 * 4]
3154      10 111 00101 imm12 nn tt    LDR  Wt, [Xn|SP, #imm12 * 4]
3155
3156      01 111 00100 imm12 nn tt    STRH Wt, [Xn|SP, #imm12 * 2]
3157      01 111 00101 imm12 nn tt    LDRH Wt, [Xn|SP, #imm12 * 2]
3158
3159      00 111 00100 imm12 nn tt    STRB Wt, [Xn|SP, #imm12 * 1]
3160      00 111 00101 imm12 nn tt    LDRB Wt, [Xn|SP, #imm12 * 1]
3161   */
3162   if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
3163      UInt   szLg2 = INSN(31,30);
3164      UInt   szB   = 1 << szLg2;
3165      Bool   isLD  = INSN(22,22) == 1;
3166      UInt   offs  = INSN(21,10) * szB;
3167      UInt   nn    = INSN(9,5);
3168      UInt   tt    = INSN(4,0);
3169      IRTemp ta    = newTemp(Ity_I64);
3170      assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
3171      if (nn == 31) { /* FIXME generate stack alignment check */ }
3172      vassert(szLg2 < 4);
3173      if (isLD) {
3174         putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
3175      } else {
3176         gen_narrowing_store(szB, ta, getIReg64orZR(tt));
3177      }
3178      const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
3179      const HChar* st_name[4] = { "strb", "strh", "str", "str" };
3180      DIP("%s %s, [%s, #%u]\n",
3181          (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
3182          nameIReg64orSP(nn), offs);
3183      return True;
3184   }
3185
3186   /* ------------ LDUR,STUR (immediate, simm9) ----------- */
3187   /*
3188      31 29  26      20   11 9  4
3189      |  |   |       |    |  |  |
3190      (at-Rn-then-Rn=EA)  |  |  |
3191      sz 111 00000 0 imm9 01 Rn Rt   STR Rt, [Xn|SP], #simm9
3192      sz 111 00001 0 imm9 01 Rn Rt   LDR Rt, [Xn|SP], #simm9
3193
3194      (at-EA-then-Rn=EA)
3195      sz 111 00000 0 imm9 11 Rn Rt   STR Rt, [Xn|SP, #simm9]!
3196      sz 111 00001 0 imm9 11 Rn Rt   LDR Rt, [Xn|SP, #simm9]!
3197
3198      (at-EA)
3199      sz 111 00000 0 imm9 00 Rn Rt   STR Rt, [Xn|SP, #simm9]
3200      sz 111 00001 0 imm9 00 Rn Rt   LDR Rt, [Xn|SP, #simm9]
3201
3202      simm9 is unscaled.
3203
3204      The case 'wback && Rn == Rt && Rt != 31' is disallowed.  In the
3205      load case this is because would create two competing values for
3206      Rt.  In the store case the reason is unclear, but the spec
3207      disallows it anyway.
3208
3209      Stores are narrowing, loads are unsigned widening.  sz encodes
3210      the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
3211   */
3212   if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
3213       == BITS9(1,1,1, 0,0,0,0,0, 0)) {
3214      UInt szLg2  = INSN(31,30);
3215      UInt szB    = 1 << szLg2;
3216      Bool isLoad = INSN(22,22) == 1;
3217      UInt imm9   = INSN(20,12);
3218      UInt nn     = INSN(9,5);
3219      UInt tt     = INSN(4,0);
3220      Bool wBack  = INSN(10,10) == 1;
3221      UInt how    = INSN(11,10);
3222      if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
3223         /* undecodable; fall through */
3224      } else {
3225         if (nn == 31) { /* FIXME generate stack alignment check */ }
3226
3227         // Compute the transfer address TA and the writeback address WA.
3228         IRTemp tRN = newTemp(Ity_I64);
3229         assign(tRN, getIReg64orSP(nn));
3230         IRTemp tEA = newTemp(Ity_I64);
3231         Long simm9 = (Long)sx_to_64(imm9, 9);
3232         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3233
3234         IRTemp tTA = newTemp(Ity_I64);
3235         IRTemp tWA = newTemp(Ity_I64);
3236         switch (how) {
3237            case BITS2(0,1):
3238               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3239            case BITS2(1,1):
3240               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3241            case BITS2(0,0):
3242               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3243            default:
3244               vassert(0); /* NOTREACHED */
3245         }
3246
3247         /* Normally rN would be updated after the transfer.  However, in
3248            the special case typifed by
3249               str x30, [sp,#-16]!
3250            it is necessary to update SP before the transfer, (1)
3251            because Memcheck will otherwise complain about a write
3252            below the stack pointer, and (2) because the segfault
3253            stack extension mechanism will otherwise extend the stack
3254            only down to SP before the instruction, which might not be
3255            far enough, if the -16 bit takes the actual access
3256            address to the next page.
3257         */
3258         Bool earlyWBack
3259           = wBack && simm9 < 0 && szB == 8
3260             && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
3261
3262         if (wBack && earlyWBack)
3263            putIReg64orSP(nn, mkexpr(tEA));
3264
3265         if (isLoad) {
3266            putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
3267         } else {
3268            gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
3269         }
3270
3271         if (wBack && !earlyWBack)
3272            putIReg64orSP(nn, mkexpr(tEA));
3273
3274         const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
3275         const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
3276         const HChar* fmt_str = NULL;
3277         switch (how) {
3278            case BITS2(0,1):
3279               fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3280               break;
3281            case BITS2(1,1):
3282               fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3283               break;
3284            case BITS2(0,0):
3285               fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
3286               break;
3287            default:
3288               vassert(0);
3289         }
3290         DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
3291                      nameIRegOrZR(szB == 8, tt),
3292                      nameIReg64orSP(nn), simm9);
3293         return True;
3294      }
3295   }
3296
3297   /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
3298   /* L==1 => mm==LD
3299      L==0 => mm==ST
3300      x==0 => 32 bit transfers, and zero extended loads
3301      x==1 => 64 bit transfers
3302      simm7 is scaled by the (single-register) transfer size
3303
3304      (at-Rn-then-Rn=EA)
3305      x0 101 0001 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP], #imm
3306
3307      (at-EA-then-Rn=EA)
3308      x0 101 0011 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]!
3309
3310      (at-EA)
3311      x0 101 0010 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]
3312   */
3313
3314   UInt insn_30_23 = INSN(30,23);
3315   if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
3316       || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
3317       || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
3318      UInt bL     = INSN(22,22);
3319      UInt bX     = INSN(31,31);
3320      UInt bWBack = INSN(23,23);
3321      UInt rT1    = INSN(4,0);
3322      UInt rN     = INSN(9,5);
3323      UInt rT2    = INSN(14,10);
3324      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
3325      if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
3326          || (bL && rT1 == rT2)) {
3327         /* undecodable; fall through */
3328      } else {
3329         if (rN == 31) { /* FIXME generate stack alignment check */ }
3330
3331         // Compute the transfer address TA and the writeback address WA.
3332         IRTemp tRN = newTemp(Ity_I64);
3333         assign(tRN, getIReg64orSP(rN));
3334         IRTemp tEA = newTemp(Ity_I64);
3335         simm7 = (bX ? 8 : 4) * simm7;
3336         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
3337
3338         IRTemp tTA = newTemp(Ity_I64);
3339         IRTemp tWA = newTemp(Ity_I64);
3340         switch (INSN(24,23)) {
3341            case BITS2(0,1):
3342               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3343            case BITS2(1,1):
3344               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3345            case BITS2(1,0):
3346               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3347            default:
3348               vassert(0); /* NOTREACHED */
3349         }
3350
3351         /* Normally rN would be updated after the transfer.  However, in
3352            the special case typifed by
3353               stp x29, x30, [sp,#-112]!
3354            it is necessary to update SP before the transfer, (1)
3355            because Memcheck will otherwise complain about a write
3356            below the stack pointer, and (2) because the segfault
3357            stack extension mechanism will otherwise extend the stack
3358            only down to SP before the instruction, which might not be
3359            far enough, if the -112 bit takes the actual access
3360            address to the next page.
3361         */
3362         Bool earlyWBack
3363           = bWBack && simm7 < 0
3364             && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
3365
3366         if (bWBack && earlyWBack)
3367            putIReg64orSP(rN, mkexpr(tEA));
3368
3369         /**/ if (bL == 1 && bX == 1) {
3370            // 64 bit load
3371            putIReg64orZR(rT1, loadLE(Ity_I64,
3372                                      binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3373            putIReg64orZR(rT2, loadLE(Ity_I64,
3374                                      binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
3375         } else if (bL == 1 && bX == 0) {
3376            // 32 bit load
3377            putIReg32orZR(rT1, loadLE(Ity_I32,
3378                                      binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3379            putIReg32orZR(rT2, loadLE(Ity_I32,
3380                                      binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
3381         } else if (bL == 0 && bX == 1) {
3382            // 64 bit store
3383            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3384                    getIReg64orZR(rT1));
3385            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
3386                    getIReg64orZR(rT2));
3387         } else {
3388            vassert(bL == 0 && bX == 0);
3389            // 32 bit store
3390            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3391                    getIReg32orZR(rT1));
3392            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
3393                    getIReg32orZR(rT2));
3394         }
3395
3396         if (bWBack && !earlyWBack)
3397            putIReg64orSP(rN, mkexpr(tEA));
3398
3399         const HChar* fmt_str = NULL;
3400         switch (INSN(24,23)) {
3401            case BITS2(0,1):
3402               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3403               break;
3404            case BITS2(1,1):
3405               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3406               break;
3407            case BITS2(1,0):
3408               fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
3409               break;
3410            default:
3411               vassert(0);
3412         }
3413         DIP(fmt_str, bL == 0 ? "st" : "ld",
3414                      nameIRegOrZR(bX == 1, rT1),
3415                      nameIRegOrZR(bX == 1, rT2),
3416                      nameIReg64orSP(rN), simm7);
3417         return True;
3418      }
3419   }
3420
   /* ---------------- LDR (literal, int reg) ---------------- */
   /* 31 29      23    4
      00 011 000 imm19 Rt   LDR   Wt, [PC + sxTo64(imm19 << 2)]
      01 011 000 imm19 Rt   LDR   Xt, [PC + sxTo64(imm19 << 2)]
      10 011 000 imm19 Rt   LDRSW Xt, [PC + sxTo64(imm19 << 2)]
      11 011 000 imm19 Rt   prefetch  [PC + sxTo64(imm19 << 2)]
      Just handles the first two cases for now.
   */
   if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
      UInt  imm19 = INSN(23,5);
      UInt  rT    = INSN(4,0);
      UInt  bX    = INSN(30,30);  // 1 => 64-bit Xt, 0 => 32-bit Wt
      /* The address is PC-relative, so it is a translation-time constant:
         imm19 is scaled by 4 and sign-extended from 21 bits (19+2). */
      ULong ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
      if (bX) {
         putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
      } else {
         putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
      }
      DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
      return True;
   }
3442
3443   /* -------------- {LD,ST}R (integer register) --------------- */
3444   /* 31 29        20 15     12 11 9  4
3445      |  |         |  |      |  |  |  |
3446      11 111000011 Rm option S  10 Rn Rt  LDR  Xt, [Xn|SP, R<m>{ext/sh}]
3447      10 111000011 Rm option S  10 Rn Rt  LDR  Wt, [Xn|SP, R<m>{ext/sh}]
3448      01 111000011 Rm option S  10 Rn Rt  LDRH Wt, [Xn|SP, R<m>{ext/sh}]
3449      00 111000011 Rm option S  10 Rn Rt  LDRB Wt, [Xn|SP, R<m>{ext/sh}]
3450
3451      11 111000001 Rm option S  10 Rn Rt  STR  Xt, [Xn|SP, R<m>{ext/sh}]
3452      10 111000001 Rm option S  10 Rn Rt  STR  Wt, [Xn|SP, R<m>{ext/sh}]
3453      01 111000001 Rm option S  10 Rn Rt  STRH Wt, [Xn|SP, R<m>{ext/sh}]
3454      00 111000001 Rm option S  10 Rn Rt  STRB Wt, [Xn|SP, R<m>{ext/sh}]
3455   */
3456   if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
3457       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3458      HChar  dis_buf[64];
3459      UInt   szLg2 = INSN(31,30);
3460      Bool   isLD  = INSN(22,22) == 1;
3461      UInt   tt    = INSN(4,0);
3462      IRTemp ea    = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
3463      if (ea != IRTemp_INVALID) {
3464         switch (szLg2) {
3465            case 3: /* 64 bit */
3466               if (isLD) {
3467                  putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
3468                  DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
3469               } else {
3470                  storeLE(mkexpr(ea), getIReg64orZR(tt));
3471                  DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
3472               }
3473               break;
3474            case 2: /* 32 bit */
3475               if (isLD) {
3476                  putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
3477                  DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
3478               } else {
3479                  storeLE(mkexpr(ea), getIReg32orZR(tt));
3480                  DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
3481               }
3482               break;
3483            case 1: /* 16 bit */
3484               if (isLD) {
3485                  putIReg64orZR(tt, unop(Iop_16Uto64,
3486                                         loadLE(Ity_I16, mkexpr(ea))));
3487                  DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3488               } else {
3489                  storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
3490                  DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3491               }
3492               break;
3493            case 0: /* 8 bit */
3494               if (isLD) {
3495                  putIReg64orZR(tt, unop(Iop_8Uto64,
3496                                         loadLE(Ity_I8, mkexpr(ea))));
3497                  DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
3498               } else {
3499                  storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
3500                  DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
3501               }
3502               break;
3503            default:
3504               vassert(0);
3505         }
3506         return True;
3507      }
3508   }
3509
   /* -------------- LDRS{B,H,W} (uimm12) -------------- */
   /* 31 29  26  23 21    9 4
      10 111 001 10 imm12 n t   LDRSW Xt, [Xn|SP, #pimm12 * 4]
      01 111 001 1x imm12 n t   LDRSH Rt, [Xn|SP, #pimm12 * 2]
      00 111 001 1x imm12 n t   LDRSB Rt, [Xn|SP, #pimm12 * 1]
      where
         Rt is Wt when x==1, Xt when x==0
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDRSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt    szLg2 = INSN(31,30);
         UInt    bitX  = INSN(22,22);   // 1 => SX to 32, 0 => SX to 64
         UInt    imm12 = INSN(21,10);
         UInt    nn    = INSN(9,5);
         UInt    tt    = INSN(4,0);
         UInt    szB   = 1 << szLg2;    // imm12 is scaled by the xfer size
         IRExpr* ea    = binop(Iop_Add64,
                               getIReg64orSP(nn), mkU64(imm12 * szB));
         switch (szB) {
            case 4:
               vassert(bitX == 0);  // LDRSW only exists with a 64-bit dest
               putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
               DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 2:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
               }
               DIP("ldrsh %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 1:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
               }
               DIP("ldrsb %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* else fall through */
   }
3571
3572   /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
3573   /* (at-Rn-then-Rn=EA)
3574      31 29      23 21 20   11 9 4
3575      00 111 000 1x 0  imm9 01 n t  LDRSB Rt, [Xn|SP], #simm9
3576      01 111 000 1x 0  imm9 01 n t  LDRSH Rt, [Xn|SP], #simm9
3577      10 111 000 10 0  imm9 01 n t  LDRSW Xt, [Xn|SP], #simm9
3578
3579      (at-EA-then-Rn=EA)
3580      00 111 000 1x 0  imm9 11 n t  LDRSB Rt, [Xn|SP, #simm9]!
3581      01 111 000 1x 0  imm9 11 n t  LDRSH Rt, [Xn|SP, #simm9]!
3582      10 111 000 10 0  imm9 11 n t  LDRSW Xt, [Xn|SP, #simm9]!
3583      where
3584         Rt is Wt when x==1, Xt when x==0
3585         transfer-at-Rn when [11]==0, at EA when [11]==1
3586   */
3587   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3588       && INSN(21,21) == 0 && INSN(10,10) == 1) {
3589      /* Further checks on bits 31:30 and 22 */
3590      Bool valid = False;
3591      switch ((INSN(31,30) << 1) | INSN(22,22)) {
3592         case BITS3(1,0,0):                    // LDRSW Xt
3593         case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
3594         case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
3595            valid = True;
3596            break;
3597      }
3598      if (valid) {
3599         UInt   szLg2 = INSN(31,30);
3600         UInt   imm9  = INSN(20,12);
3601         Bool   atRN  = INSN(11,11) == 0;
3602         UInt   nn    = INSN(9,5);
3603         UInt   tt    = INSN(4,0);
3604         IRTemp tRN   = newTemp(Ity_I64);
3605         IRTemp tEA   = newTemp(Ity_I64);
3606         IRTemp tTA   = IRTemp_INVALID;
3607         ULong  simm9 = sx_to_64(imm9, 9);
3608         Bool   is64  = INSN(22,22) == 0;
3609         assign(tRN, getIReg64orSP(nn));
3610         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3611         tTA = atRN ? tRN : tEA;
3612         HChar ch = '?';
3613         /* There are 5 cases:
3614               byte     load,           SX to 64
3615               byte     load, SX to 32, ZX to 64
3616               halfword load,           SX to 64
3617               halfword load, SX to 32, ZX to 64
3618               word     load,           SX to 64
3619            The ifs below handle them in the listed order.
3620         */
3621         if (szLg2 == 0) {
3622            ch = 'b';
3623            if (is64) {
3624               putIReg64orZR(tt, unop(Iop_8Sto64,
3625                                      loadLE(Ity_I8, mkexpr(tTA))));
3626            } else {
3627               putIReg32orZR(tt, unop(Iop_8Sto32,
3628                                      loadLE(Ity_I8, mkexpr(tTA))));
3629            }
3630         }
3631         else if (szLg2 == 1) {
3632            ch = 'h';
3633            if (is64) {
3634               putIReg64orZR(tt, unop(Iop_16Sto64,
3635                                      loadLE(Ity_I16, mkexpr(tTA))));
3636            } else {
3637               putIReg32orZR(tt, unop(Iop_16Sto32,
3638                                      loadLE(Ity_I16, mkexpr(tTA))));
3639            }
3640         }
3641         else if (szLg2 == 2 && is64) {
3642            ch = 'w';
3643            putIReg64orZR(tt, unop(Iop_32Sto64,
3644                                   loadLE(Ity_I32, mkexpr(tTA))));
3645         }
3646         else {
3647            vassert(0);
3648         }
3649         putIReg64orSP(nn, mkexpr(tEA));
3650         DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!",
3651             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
3652         return True;
3653      }
3654      /* else fall through */
3655   }
3656
3657   /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
3658   /* 31 29      23 21 20   11 9 4
3659      00 111 000 1x 0  imm9 00 n t  LDURSB Rt, [Xn|SP, #simm9]
3660      01 111 000 1x 0  imm9 00 n t  LDURSH Rt, [Xn|SP, #simm9]
3661      10 111 000 10 0  imm9 00 n t  LDURSW Xt, [Xn|SP, #simm9]
3662      where
3663         Rt is Wt when x==1, Xt when x==0
3664   */
3665   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3666       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
3667      /* Further checks on bits 31:30 and 22 */
3668      Bool valid = False;
3669      switch ((INSN(31,30) << 1) | INSN(22,22)) {
3670         case BITS3(1,0,0):                    // LDURSW Xt
3671         case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
3672         case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
3673            valid = True;
3674            break;
3675      }
3676      if (valid) {
3677         UInt   szLg2 = INSN(31,30);
3678         UInt   imm9  = INSN(20,12);
3679         UInt   nn    = INSN(9,5);
3680         UInt   tt    = INSN(4,0);
3681         IRTemp tRN   = newTemp(Ity_I64);
3682         IRTemp tEA   = newTemp(Ity_I64);
3683         ULong  simm9 = sx_to_64(imm9, 9);
3684         Bool   is64  = INSN(22,22) == 0;
3685         assign(tRN, getIReg64orSP(nn));
3686         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3687         HChar ch = '?';
3688         /* There are 5 cases:
3689               byte     load,           SX to 64
3690               byte     load, SX to 32, ZX to 64
3691               halfword load,           SX to 64
3692               halfword load, SX to 32, ZX to 64
3693               word     load,           SX to 64
3694            The ifs below handle them in the listed order.
3695         */
3696         if (szLg2 == 0) {
3697            ch = 'b';
3698            if (is64) {
3699               putIReg64orZR(tt, unop(Iop_8Sto64,
3700                                      loadLE(Ity_I8, mkexpr(tEA))));
3701            } else {
3702               putIReg32orZR(tt, unop(Iop_8Sto32,
3703                                      loadLE(Ity_I8, mkexpr(tEA))));
3704            }
3705         }
3706         else if (szLg2 == 1) {
3707            ch = 'h';
3708            if (is64) {
3709               putIReg64orZR(tt, unop(Iop_16Sto64,
3710                                      loadLE(Ity_I16, mkexpr(tEA))));
3711            } else {
3712               putIReg32orZR(tt, unop(Iop_16Sto32,
3713                                      loadLE(Ity_I16, mkexpr(tEA))));
3714            }
3715         }
3716         else if (szLg2 == 2 && is64) {
3717            ch = 'w';
3718            putIReg64orZR(tt, unop(Iop_32Sto64,
3719                                   loadLE(Ity_I32, mkexpr(tEA))));
3720         }
3721         else {
3722            vassert(0);
3723         }
3724         DIP("ldurs%c %s, [%s, #%lld]",
3725             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
3726         return True;
3727      }
3728      /* else fall through */
3729   }
3730
   /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
   /* L==1    => mm==LD
      L==0    => mm==ST
      sz==00  => 32 bit (S) transfers
      sz==01  => 64 bit (D) transfers
      sz==10  => 128 bit (Q) transfers
      sz==11  isn't allowed
      simm7 is scaled by the (single-register) transfer size

      31 29       22 21   14 9 4
      sz 101 1001 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP], #imm
      (at-Rn-then-Rn=EA)

      sz 101 1011 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]!
      (at-EA-then-Rn=EA)

      sz 101 1010 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]
      (at-EA)
   */

   UInt insn_29_23 = INSN(29,23);
   if (insn_29_23 == BITS7(1,0,1,1,0,0,1)
       || insn_29_23 == BITS7(1,0,1,1,0,1,1)
       || insn_29_23 == BITS7(1,0,1,1,0,1,0)) {
      UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
      Bool isLD   = INSN(22,22) == 1;
      Bool wBack  = INSN(23,23) == 1;
      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
      UInt tt2    = INSN(14,10);
      UInt nn     = INSN(9,5);
      UInt tt1    = INSN(4,0);
      // sz==11 is reserved; a load with identical destinations is rejected.
      if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
         /* undecodable; fall through */
      } else {
         if (nn == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         UInt   szB = 4 << szSlg2; /* szB is the per-register size */
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(nn));
         IRTemp tEA = newTemp(Ity_I64);
         simm7 = szB * simm7;   // scale the offset by the xfer size
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         /* bits [24:23]: 01 = post-index (access at Rn, then Rn=EA),
            11 = pre-index (access at EA, then Rn=EA), 10 = plain offset. */
         switch (INSN(24,23)) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         IRType ty = Ity_INVALID;
         switch (szB) {
            case 4:  ty = Ity_F32;  break;
            case 8:  ty = Ity_F64;  break;
            case 16: ty = Ity_V128; break;
            default: vassert(0);
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special cases typifed by
               stp q0, q1, [sp,#-512]!
               stp d0, d1, [sp,#-512]!
               stp s0, s1, [sp,#-512]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -512 bit takes the actual access
            address to the next page.
         */
         Bool earlyWBack
           = wBack && simm7 < 0
             && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;

         if (wBack && earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         if (isLD) {
            /* For sub-128-bit transfers, zero the whole Q register first
               so the lanes above the loaded value are cleared. */
            if (szB < 16) {
               putQReg128(tt1, mkV128(0x0000));
            }
            putQRegLO(tt1,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
            if (szB < 16) {
               putQReg128(tt2, mkV128(0x0000));
            }
            putQRegLO(tt2,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
         } else {
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
                    getQRegLO(tt1, ty));
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
                    getQRegLO(tt2, ty));
         }

         if (wBack && !earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         const HChar* fmt_str = NULL;
         switch (INSN(24,23)) {
            case BITS2(0,1):
               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(1,0):
               fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, isLD ? "ld" : "st",
                      nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
                      nameIReg64orSP(nn), simm7);
         return True;
      }
   }
3857
   /* -------------- {LD,ST}R (vector register) --------------- */
   /* 31 29     23  20 15     12 11 9  4
      |  |      |   |  |      |  |  |  |
      00 111100 011 Rm option S  10 Rn Rt  LDR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 011 Rm option S  10 Rn Rt  LDR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 011 Rm option S  10 Rn Rt  LDR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 011 Rm option S  10 Rn Rt  LDR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 111 Rm option S  10 Rn Rt  LDR Qt, [Xn|SP, R<m>{ext/sh}]

      00 111100 001 Rm option S  10 Rn Rt  STR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 001 Rm option S  10 Rn Rt  STR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 001 Rm option S  10 Rn Rt  STR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 001 Rm option S  10 Rn Rt  STR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 101 Rm option S  10 Rn Rt  STR Qt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      // szLg2: 0..3 => B/H/S/D; 4 (bit 23 set) => Q, not handled here
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   tt    = INSN(4,0);
      if (szLg2 >= 4) goto after_LDR_STR_vector_register;
      IRTemp ea    = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
      if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
      switch (szLg2) {
         case 0: /* 8 bit */
            if (isLD) {
               // Clear the whole Q register so upper lanes end up zero.
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            } else {
               // Store path not yet exercised; assert so a test case shows up.
               vassert(0); //ATC
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            }
            break;
         case 1:
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            } else {
               // Store path not yet exercised; assert so a test case shows up.
               vassert(0); //ATC
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            }
            break;
         case 2: /* 32 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            }
            break;
         case 3: /* 64 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            }
            break;
         case 4:  return False; //ATC
         default: vassert(0);
      }
      return True;
   }
  after_LDR_STR_vector_register:
3931
   /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
   /* 31 29      22 20 15  12 11 9  4
      |  |       |  |  |   |  |  |  |
      10 1110001 01 Rm opt S 10 Rn Rt    LDRSW Xt, [Xn|SP, R<m>{ext/sh}]

      01 1110001 01 Rm opt S 10 Rn Rt    LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
      01 1110001 11 Rm opt S 10 Rn Rt    LDRSH Wt, [Xn|SP, R<m>{ext/sh}]

      00 1110001 01 Rm opt S 10 Rn Rt    LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
      00 1110001 11 Rm opt S 10 Rn Rt    LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2  = INSN(31,30);        // log2 of transfer size in bytes
      Bool   sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
      UInt   tt     = INSN(4,0);
      // szLg2 == 3 would be a 64-bit sign-extending load, which doesn't exist
      if (szLg2 == 3) goto after_LDRS_integer_register;
      IRTemp ea     = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
      /* Enumerate the 5 variants explicitly. */
      if (szLg2 == 2/*32 bit*/ && sxTo64) {
         putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
         DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
         return True;
      }
      else
      if (szLg2 == 1/*16 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      else
      if (szLg2 == 0/*8 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      /* else it's an invalid combination */
   }
  after_LDRS_integer_register:
3983
   /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
   /* This is the Unsigned offset variant only.  The Post-Index and
      Pre-Index variants are below.

      31 29      23 21    9 4
      00 111 101 01 imm12 n t   LDR Bt, [Xn|SP + imm12 * 1]
      01 111 101 01 imm12 n t   LDR Ht, [Xn|SP + imm12 * 2]
      10 111 101 01 imm12 n t   LDR St, [Xn|SP + imm12 * 4]
      11 111 101 01 imm12 n t   LDR Dt, [Xn|SP + imm12 * 8]
      00 111 101 11 imm12 n t   LDR Qt, [Xn|SP + imm12 * 16]

      00 111 101 00 imm12 n t   STR Bt, [Xn|SP + imm12 * 1]
      01 111 101 00 imm12 n t   STR Ht, [Xn|SP + imm12 * 2]
      10 111 101 00 imm12 n t   STR St, [Xn|SP + imm12 * 4]
      11 111 101 00 imm12 n t   STR Dt, [Xn|SP + imm12 * 8]
      00 111 101 10 imm12 n t   STR Qt, [Xn|SP + imm12 * 16]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,1)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
      // szLg2: 0..4 selects B/H/S/D/Q
      UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD   = INSN(22,22) == 1;
      UInt   pimm12 = INSN(21,10) << szLg2;   // offset scaled by xfer size
      UInt   nn     = INSN(9,5);
      UInt   tt     = INSN(4,0);
      IRTemp tEA    = newTemp(Ity_I64);
      IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
      if (isLD) {
         /* For sub-128-bit loads, clear the whole Q register first so the
            lanes above the loaded value end up zero. */
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%u]\n",
          isLD ? "ldr" : "str",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
      return True;
   }
4024
   /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
   /* These are the Post-Index and Pre-Index variants.

      31 29      23   20   11 9 4
      (at-Rn-then-Rn=EA)
      00 111 100 01 0 imm9 01 n t   LDR Bt, [Xn|SP], #simm
      01 111 100 01 0 imm9 01 n t   LDR Ht, [Xn|SP], #simm
      10 111 100 01 0 imm9 01 n t   LDR St, [Xn|SP], #simm
      11 111 100 01 0 imm9 01 n t   LDR Dt, [Xn|SP], #simm
      00 111 100 11 0 imm9 01 n t   LDR Qt, [Xn|SP], #simm

      (at-EA-then-Rn=EA)
      00 111 100 01 0 imm9 11 n t   LDR Bt, [Xn|SP, #simm]!
      01 111 100 01 0 imm9 11 n t   LDR Ht, [Xn|SP, #simm]!
      10 111 100 01 0 imm9 11 n t   LDR St, [Xn|SP, #simm]!
      11 111 100 01 0 imm9 11 n t   LDR Dt, [Xn|SP, #simm]!
      00 111 100 11 0 imm9 11 n t   LDR Qt, [Xn|SP, #simm]!

      Stores are the same except with bit 22 set to 0.
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      // szLg2: 0..4 selects B/H/S/D/Q
      UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD   = INSN(22,22) == 1;
      UInt   imm9   = INSN(20,12);
      Bool   atRN   = INSN(11,11) == 0;   // post-index when True, else pre-
      UInt   nn     = INSN(9,5);
      UInt   tt     = INSN(4,0);
      IRTemp tRN    = newTemp(Ity_I64);
      IRTemp tEA    = newTemp(Ity_I64);
      IRTemp tTA    = IRTemp_INVALID;     // the address actually accessed
      IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
      ULong  simm9  = sx_to_64(imm9, 9);  // unscaled signed offset
      assign(tRN, getIReg64orSP(nn));
      assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
      tTA = atRN ? tRN : tEA;
      if (isLD) {
         /* For sub-128-bit loads, clear the whole Q register first so the
            lanes above the loaded value end up zero. */
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
      } else {
         storeLE(mkexpr(tTA), getQRegLO(tt, ty));
      }
      /* Writeback of the updated base register. */
      putIReg64orSP(nn, mkexpr(tEA));
      DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
          isLD ? "ldr" : "str",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
      return True;
   }
4076
   /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
   /* 31 29      23   20   11 9 4
      00 111 100 01 0 imm9 00 n t   LDR Bt, [Xn|SP, #simm]
      01 111 100 01 0 imm9 00 n t   LDR Ht, [Xn|SP, #simm]
      10 111 100 01 0 imm9 00 n t   LDR St, [Xn|SP, #simm]
      11 111 100 01 0 imm9 00 n t   LDR Dt, [Xn|SP, #simm]
      00 111 100 11 0 imm9 00 n t   LDR Qt, [Xn|SP, #simm]

      00 111 100 00 0 imm9 00 n t   STR Bt, [Xn|SP, #simm]
      01 111 100 00 0 imm9 00 n t   STR Ht, [Xn|SP, #simm]
      10 111 100 00 0 imm9 00 n t   STR St, [Xn|SP, #simm]
      11 111 100 00 0 imm9 00 n t   STR Dt, [Xn|SP, #simm]
      00 111 100 10 0 imm9 00 n t   STR Qt, [Xn|SP, #simm]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      // szLg2: 0..4 selects B/H/S/D/Q
      UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD   = INSN(22,22) == 1;
      UInt   imm9   = INSN(20,12);
      UInt   nn     = INSN(9,5);
      UInt   tt     = INSN(4,0);
      ULong  simm9  = sx_to_64(imm9, 9);  // unscaled signed offset, no wback
      IRTemp tEA    = newTemp(Ity_I64);
      IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
      if (isLD) {
         /* For sub-128-bit loads, clear the whole Q register first so the
            lanes above the loaded value end up zero. */
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%lld]\n",
          isLD ? "ldur" : "stur",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
      return True;
   }
4116
4117   /* ---------------- LDR (literal, SIMD&FP) ---------------- */
4118   /* 31 29      23    4
4119      00 011 100 imm19 t    LDR St, [PC + sxTo64(imm19 << 2)]
4120      01 011 100 imm19 t    LDR Dt, [PC + sxTo64(imm19 << 2)]
4121      10 011 100 imm19 t    LDR Qt, [PC + sxTo64(imm19 << 2)]
4122   */
4123   if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
4124      UInt   szB   = 4 << INSN(31,30);
4125      UInt   imm19 = INSN(23,5);
4126      UInt   tt    = INSN(4,0);
4127      ULong  ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4128      IRType ty    = preferredVectorSubTypeFromSize(szB);
4129      putQReg128(tt, mkV128(0x0000));
4130      putQRegLO(tt, loadLE(ty, mkU64(ea)));
4131      DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
4132      return True;
4133   }
4134
4135   /* ---------- LD1/ST1 (single structure, no offset) ---------- */
4136   /* 31        23        15
4137      0Q00 1101 0L00 0000 xx0S sz N T
4138                          ----
4139                          opcode
4140      1011 1111 1011 1111 0010 00 0 0 <- mask
4141      0000 1101 0000 0000 0000 00 0 0 <- result
4142
4143      FIXME does this assume that the host is little endian?
4144   */
4145
4146   if ((insn & 0xBFBF2000) == 0x0D000000) {
4147      Bool   isLD = INSN(22,22) == 1;
4148      UInt   rN   = INSN(9,5);
4149      UInt   vT   = INSN(4,0);
4150      UInt   q    = INSN(30, 30);
4151      UInt   xx   = INSN(15, 14);
4152      UInt   opcode = INSN(15, 13);
4153      UInt   s    = INSN(12, 12);
4154      UInt   sz   = INSN(11, 10);
4155
4156      UInt   index = (q << 3) | (s << 2) | sz;
4157      const HChar* name = "";
4158      Bool   valid = False;
4159      IRType laneTy = Ity_I8;
4160
4161      if (opcode == 0x0) { // 8 bit variant
4162         name = "b";
4163         valid = True;
4164      } else if (opcode == 0x2 && (sz & 1) == 0) { // 16 bit variant
4165         name = "h";
4166         laneTy = Ity_I16;
4167         index >>= 1;
4168         valid = True;
4169      } else if (opcode == 0x4 && sz == 0x0) { // 32 bit variant
4170         name = "s";
4171         laneTy = Ity_I32;
4172         index >>= 2;
4173         valid = True;
4174      } else if (opcode == 0x4 && sz == 0x1 && s == 0) { // 64 bit variant
4175         name = "d";
4176         laneTy = Ity_I64;
4177         index >>= 3;
4178         valid = True;
4179      }
4180
4181      if (valid) {
4182         IRTemp tEA  = newTemp(Ity_I64);
4183         assign(tEA, getIReg64orSP(rN));
4184         if (rN == 31) { /* FIXME generate stack alignment check */ }
4185         if (isLD) {
4186            putQRegLane(vT, index, loadLE(laneTy, mkexpr(tEA)));
4187         } else {
4188            storeLE(mkexpr(tEA), getQRegLane(vT, index, laneTy));
4189         }
4190
4191         DIP("%s {v%u.%s}[%d], [%s]\n", isLD ? "ld1" : "st1",
4192             vT, name, index, nameIReg64orSP(rN));
4193         return True;
4194      }
4195
4196   }
4197
4198
4199   /* ---------- LD1/ST1 (multiple structure, no offset, one register variant) ---------- */
4200   /* 31        23
4201      0100 1100 0100 0000 0111 11 N T   LD1 {vT.2d},  [Xn|SP]
4202      0100 1100 0000 0000 0111 11 N T   ST1 {vT.2d},  [Xn|SP]
4203      0100 1100 0100 0000 0111 10 N T   LD1 {vT.4s},  [Xn|SP]
4204      0100 1100 0000 0000 0111 10 N T   ST1 {vT.4s},  [Xn|SP]
4205      0100 1100 0100 0000 0111 01 N T   LD1 {vT.8h},  [Xn|SP]
4206      0100 1100 0000 0000 0111 01 N T   ST1 {vT.8h},  [Xn|SP]
4207      0100 1100 0100 0000 0111 00 N T   LD1 {vT.16b}, [Xn|SP]
4208      0100 1100 0000 0000 0111 00 N T   ST1 {vT.16b}, [Xn|SP]
4209      FIXME does this assume that the host is little endian?
4210   */
4211   if (   (insn & 0xFFFFF000) == 0x4C407000 // LD1 cases
4212       || (insn & 0xFFFFF000) == 0x4C007000 // ST1 cases
4213      ) {
4214      Bool   isLD = INSN(22,22) == 1;
4215      UInt   rN   = INSN(9,5);
4216      UInt   vT   = INSN(4,0);
4217      IRTemp tEA  = newTemp(Ity_I64);
4218      const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4219      const HChar* name = names[INSN(11,10)];
4220      assign(tEA, getIReg64orSP(rN));
4221      if (rN == 31) { /* FIXME generate stack alignment check */ }
4222      if (isLD) {
4223         putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4224      } else {
4225         storeLE(mkexpr(tEA), getQReg128(vT));
4226      }
4227      DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
4228          vT, name, nameIReg64orSP(rN));
4229      return True;
4230   }
4231
4232   /* 31        23
4233      0000 1100 0100 0000 0111 11 N T   LD1 {vT.1d}, [Xn|SP]
4234      0000 1100 0000 0000 0111 11 N T   ST1 {vT.1d}, [Xn|SP]
4235      0000 1100 0100 0000 0111 10 N T   LD1 {vT.2s}, [Xn|SP]
4236      0000 1100 0000 0000 0111 10 N T   ST1 {vT.2s}, [Xn|SP]
4237      0000 1100 0100 0000 0111 01 N T   LD1 {vT.4h}, [Xn|SP]
4238      0000 1100 0000 0000 0111 01 N T   ST1 {vT.4h}, [Xn|SP]
4239      0000 1100 0100 0000 0111 00 N T   LD1 {vT.8b}, [Xn|SP]
4240      0000 1100 0000 0000 0111 00 N T   ST1 {vT.8b}, [Xn|SP]
4241      FIXME does this assume that the host is little endian?
4242   */
4243   if (   (insn & 0xFFFFF000) == 0x0C407000 // LD1 cases
4244       || (insn & 0xFFFFF000) == 0x0C007000 // ST1 cases
4245      ) {
4246      Bool   isLD = INSN(22,22) == 1;
4247      UInt   rN   = INSN(9,5);
4248      UInt   vT   = INSN(4,0);
4249      IRTemp tEA  = newTemp(Ity_I64);
4250      const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4251      const HChar* name = names[INSN(11,10)];
4252      assign(tEA, getIReg64orSP(rN));
4253      if (rN == 31) { /* FIXME generate stack alignment check */ }
4254      if (isLD) {
4255         putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4256         putQRegLane(vT, 1, mkU64(0));
4257      } else {
4258         storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4259      }
4260      DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
4261          vT, name, nameIReg64orSP(rN));
4262      return True;
4263   }
4264
4265   /* ---------- LD1/ST1 (multiple structure, post-index, one register variant) ---------- */
4266   /* 31        23
4267      0100 1100 1001 1111 0111 11 N T  ST1 {vT.2d},  [xN|SP], #16
4268      0100 1100 1101 1111 0111 11 N T  LD1 {vT.2d},  [xN|SP], #16
4269      0100 1100 1001 1111 0111 10 N T  ST1 {vT.4s},  [xN|SP], #16
4270      0100 1100 1101 1111 0111 10 N T  LD1 {vT.4s},  [xN|SP], #16
4271      0100 1100 1001 1111 0111 01 N T  ST1 {vT.8h},  [xN|SP], #16
4272      0100 1100 1101 1111 0111 01 N T  LD1 {vT.8h},  [xN|SP], #16
4273      0100 1100 1001 1111 0111 00 N T  ST1 {vT.16b}, [xN|SP], #16
4274      0100 1100 1101 1111 0111 00 N T  LD1 {vT.16b}, [xN|SP], #16
4275      Note that #16 is implied and cannot be any other value.
4276      FIXME does this assume that the host is little endian?
4277   */
4278   if (   (insn & 0xFFFFF000) == 0x4CDF7000 // LD1 cases
4279       || (insn & 0xFFFFF000) == 0x4C9F7000 // ST1 cases
4280      ) {
4281      Bool   isLD = INSN(22,22) == 1;
4282      UInt   rN   = INSN(9,5);
4283      UInt   vT   = INSN(4,0);
4284      IRTemp tEA  = newTemp(Ity_I64);
4285      const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4286      const HChar* name = names[INSN(11,10)];
4287      assign(tEA, getIReg64orSP(rN));
4288      if (rN == 31) { /* FIXME generate stack alignment check */ }
4289      if (isLD) {
4290         putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4291      } else {
4292         storeLE(mkexpr(tEA), getQReg128(vT));
4293      }
4294      putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16)));
4295      DIP("%s {v%u.%s}, [%s], #16\n", isLD ? "ld1" : "st1",
4296          vT, name, nameIReg64orSP(rN));
4297      return True;
4298   }
4299
4300   /* 31        23
4301      0000 1100 1001 1111 0111 11 N T  ST1 {vT.1d}, [xN|SP], #8
4302      0000 1100 1101 1111 0111 11 N T  LD1 {vT.1d}, [xN|SP], #8
4303      0000 1100 1001 1111 0111 10 N T  ST1 {vT.2s}, [xN|SP], #8
4304      0000 1100 1101 1111 0111 10 N T  LD1 {vT.2s}, [xN|SP], #8
4305      0000 1100 1001 1111 0111 01 N T  ST1 {vT.4h}, [xN|SP], #8
4306      0000 1100 1101 1111 0111 01 N T  LD1 {vT.4h}, [xN|SP], #8
4307      0000 1100 1001 1111 0111 00 N T  ST1 {vT.8b}, [xN|SP], #8
4308      0000 1100 1101 1111 0111 00 N T  LD1 {vT.8b}, [xN|SP], #8
4309      Note that #8 is implied and cannot be any other value.
4310      FIXME does this assume that the host is little endian?
4311   */
4312   if (   (insn & 0xFFFFF000) == 0x0CDF7000 // LD1 cases
4313       || (insn & 0xFFFFF000) == 0x0C9F7000 // ST1 cases
4314      ) {
4315      Bool   isLD = INSN(22,22) == 1;
4316      UInt   rN  = INSN(9,5);
4317      UInt   vT  = INSN(4,0);
4318      IRTemp tEA = newTemp(Ity_I64);
4319      const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4320      const HChar* name = names[INSN(11,10)];
4321      assign(tEA, getIReg64orSP(rN));
4322      if (rN == 31) { /* FIXME generate stack alignment check */ }
4323      if (isLD) {
4324         putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4325         putQRegLane(vT, 1, mkU64(0));
4326      } else {
4327         storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4328      }
4329      putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8)));
4330      DIP("%s {v%u.%s}, [%s], #8\n",  isLD ? "ld1" : "st1",
4331          vT, name, nameIReg64orSP(rN));
4332      return True;
4333   }
4334
4335   /* ---------- LD2/ST2 (multiple structures, post index) ---------- */
4336   /* Only a very few cases. */
4337   /* 31        23             11 9 4
4338      0100 1100 1101 1111 1000 11 n t  LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4339      0100 1100 1001 1111 1000 11 n t  ST2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4340      0100 1100 1101 1111 1000 10 n t  LD2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4341      0100 1100 1001 1111 1000 10 n t  ST2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4342   */
4343   if (   (insn & 0xFFFFFC00) == 0x4CDF8C00 // LD2 .2d
4344       || (insn & 0xFFFFFC00) == 0x4C9F8C00 // ST2 .2d
4345       || (insn & 0xFFFFFC00) == 0x4CDF8800 // LD2 .4s
4346       || (insn & 0xFFFFFC00) == 0x4C9F8800 // ST2 .4s
4347      ) {
4348      Bool   isLD = INSN(22,22) == 1;
4349      UInt   rN   = INSN(9,5);
4350      UInt   vT   = INSN(4,0);
4351      IRTemp tEA  = newTemp(Ity_I64);
4352      UInt   sz   = INSN(11,10);
4353      const HChar* name = "??";
4354      assign(tEA, getIReg64orSP(rN));
4355      if (rN == 31) { /* FIXME generate stack alignment check */ }
4356      IRExpr* tEA_0  = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4357      IRExpr* tEA_8  = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
4358      IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4359      IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
4360      if (sz == BITS2(1,1)) {
4361         name = "2d";
4362         if (isLD) {
4363            putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
4364            putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_16));
4365            putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
4366            putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_24));
4367         } else {
4368            storeLE(tEA_0,  getQRegLane((vT+0) % 32, 0, Ity_I64));
4369            storeLE(tEA_16, getQRegLane((vT+0) % 32, 1, Ity_I64));
4370            storeLE(tEA_8,  getQRegLane((vT+1) % 32, 0, Ity_I64));
4371            storeLE(tEA_24, getQRegLane((vT+1) % 32, 1, Ity_I64));
4372         }
4373      }
4374      else if (sz == BITS2(1,0)) {
4375         /* Uh, this is ugly.  TODO: better. */
4376         name = "4s";
4377         IRExpr* tEA_4  = binop(Iop_Add64, mkexpr(tEA), mkU64(4));
4378         IRExpr* tEA_12 = binop(Iop_Add64, mkexpr(tEA), mkU64(12));
4379         IRExpr* tEA_20 = binop(Iop_Add64, mkexpr(tEA), mkU64(20));
4380         IRExpr* tEA_28 = binop(Iop_Add64, mkexpr(tEA), mkU64(28));
4381         if (isLD) {
4382            putQRegLane((vT+0) % 32, 0, loadLE(Ity_I32, tEA_0));
4383            putQRegLane((vT+0) % 32, 1, loadLE(Ity_I32, tEA_8));
4384            putQRegLane((vT+0) % 32, 2, loadLE(Ity_I32, tEA_16));
4385            putQRegLane((vT+0) % 32, 3, loadLE(Ity_I32, tEA_24));
4386            putQRegLane((vT+1) % 32, 0, loadLE(Ity_I32, tEA_4));
4387            putQRegLane((vT+1) % 32, 1, loadLE(Ity_I32, tEA_12));
4388            putQRegLane((vT+1) % 32, 2, loadLE(Ity_I32, tEA_20));
4389            putQRegLane((vT+1) % 32, 3, loadLE(Ity_I32, tEA_28));
4390         } else {
4391            storeLE(tEA_0,  getQRegLane((vT+0) % 32, 0, Ity_I32));
4392            storeLE(tEA_8,  getQRegLane((vT+0) % 32, 1, Ity_I32));
4393            storeLE(tEA_16, getQRegLane((vT+0) % 32, 2, Ity_I32));
4394            storeLE(tEA_24, getQRegLane((vT+0) % 32, 3, Ity_I32));
4395            storeLE(tEA_4,  getQRegLane((vT+1) % 32, 0, Ity_I32));
4396            storeLE(tEA_12, getQRegLane((vT+1) % 32, 1, Ity_I32));
4397            storeLE(tEA_20, getQRegLane((vT+1) % 32, 2, Ity_I32));
4398            storeLE(tEA_28, getQRegLane((vT+1) % 32, 3, Ity_I32));
4399         }
4400      }
4401      else {
4402         vassert(0); // Can't happen.
4403      }
4404      putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
4405      DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld2" : "st2",
4406          (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
4407      return True;
4408   }
4409
4410   /* ---------- LD1/ST1 (multiple structures, no offset) ---------- */
4411   /* Only a very few cases. */
4412   /* 31        23
4413      0100 1100 0100 0000 1010 00 n t  LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4414      0100 1100 0000 0000 1010 00 n t  ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4415   */
4416   if (   (insn & 0xFFFFFC00) == 0x4C40A000 // LD1
4417       || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1
4418      ) {
4419      Bool   isLD = INSN(22,22) == 1;
4420      UInt   rN   = INSN(9,5);
4421      UInt   vT   = INSN(4,0);
4422      IRTemp tEA  = newTemp(Ity_I64);
4423      const HChar* name = "16b";
4424      assign(tEA, getIReg64orSP(rN));
4425      if (rN == 31) { /* FIXME generate stack alignment check */ }
4426      IRExpr* tEA_0  = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4427      IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4428      if (isLD) {
4429         putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
4430         putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
4431      } else {
4432         storeLE(tEA_0,  getQReg128((vT+0) % 32));
4433         storeLE(tEA_16, getQReg128((vT+1) % 32));
4434      }
4435      DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld1" : "st1",
4436          (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
4437      return True;
4438   }
4439
4440   /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
4441   /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
4442   /* 31 29     23  20      14    9 4
4443      sz 001000 010 11111 0 11111 n t   LDX{R,RH,RB}  Rt, [Xn|SP]
4444      sz 001000 010 11111 1 11111 n t   LDAX{R,RH,RB} Rt, [Xn|SP]
4445      sz 001000 000 s     0 11111 n t   STX{R,RH,RB}  Ws, Rt, [Xn|SP]
4446      sz 001000 000 s     1 11111 n t   STLX{R,RH,RB} Ws, Rt, [Xn|SP]
4447   */
4448   if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
4449       && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
4450       && INSN(14,10) == BITS5(1,1,1,1,1)) {
4451      UInt szBlg2     = INSN(31,30);
4452      Bool isLD       = INSN(22,22) == 1;
4453      Bool isAcqOrRel = INSN(15,15) == 1;
4454      UInt ss         = INSN(20,16);
4455      UInt nn         = INSN(9,5);
4456      UInt tt         = INSN(4,0);
4457
4458      vassert(szBlg2 < 4);
4459      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
4460      IRType ty  = integerIRTypeOfSize(szB);
4461      const HChar* suffix[4] = { "rb", "rh", "r", "r" };
4462
4463      IRTemp ea = newTemp(Ity_I64);
4464      assign(ea, getIReg64orSP(nn));
4465      /* FIXME generate check that ea is szB-aligned */
4466
4467      if (isLD && ss == BITS5(1,1,1,1,1)) {
4468         IRTemp res = newTemp(ty);
4469         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
4470         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
4471         if (isAcqOrRel) {
4472            stmt(IRStmt_MBE(Imbe_Fence));
4473         }
4474         DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
4475             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4476         return True;
4477      }
4478      if (!isLD) {
4479         if (isAcqOrRel) {
4480            stmt(IRStmt_MBE(Imbe_Fence));
4481         }
4482         IRTemp  res  = newTemp(Ity_I1);
4483         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
4484         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
4485         /* IR semantics: res is 1 if store succeeds, 0 if it fails.
4486            Need to set rS to 1 on failure, 0 on success. */
4487         putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
4488                                            mkU64(1)));
4489         DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
4490             nameIRegOrZR(False, ss),
4491             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4492         return True;
4493      }
4494      /* else fall through */
4495   }
4496
4497   /* ------------------ LDA{R,RH,RB} ------------------ */
4498   /* ------------------ STL{R,RH,RB} ------------------ */
4499   /* 31 29     23  20      14    9 4
4500      sz 001000 110 11111 1 11111 n t   LDAR<sz> Rt, [Xn|SP]
4501      sz 001000 100 11111 1 11111 n t   STLR<sz> Rt, [Xn|SP]
4502   */
4503   if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
4504       && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
4505      UInt szBlg2 = INSN(31,30);
4506      Bool isLD   = INSN(22,22) == 1;
4507      UInt nn     = INSN(9,5);
4508      UInt tt     = INSN(4,0);
4509
4510      vassert(szBlg2 < 4);
4511      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
4512      IRType ty  = integerIRTypeOfSize(szB);
4513      const HChar* suffix[4] = { "rb", "rh", "r", "r" };
4514
4515      IRTemp ea = newTemp(Ity_I64);
4516      assign(ea, getIReg64orSP(nn));
4517      /* FIXME generate check that ea is szB-aligned */
4518
4519      if (isLD) {
4520         IRTemp res = newTemp(ty);
4521         assign(res, loadLE(ty, mkexpr(ea)));
4522         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
4523         stmt(IRStmt_MBE(Imbe_Fence));
4524         DIP("lda%s %s, [%s]\n", suffix[szBlg2],
4525             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4526      } else {
4527         stmt(IRStmt_MBE(Imbe_Fence));
4528         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
4529         storeLE(mkexpr(ea), data);
4530         DIP("stl%s %s, [%s]\n", suffix[szBlg2],
4531             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4532      }
4533      return True;
4534   }
4535
4536   /* ------------------ PRFM (immediate) ------------------ */
4537   /* 31 29        21    9 4
4538      11 11100110  imm12 n t PRFM <option>, [Xn|SP{, #pimm}]
4539   */
4540
4541   if (INSN(31, 22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
4542      /* TODO: decode */
4543      DIP("prfm ??? (imm)");
4544      return True;
4545   }
4546
4547   vex_printf("ARM64 front end: load_store\n");
4548   return False;
4549#  undef INSN
4550}
4551
4552
4553/*------------------------------------------------------------*/
4554/*--- Control flow and misc instructions                   ---*/
4555/*------------------------------------------------------------*/
4556
4557static
4558Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
4559                          VexArchInfo* archinfo)
4560{
4561#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
4562
4563   /* ---------------------- B cond ----------------------- */
4564   /* 31        24    4 3
4565      0101010 0 imm19 0 cond */
4566   if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
4567      UInt  cond   = INSN(3,0);
4568      ULong uimm64 = INSN(23,5) << 2;
4569      Long  simm64 = (Long)sx_to_64(uimm64, 21);
4570      vassert(dres->whatNext    == Dis_Continue);
4571      vassert(dres->len         == 4);
4572      vassert(dres->continueAt  == 0);
4573      vassert(dres->jk_StopHere == Ijk_INVALID);
4574      stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
4575                        Ijk_Boring,
4576                        IRConst_U64(guest_PC_curr_instr + simm64),
4577                        OFFB_PC) );
4578      putPC(mkU64(guest_PC_curr_instr + 4));
4579      dres->whatNext    = Dis_StopHere;
4580      dres->jk_StopHere = Ijk_Boring;
4581      DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
4582      return True;
4583   }
4584
4585   /* -------------------- B{L} uncond -------------------- */
4586   if (INSN(30,26) == BITS5(0,0,1,0,1)) {
4587      /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
         100101 imm26  BL (PC + sxTo64(imm26 << 2))
4589      */
4590      UInt  bLink  = INSN(31,31);
4591      ULong uimm64 = INSN(25,0) << 2;
4592      Long  simm64 = (Long)sx_to_64(uimm64, 28);
4593      if (bLink) {
4594         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
4595      }
4596      putPC(mkU64(guest_PC_curr_instr + simm64));
4597      dres->whatNext = Dis_StopHere;
4598      dres->jk_StopHere = Ijk_Call;
4599      DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
4600                          guest_PC_curr_instr + simm64);
4601      return True;
4602   }
4603
4604   /* --------------------- B{L} reg --------------------- */
4605   /* 31      24 22 20    15     9  4
4606      1101011 00 10 11111 000000 nn 00000  RET  Rn
4607      1101011 00 01 11111 000000 nn 00000  CALL Rn
4608      1101011 00 00 11111 000000 nn 00000  JMP  Rn
4609   */
4610   if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
4611       && INSN(20,16) == BITS5(1,1,1,1,1)
4612       && INSN(15,10) == BITS6(0,0,0,0,0,0)
4613       && INSN(4,0) == BITS5(0,0,0,0,0)) {
4614      UInt branch_type = INSN(22,21);
4615      UInt nn          = INSN(9,5);
4616      if (branch_type == BITS2(1,0) /* RET */) {
4617         putPC(getIReg64orZR(nn));
4618         dres->whatNext = Dis_StopHere;
4619         dres->jk_StopHere = Ijk_Ret;
4620         DIP("ret %s\n", nameIReg64orZR(nn));
4621         return True;
4622      }
4623      if (branch_type == BITS2(0,1) /* CALL */) {
4624         IRTemp dst = newTemp(Ity_I64);
4625         assign(dst, getIReg64orZR(nn));
4626         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
4627         putPC(mkexpr(dst));
4628         dres->whatNext = Dis_StopHere;
4629         dres->jk_StopHere = Ijk_Call;
4630         DIP("blr %s\n", nameIReg64orZR(nn));
4631         return True;
4632      }
4633      if (branch_type == BITS2(0,0) /* JMP */) {
4634         putPC(getIReg64orZR(nn));
4635         dres->whatNext = Dis_StopHere;
4636         dres->jk_StopHere = Ijk_Boring;
4637         DIP("jmp %s\n", nameIReg64orZR(nn));
4638         return True;
4639      }
4640   }
4641
4642   /* -------------------- CB{N}Z -------------------- */
4643   /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
4644      sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
4645   */
4646   if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
4647      Bool    is64   = INSN(31,31) == 1;
4648      Bool    bIfZ   = INSN(24,24) == 0;
4649      ULong   uimm64 = INSN(23,5) << 2;
4650      UInt    rT     = INSN(4,0);
4651      Long    simm64 = (Long)sx_to_64(uimm64, 21);
4652      IRExpr* cond   = NULL;
4653      if (is64) {
4654         cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
4655                      getIReg64orZR(rT), mkU64(0));
4656      } else {
4657         cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
4658                      getIReg32orZR(rT), mkU32(0));
4659      }
4660      stmt( IRStmt_Exit(cond,
4661                        Ijk_Boring,
4662                        IRConst_U64(guest_PC_curr_instr + simm64),
4663                        OFFB_PC) );
4664      putPC(mkU64(guest_PC_curr_instr + 4));
4665      dres->whatNext    = Dis_StopHere;
4666      dres->jk_StopHere = Ijk_Boring;
4667      DIP("cb%sz %s, 0x%llx\n",
4668          bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
4669          guest_PC_curr_instr + simm64);
4670      return True;
4671   }
4672
4673   /* -------------------- TB{N}Z -------------------- */
4674   /* 31 30      24 23  18  5 4
4675      b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
4676      b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
4677   */
4678   if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
4679      UInt    b5     = INSN(31,31);
4680      Bool    bIfZ   = INSN(24,24) == 0;
4681      UInt    b40    = INSN(23,19);
4682      UInt    imm14  = INSN(18,5);
4683      UInt    tt     = INSN(4,0);
4684      UInt    bitNo  = (b5 << 5) | b40;
4685      ULong   uimm64 = imm14 << 2;
4686      Long    simm64 = sx_to_64(uimm64, 16);
4687      IRExpr* cond
4688         = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
4689                 binop(Iop_And64,
4690                       binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
4691                       mkU64(1)),
4692                 mkU64(0));
4693      stmt( IRStmt_Exit(cond,
4694                        Ijk_Boring,
4695                        IRConst_U64(guest_PC_curr_instr + simm64),
4696                        OFFB_PC) );
4697      putPC(mkU64(guest_PC_curr_instr + 4));
4698      dres->whatNext    = Dis_StopHere;
4699      dres->jk_StopHere = Ijk_Boring;
4700      DIP("tb%sz %s, #%u, 0x%llx\n",
4701          bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
4702          guest_PC_curr_instr + simm64);
4703      return True;
4704   }
4705
4706   /* -------------------- SVC -------------------- */
4707   /* 11010100 000 imm16 000 01
4708      Don't bother with anything except the imm16==0 case.
4709   */
4710   if (INSN(31,0) == 0xD4000001) {
4711      putPC(mkU64(guest_PC_curr_instr + 4));
4712      dres->whatNext    = Dis_StopHere;
4713      dres->jk_StopHere = Ijk_Sys_syscall;
4714      DIP("svc #0\n");
4715      return True;
4716   }
4717
4718   /* ------------------ M{SR,RS} ------------------ */
4719   /* Only handles the case where the system register is TPIDR_EL0.
4720      0xD51BD0 010 Rt   MSR tpidr_el0, rT
4721      0xD53BD0 010 Rt   MRS rT, tpidr_el0
4722   */
4723   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
4724       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
4725      Bool toSys = INSN(21,21) == 0;
4726      UInt tt    = INSN(4,0);
4727      if (toSys) {
4728         stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
4729         DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
4730      } else {
4731         putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
4732         DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
4733      }
4734      return True;
4735   }
4736   /* Cases for FPCR
4737      0xD51B44 000 Rt  MSR fpcr, rT
      0xD53B44 000 Rt  MRS rT, fpcr
4739   */
4740   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
4741       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
4742      Bool toSys = INSN(21,21) == 0;
4743      UInt tt    = INSN(4,0);
4744      if (toSys) {
4745         stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
4746         DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
4747      } else {
4748         putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
4749         DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
4750      }
4751      return True;
4752   }
4753   /* Cases for FPSR
4754      0xD51B44 001 Rt  MSR fpsr, rT
      0xD53B44 001 Rt  MRS rT, fpsr
4756   */
4757   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
4758       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
4759      Bool toSys = INSN(21,21) == 0;
4760      UInt tt    = INSN(4,0);
4761      if (toSys) {
4762         stmt( IRStmt_Put( OFFB_FPSR, getIReg32orZR(tt)) );
4763         DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
4764      } else {
4765         putIReg32orZR(tt, IRExpr_Get(OFFB_FPSR, Ity_I32));
4766         DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
4767      }
4768      return True;
4769   }
4770   /* Cases for NZCV
4771      D51B42 000 Rt  MSR nzcv, rT
4772      D53B42 000 Rt  MRS rT, nzcv
4773   */
4774   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
4775       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
4776      Bool  toSys = INSN(21,21) == 0;
4777      UInt  tt    = INSN(4,0);
4778      if (toSys) {
4779         IRTemp t = newTemp(Ity_I64);
4780         assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
4781         setFlags_COPY(t);
4782         DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
4783      } else {
4784         IRTemp res = newTemp(Ity_I64);
4785         assign(res, mk_arm64g_calculate_flags_nzcv());
4786         putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
4787         DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
4788      }
4789      return True;
4790   }
4791   /* Cases for DCZID_EL0
4792      Don't support arbitrary reads and writes to this register.  Just
4793      return the value 16, which indicates that the DC ZVA instruction
4794      is not permitted, so we don't have to emulate it.
4795      D5 3B 00 111 Rt  MRS rT, dczid_el0
4796   */
4797   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
4798      UInt tt = INSN(4,0);
4799      putIReg64orZR(tt, mkU64(1<<4));
4800      DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
4801      return True;
4802   }
4803   /* Cases for CTR_EL0
4804      We just handle reads, and make up a value from the D and I line
4805      sizes in the VexArchInfo we are given, and patch in the following
4806      fields that the Foundation model gives ("natively"):
4807      CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
      D5 3B 00 001 Rt  MRS rT, ctr_el0
4809   */
4810   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
4811      UInt tt = INSN(4,0);
4812      /* Need to generate a value from dMinLine_lg2_szB and
4813         dMinLine_lg2_szB.  The value in the register is in 32-bit
4814         units, so need to subtract 2 from the values in the
4815         VexArchInfo.  We can assume that the values here are valid --
4816         disInstr_ARM64 checks them -- so there's no need to deal with
4817         out-of-range cases. */
4818      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
4819              && archinfo->arm64_dMinLine_lg2_szB <= 17
4820              && archinfo->arm64_iMinLine_lg2_szB >= 2
4821              && archinfo->arm64_iMinLine_lg2_szB <= 17);
4822      UInt val
4823         = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
4824                      | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
4825      putIReg64orZR(tt, mkU64(val));
4826      DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
4827      return True;
4828   }
4829
4830   /* ------------------ IC_IVAU ------------------ */
4831   /* D5 0B 75 001 Rt  ic ivau, rT
4832   */
4833   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
4834      /* We will always be provided with a valid iMinLine value. */
4835      vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
4836              && archinfo->arm64_iMinLine_lg2_szB <= 17);
4837      /* Round the requested address, in rT, down to the start of the
4838         containing block. */
4839      UInt   tt      = INSN(4,0);
4840      ULong  lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
4841      IRTemp addr    = newTemp(Ity_I64);
4842      assign( addr, binop( Iop_And64,
4843                           getIReg64orZR(tt),
4844                           mkU64(~(lineszB - 1))) );
4845      /* Set the invalidation range, request exit-and-invalidate, with
4846         continuation at the next instruction. */
4847      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
4848      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
4849      /* be paranoid ... */
4850      stmt( IRStmt_MBE(Imbe_Fence) );
4851      putPC(mkU64( guest_PC_curr_instr + 4 ));
4852      dres->whatNext    = Dis_StopHere;
4853      dres->jk_StopHere = Ijk_InvalICache;
4854      DIP("ic ivau, %s\n", nameIReg64orZR(tt));
4855      return True;
4856   }
4857
4858   /* ------------------ DC_CVAU ------------------ */
4859   /* D5 0B 7B 001 Rt  dc cvau, rT
4860   */
4861   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
4862      /* Exactly the same scheme as for IC IVAU, except we observe the
4863         dMinLine size, and request an Ijk_FlushDCache instead of
4864         Ijk_InvalICache. */
4865      /* We will always be provided with a valid dMinLine value. */
4866      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
4867              && archinfo->arm64_dMinLine_lg2_szB <= 17);
4868      /* Round the requested address, in rT, down to the start of the
4869         containing block. */
4870      UInt   tt      = INSN(4,0);
4871      ULong  lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
4872      IRTemp addr    = newTemp(Ity_I64);
4873      assign( addr, binop( Iop_And64,
4874                           getIReg64orZR(tt),
4875                           mkU64(~(lineszB - 1))) );
4876      /* Set the flush range, request exit-and-flush, with
4877         continuation at the next instruction. */
4878      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
4879      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
4880      /* be paranoid ... */
4881      stmt( IRStmt_MBE(Imbe_Fence) );
4882      putPC(mkU64( guest_PC_curr_instr + 4 ));
4883      dres->whatNext    = Dis_StopHere;
4884      dres->jk_StopHere = Ijk_FlushDCache;
4885      DIP("dc cvau, %s\n", nameIReg64orZR(tt));
4886      return True;
4887   }
4888
4889   /* ------------------ ISB, DMB, DSB ------------------ */
4890   if (INSN(31,0) == 0xD5033FDF) {
4891      stmt(IRStmt_MBE(Imbe_Fence));
4892      DIP("isb\n");
4893      return True;
4894   }
4895   if (INSN(31,0) == 0xD5033BBF) {
4896      stmt(IRStmt_MBE(Imbe_Fence));
4897      DIP("dmb ish\n");
4898      return True;
4899   }
4900   if (INSN(31,0) == 0xD5033ABF) {
4901      stmt(IRStmt_MBE(Imbe_Fence));
4902      DIP("dmb ishst\n");
4903      return True;
4904   }
4905   if (INSN(31,0) == 0xD50339BF) {
4906      stmt(IRStmt_MBE(Imbe_Fence));
4907      DIP("dmb ishld\n");
4908      return True;
4909   }
4910   if (INSN(31,0) == 0xD5033B9F) {
4911      stmt(IRStmt_MBE(Imbe_Fence));
4912      DIP("dsb ish\n");
4913      return True;
4914   }
4915   if (INSN(31,0) == 0xD5033F9F) {
4916      stmt(IRStmt_MBE(Imbe_Fence));
4917      DIP("dsb sy\n");
4918      return True;
4919   }
4920
4921   /* -------------------- NOP -------------------- */
4922   if (INSN(31,0) == 0xD503201F) {
4923      DIP("nop\n");
4924      return True;
4925   }
4926
4927  //fail:
4928   vex_printf("ARM64 front end: branch_etc\n");
4929   return False;
4930#  undef INSN
4931}
4932
4933
4934/*------------------------------------------------------------*/
4935/*--- SIMD and FP instructions                             ---*/
4936/*------------------------------------------------------------*/
4937
4938/* begin FIXME -- rm temp scaffolding */
4939static IRExpr* mk_CatEvenLanes64x2 ( IRTemp, IRTemp );
4940static IRExpr* mk_CatOddLanes64x2  ( IRTemp, IRTemp );
4941
4942static IRExpr* mk_CatEvenLanes32x4 ( IRTemp, IRTemp );
4943static IRExpr* mk_CatOddLanes32x4  ( IRTemp, IRTemp );
4944static IRExpr* mk_InterleaveLO32x4 ( IRTemp, IRTemp );
4945static IRExpr* mk_InterleaveHI32x4 ( IRTemp, IRTemp );
4946
4947static IRExpr* mk_CatEvenLanes16x8 ( IRTemp, IRTemp );
4948static IRExpr* mk_CatOddLanes16x8  ( IRTemp, IRTemp );
4949static IRExpr* mk_InterleaveLO16x8 ( IRTemp, IRTemp );
4950static IRExpr* mk_InterleaveHI16x8 ( IRTemp, IRTemp );
4951
4952static IRExpr* mk_CatEvenLanes8x16 ( IRTemp, IRTemp );
4953static IRExpr* mk_CatOddLanes8x16  ( IRTemp, IRTemp );
4954static IRExpr* mk_InterleaveLO8x16 ( IRTemp, IRTemp );
4955static IRExpr* mk_InterleaveHI8x16 ( IRTemp, IRTemp );
4956/* end FIXME -- rm temp scaffolding */
4957
4958/* Generate N copies of |bit| in the bottom of a ULong. */
4959static ULong Replicate ( ULong bit, Int N )
4960{
4961   vassert(bit <= 1 && N >= 1 && N < 64);
4962   if (bit == 0) {
4963      return 0;
4964    } else {
4965      /* Careful.  This won't work for N == 64. */
4966      return (1ULL << N) - 1;
4967   }
4968}
4969
4970static ULong Replicate32x2 ( ULong bits32 )
4971{
4972   vassert(0 == (bits32 & ~0xFFFFFFFFULL));
4973   return (bits32 << 32) | bits32;
4974}
4975
4976static ULong Replicate16x4 ( ULong bits16 )
4977{
4978   vassert(0 == (bits16 & ~0xFFFFULL));
4979   return Replicate32x2((bits16 << 16) | bits16);
4980}
4981
4982static ULong Replicate8x8 ( ULong bits8 )
4983{
4984   vassert(0 == (bits8 & ~0xFFULL));
4985   return Replicate16x4((bits8 << 8) | bits8);
4986}
4987
4988/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
4989   |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
4990   is 64.  In the former case, the upper 32 bits of the returned value
4991   are guaranteed to be zero. */
4992static ULong VFPExpandImm ( ULong imm8, Int N )
4993{
4994   vassert(imm8 <= 0xFF);
4995   vassert(N == 32 || N == 64);
4996   Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
4997   Int F = N - E - 1;
4998   ULong imm8_6 = (imm8 >> 6) & 1;
4999   /* sign: 1 bit */
5000   /* exp:  E bits */
5001   /* frac: F bits */
5002   ULong sign = (imm8 >> 7) & 1;
5003   ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
5004   ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
5005   vassert(sign < (1ULL << 1));
5006   vassert(exp  < (1ULL << E));
5007   vassert(frac < (1ULL << F));
5008   vassert(1 + E + F == N);
5009   ULong res = (sign << (E+F)) | (exp << F) | frac;
5010   return res;
5011}
5012
5013/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
5014   This might fail, as indicated by the returned Bool.  Page 2530 of
5015   the manual. */
5016static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
5017                               UInt op, UInt cmode, UInt imm8 )
5018{
5019   vassert(op <= 1);
5020   vassert(cmode <= 15);
5021   vassert(imm8 <= 255);
5022
5023   *res = 0; /* will overwrite iff returning True */
5024
5025   ULong imm64    = 0;
5026   Bool  testimm8 = False;
5027
5028   switch (cmode >> 1) {
5029      case 0:
5030         testimm8 = False; imm64 = Replicate32x2(imm8); break;
5031      case 1:
5032         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
5033      case 2:
5034         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
5035      case 3:
5036         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
5037      case 4:
5038          testimm8 = False; imm64 = Replicate16x4(imm8); break;
5039      case 5:
5040          testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
5041      case 6:
5042          testimm8 = True;
5043          if ((cmode & 1) == 0)
5044              imm64 = Replicate32x2((imm8 << 8) | 0xFF);
5045          else
5046              imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
5047          break;
5048      case 7:
5049         testimm8 = False;
5050         if ((cmode & 1) == 0 && op == 0)
5051             imm64 = Replicate8x8(imm8);
5052         if ((cmode & 1) == 0 && op == 1) {
5053             imm64 = 0;   imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
5054             imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
5055             imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
5056             imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
5057             imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
5058             imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
5059             imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
5060             imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
5061         }
5062         if ((cmode & 1) == 1 && op == 0) {
5063            ULong imm8_7  = (imm8 >> 7) & 1;
5064            ULong imm8_6  = (imm8 >> 6) & 1;
5065            ULong imm8_50 = imm8 & 63;
5066            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
5067                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
5068                          | (Replicate(imm8_6, 5) << (6 + 19))
5069                          | (imm8_50              << 19);
5070            imm64 = Replicate32x2(imm32);
5071         }
5072         if ((cmode & 1) == 1 && op == 1) {
5073            // imm64 = imm8<7>:NOT(imm8<6>)
5074            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
5075            ULong imm8_7  = (imm8 >> 7) & 1;
5076            ULong imm8_6  = (imm8 >> 6) & 1;
5077            ULong imm8_50 = imm8 & 63;
5078            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
5079                    | (Replicate(imm8_6, 8) << 54)
5080                    | (imm8_50 << 48);
5081         }
5082         break;
5083      default:
5084        vassert(0);
5085   }
5086
5087   if (testimm8 && imm8 == 0)
5088      return False;
5089
5090   *res = imm64;
5091   return True;
5092}
5093
5094
5095/* Help a bit for decoding laneage for vector operations that can be
5096   of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
5097   and SZ bits, typically for vector floating point. */
5098static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI,  /*OUT*/IRType* tyF,
5099                               /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
5100                               /*OUT*/const HChar** arrSpec,
5101                               Bool bitQ, Bool bitSZ )
5102{
5103   vassert(bitQ == True || bitQ == False);
5104   vassert(bitSZ == True || bitSZ == False);
5105   if (bitQ && bitSZ) { // 2x64
5106      if (tyI)       *tyI       = Ity_I64;
5107      if (tyF)       *tyF       = Ity_F64;
5108      if (nLanes)    *nLanes    = 2;
5109      if (zeroUpper) *zeroUpper = False;
5110      if (arrSpec)   *arrSpec   = "2d";
5111      return True;
5112   }
5113   if (bitQ && !bitSZ) { // 4x32
5114      if (tyI)       *tyI       = Ity_I32;
5115      if (tyF)       *tyF       = Ity_F32;
5116      if (nLanes)    *nLanes    = 4;
5117      if (zeroUpper) *zeroUpper = False;
5118      if (arrSpec)   *arrSpec   = "4s";
5119      return True;
5120   }
5121   if (!bitQ && !bitSZ) { // 2x32
5122      if (tyI)       *tyI       = Ity_I32;
5123      if (tyF)       *tyF       = Ity_F32;
5124      if (nLanes)    *nLanes    = 2;
5125      if (zeroUpper) *zeroUpper = True;
5126      if (arrSpec)   *arrSpec   = "2s";
5127      return True;
5128   }
5129   // Else impliedly 1x64, which isn't allowed.
5130   return False;
5131}
5132
5133/* Helper for decoding laneage for simple vector operations,
5134   eg integer add. */
5135static Bool getLaneInfo_SIMPLE ( /*OUT*/Bool* zeroUpper,
5136                                 /*OUT*/const HChar** arrSpec,
5137                                 Bool bitQ, UInt szBlg2 )
5138{
5139   vassert(bitQ == True || bitQ == False);
5140   vassert(szBlg2 < 4);
5141   Bool zu = False;
5142   const HChar* as = NULL;
5143   switch ((szBlg2 << 1) | (bitQ ? 1 : 0)) {
5144      case 0: zu = True;  as = "8b";  break;
5145      case 1: zu = False; as = "16b"; break;
5146      case 2: zu = True;  as = "4h";  break;
5147      case 3: zu = False; as = "8h";  break;
5148      case 4: zu = True;  as = "2s";  break;
5149      case 5: zu = False; as = "4s";  break;
5150      case 6: return False; // impliedly 1x64
5151      case 7: zu = False; as = "2d";  break;
5152      default: vassert(0);
5153   }
5154   vassert(as);
5155   if (arrSpec)   *arrSpec = as;
5156   if (zeroUpper) *zeroUpper = zu;
5157   return True;
5158}
5159
5160
5161/* Helper for decoding laneage for shift-style vector operations
5162   that involve an immediate shift amount. */
5163static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
5164                                    UInt immh, UInt immb )
5165{
5166   vassert(immh < (1<<4));
5167   vassert(immb < (1<<3));
5168   UInt immhb = (immh << 3) | immb;
5169   if (immh & 8) {
5170      if (shift)  *shift  = 128 - immhb;
5171      if (szBlg2) *szBlg2 = 3;
5172      return True;
5173   }
5174   if (immh & 4) {
5175      if (shift)  *shift  = 64 - immhb;
5176      if (szBlg2) *szBlg2 = 2;
5177      return True;
5178   }
5179   if (immh & 2) {
5180      if (shift)  *shift  = 32 - immhb;
5181      if (szBlg2) *szBlg2 = 1;
5182      return True;
5183   }
5184   if (immh & 1) {
5185      if (shift)  *shift  = 16 - immhb;
5186      if (szBlg2) *szBlg2 = 0;
5187      return True;
5188   }
5189   return False;
5190}
5191
5192
/* Generate IR to fold all lanes of the V128 value in 'src' as
   characterised by the operator 'op', and return the result in the
   bottom bits of a V128, with all other bits set to zero. */
static IRTemp math_MINMAXV ( IRTemp src, IROp op )
{
   /* The basic idea is to use repeated applications of Iop_CatEven*
      and Iop_CatOdd* operators to 'src' so as to clone each lane into
      a complete vector.  Then fold all those vectors with 'op' and
      zero out all but the least significant lane. */
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         /* Stage 1: split into two vectors, each holding one 64-bit
            half of 'src' duplicated into both halves. */
         IRTemp x76547654 = newTemp(Ity_V128);
         IRTemp x32103210 = newTemp(Ity_V128);
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         /* Stage 2: four vectors, each with one 32-bit group
            replicated throughout. */
         IRTemp x76767676 = newTemp(Ity_V128);
         IRTemp x54545454 = newTemp(Ity_V128);
         IRTemp x32323232 = newTemp(Ity_V128);
         IRTemp x10101010 = newTemp(Ity_V128);
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         /* Stage 3: eight vectors, each with one 16-bit group
            replicated throughout. */
         IRTemp x77777777 = newTemp(Ity_V128);
         IRTemp x66666666 = newTemp(Ity_V128);
         IRTemp x55555555 = newTemp(Ity_V128);
         IRTemp x44444444 = newTemp(Ity_V128);
         IRTemp x33333333 = newTemp(Ity_V128);
         IRTemp x22222222 = newTemp(Ity_V128);
         IRTemp x11111111 = newTemp(Ity_V128);
         IRTemp x00000000 = newTemp(Ity_V128);
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here. */
         /* Stage 4: sixteen vectors, each with a single 8-bit lane of
            'src' replicated into all 16 lanes. */
         IRTemp xAllF = newTemp(Ity_V128);
         IRTemp xAllE = newTemp(Ity_V128);
         IRTemp xAllD = newTemp(Ity_V128);
         IRTemp xAllC = newTemp(Ity_V128);
         IRTemp xAllB = newTemp(Ity_V128);
         IRTemp xAllA = newTemp(Ity_V128);
         IRTemp xAll9 = newTemp(Ity_V128);
         IRTemp xAll8 = newTemp(Ity_V128);
         IRTemp xAll7 = newTemp(Ity_V128);
         IRTemp xAll6 = newTemp(Ity_V128);
         IRTemp xAll5 = newTemp(Ity_V128);
         IRTemp xAll4 = newTemp(Ity_V128);
         IRTemp xAll3 = newTemp(Ity_V128);
         IRTemp xAll2 = newTemp(Ity_V128);
         IRTemp xAll1 = newTemp(Ity_V128);
         IRTemp xAll0 = newTemp(Ity_V128);
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         /* Now fold the 16 replicated vectors pairwise with 'op'
            until one vector remains. */
         IRTemp maxFE = newTemp(Ity_V128);
         IRTemp maxDC = newTemp(Ity_V128);
         IRTemp maxBA = newTemp(Ity_V128);
         IRTemp max98 = newTemp(Ity_V128);
         IRTemp max76 = newTemp(Ity_V128);
         IRTemp max54 = newTemp(Ity_V128);
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTemp(Ity_V128);
         IRTemp maxBA98 = newTemp(Ity_V128);
         IRTemp max7654 = newTemp(Ity_V128);
         IRTemp max3210 = newTemp(Ity_V128);
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTemp(Ity_V128);
         IRTemp max76543210 = newTemp(Ity_V128);
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTemp(Ity_V128);
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         /* Keep only the least significant 8-bit lane. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: {
         /* Same scheme as the 8-bit case, but here the temp naming
            (8 lanes of 16 bits) is accurate. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTemp(Ity_V128);
         IRTemp x32103210 = newTemp(Ity_V128);
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTemp(Ity_V128);
         IRTemp x54545454 = newTemp(Ity_V128);
         IRTemp x32323232 = newTemp(Ity_V128);
         IRTemp x10101010 = newTemp(Ity_V128);
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTemp(Ity_V128);
         IRTemp x66666666 = newTemp(Ity_V128);
         IRTemp x55555555 = newTemp(Ity_V128);
         IRTemp x44444444 = newTemp(Ity_V128);
         IRTemp x33333333 = newTemp(Ity_V128);
         IRTemp x22222222 = newTemp(Ity_V128);
         IRTemp x11111111 = newTemp(Ity_V128);
         IRTemp x00000000 = newTemp(Ity_V128);
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Fold the 8 replicated vectors pairwise with 'op'. */
         IRTemp max76 = newTemp(Ity_V128);
         IRTemp max54 = newTemp(Ity_V128);
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTemp(Ity_V128);
         IRTemp max3210 = newTemp(Ity_V128);
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTemp(Ity_V128);
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         /* Keep only the least significant 16-bit lane. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: {
         /* 4 lanes of 32 bits: two replication stages, then fold. */
         IRTemp x3210 = src;
         IRTemp x3232 = newTemp(Ity_V128);
         IRTemp x1010 = newTemp(Ity_V128);
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         IRTemp x3333 = newTemp(Ity_V128);
         IRTemp x2222 = newTemp(Ity_V128);
         IRTemp x1111 = newTemp(Ity_V128);
         IRTemp x0000 = newTemp(Ity_V128);
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTemp(Ity_V128);
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         /* Keep only the least significant 32-bit lane. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      default:
         vassert(0);
   }
}
5383
5384
5385/* Generate IR for TBL and TBX.  This deals with the 128 bit case
5386   only. */
5387static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
5388                             IRTemp oor_values )
5389{
5390   vassert(len >= 0 && len <= 3);
5391
5392   /* Generate some useful constants as concisely as possible. */
5393   IRTemp half15 = newTemp(Ity_I64);
5394   assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
5395   IRTemp half16 = newTemp(Ity_I64);
5396   assign(half16, mkU64(0x1010101010101010ULL));
5397
5398   /* A zero vector */
5399   IRTemp allZero = newTemp(Ity_V128);
5400   assign(allZero, mkV128(0x0000));
5401   /* A vector containing 15 in each 8-bit lane */
5402   IRTemp all15 = newTemp(Ity_V128);
5403   assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
5404   /* A vector containing 16 in each 8-bit lane */
5405   IRTemp all16 = newTemp(Ity_V128);
5406   assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
5407   /* A vector containing 32 in each 8-bit lane */
5408   IRTemp all32 = newTemp(Ity_V128);
5409   assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
5410   /* A vector containing 48 in each 8-bit lane */
5411   IRTemp all48 = newTemp(Ity_V128);
5412   assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
5413   /* A vector containing 64 in each 8-bit lane */
5414   IRTemp all64 = newTemp(Ity_V128);
5415   assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
5416
5417   /* Group the 16/32/48/64 vectors so as to be indexable. */
5418   IRTemp allXX[4] = { all16, all32, all48, all64 };
5419
5420   /* Compute the result for each table vector, with zeroes in places
5421      where the index values are out of range, and OR them into the
5422      running vector. */
5423   IRTemp running_result = newTemp(Ity_V128);
5424   assign(running_result, mkV128(0));
5425
5426   UInt tabent;
5427   for (tabent = 0; tabent <= len; tabent++) {
5428      vassert(tabent >= 0 && tabent < 4);
5429      IRTemp bias = newTemp(Ity_V128);
5430      assign(bias,
5431             mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
5432      IRTemp biased_indices = newTemp(Ity_V128);
5433      assign(biased_indices,
5434             binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
5435      IRTemp valid_mask = newTemp(Ity_V128);
5436      assign(valid_mask,
5437             binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
5438      IRTemp safe_biased_indices = newTemp(Ity_V128);
5439      assign(safe_biased_indices,
5440             binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
5441      IRTemp results_or_junk = newTemp(Ity_V128);
5442      assign(results_or_junk,
5443             binop(Iop_Perm8x16, mkexpr(tab[tabent]),
5444                                 mkexpr(safe_biased_indices)));
5445      IRTemp results_or_zero = newTemp(Ity_V128);
5446      assign(results_or_zero,
5447             binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
5448      /* And OR that into the running result. */
5449      IRTemp tmp = newTemp(Ity_V128);
5450      assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
5451                        mkexpr(running_result)));
5452      running_result = tmp;
5453   }
5454
5455   /* So now running_result holds the overall result where the indices
5456      are in range, and zero in out-of-range lanes.  Now we need to
5457      compute an overall validity mask and use this to copy in the
5458      lanes in the oor_values for out of range indices.  This is
5459      unnecessary for TBL but will get folded out by iropt, so we lean
5460      on that and generate the same code for TBL and TBX here. */
5461   IRTemp overall_valid_mask = newTemp(Ity_V128);
5462   assign(overall_valid_mask,
5463          binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
5464   IRTemp result = newTemp(Ity_V128);
5465   assign(result,
5466          binop(Iop_OrV128,
5467                mkexpr(running_result),
5468                binop(Iop_AndV128,
5469                      mkexpr(oor_values),
5470                      unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
5471   return result;
5472}
5473
5474
5475static
5476Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
5477{
5478#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
5479
5480   /* ---------------- FMOV (general) ---------------- */
5481   /* case   30       23   20 18  15     9 4
5482       (1) 0 00 11110 00 1 00 111 000000 n d     FMOV Sd,      Wn
5483       (2) 1 00 11110 01 1 00 111 000000 n d     FMOV Dd,      Xn
5484       (3) 1 00 11110 10 1 01 111 000000 n d     FMOV Vd.D[1], Xn
5485
5486       (4) 0 00 11110 00 1 00 110 000000 n d     FMOV Wd, Sn
5487       (5) 1 00 11110 01 1 00 110 000000 n d     FMOV Xd, Dn
5488       (6) 1 00 11110 10 1 01 110 000000 n d     FMOV Xd, Vn.D[1]
5489   */
5490   if (INSN(30,24) == BITS7(0,0,1,1,1,1,0)
5491       && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5492      UInt sf = INSN(31,31);
5493      UInt ty = INSN(23,22); // type
5494      UInt rm = INSN(20,19); // rmode
5495      UInt op = INSN(18,16); // opcode
5496      UInt nn = INSN(9,5);
5497      UInt dd = INSN(4,0);
5498      UInt ix = 0; // case
5499      if (sf == 0) {
5500         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
5501            ix = 1;
5502         else
5503         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
5504            ix = 4;
5505      } else {
5506         vassert(sf == 1);
5507         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
5508            ix = 2;
5509         else
5510         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
5511            ix = 5;
5512         else
5513         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
5514            ix = 3;
5515         else
5516         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
5517            ix = 6;
5518      }
5519      if (ix > 0) {
5520         switch (ix) {
5521            case 1:
5522               putQReg128(dd, mkV128(0));
5523               putQRegLO(dd, getIReg32orZR(nn));
5524               DIP("fmov s%u, w%u\n", dd, nn);
5525               break;
5526            case 2:
5527               putQReg128(dd, mkV128(0));
5528               putQRegLO(dd, getIReg64orZR(nn));
5529               DIP("fmov d%u, x%u\n", dd, nn);
5530               break;
5531            case 3:
5532               putQRegHI64(dd, getIReg64orZR(nn));
5533               DIP("fmov v%u.d[1], x%u\n", dd, nn);
5534               break;
5535            case 4:
5536               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
5537               DIP("fmov w%u, s%u\n", dd, nn);
5538               break;
5539            case 5:
5540               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
5541               DIP("fmov x%u, d%u\n", dd, nn);
5542               break;
5543            case 6:
5544               putIReg64orZR(dd, getQRegHI64(nn));
5545               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
5546               break;
5547            default:
5548               vassert(0);
5549         }
5550         return True;
5551      }
5552      /* undecodable; fall through */
5553   }
5554
5555   /* -------------- FMOV (scalar, immediate) -------------- */
5556   /* 31  28    23   20   12  9     4
5557      000 11110 00 1 imm8 100 00000 d  FMOV Sd, #imm
5558      000 11110 01 1 imm8 100 00000 d  FMOV Dd, #imm
5559   */
5560   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
5561       && INSN(21,21) == 1 && INSN(12,5) == BITS8(1,0,0,0,0,0,0,0)) {
5562      Bool  isD  = INSN(22,22) == 1;
5563      UInt  imm8 = INSN(20,13);
5564      UInt  dd   = INSN(4,0);
5565      ULong imm  = VFPExpandImm(imm8, isD ? 64 : 32);
5566      if (!isD) {
5567         vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
5568      }
5569      putQReg128(dd, mkV128(0));
5570      putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
5571      DIP("fmov %s, #0x%llx\n",
5572          nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
5573      return True;
5574   }
5575
5576   /* -------------- {FMOV,MOVI} (vector, immediate) -------------- */
5577   /* 31    28          18  15    11 9     4
5578      0q op 01111 00000 abc cmode 01 defgh d  MOV Dd,   #imm (q=0)
5579                                              MOV Vd.2d #imm (q=1)
5580      Allowable op:cmode
5581         FMOV = 1:1111
5582         MOVI = 0:xx00, 1:0x00, 1:10x0, 1:110x, x:1110
5583   */
5584   if (INSN(31,31) == 0
5585       && INSN(28,19) == BITS10(0,1,1,1,1,0,0,0,0,0)
5586       && INSN(11,10) == BITS2(0,1)) {
5587      UInt  bitQ     = INSN(30,30);
5588      UInt  bitOP    = INSN(29,29);
5589      UInt  cmode    = INSN(15,12);
5590      UInt  imm8     = (INSN(18,16) << 5) | INSN(9,5);
5591      UInt  dd       = INSN(4,0);
5592      ULong imm64lo  = 0;
5593      UInt  op_cmode = (bitOP << 4) | cmode;
5594      Bool  ok       = False;
5595      switch (op_cmode) {
5596         case BITS5(1,1,1,1,1): // 1:1111
5597         case BITS5(0,0,0,0,0): case BITS5(0,0,1,0,0):
5598         case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:xx00
5599         case BITS5(1,0,0,0,0): case BITS5(1,0,1,0,0): // 1:0x00
5600         case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
5601         case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
5602         case BITS5(1,1,1,1,0): case BITS5(0,1,1,1,0): // x:1110
5603            ok = True; break;
5604         default:
5605           break;
5606      }
5607      if (ok) {
5608         ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, imm8);
5609      }
5610      if (ok) {
5611         ULong imm64hi = (bitQ == 0 && bitOP == 0)  ? 0  : imm64lo;
5612         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo)));
5613         DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
5614         return True;
5615      }
5616      /* else fall through */
5617   }
5618
5619   /* -------------- {S,U}CVTF (vector, integer, scalar) -------------- */
5620   /* 31  28    23 21     15     9 4                ix (u:sz)
5621      010 11110 00 100001 110110 n d  SCVTF Sd, Sn   0
5622        0       01                    SCVTF Dd, Dn   1
5623        1       00                    UCVTF Sd, Sn   2
5624        1       01                    UCVTF Dd, Dn   3
5625   */
5626   if (INSN(31,30) == BITS2(0,1) && INSN(28,23) == BITS6(1,1,1,1,0,0)
5627       && INSN(21, 10) == BITS12(1,0,0,0,0,1,1,1,0,1,1,0)) {
5628      Bool is64 = INSN(22,22);
5629      Bool isU  = INSN(29,29);
5630      UInt nn   = INSN(9,5);
5631      UInt dd   = INSN(4,0);
5632
5633      UInt ix   = (INSN(29,29) << 1) | INSN(22,22);
5634
5635      const IROp ops[4]
5636        = { Iop_I32StoF32, Iop_I64StoF64,
5637            Iop_I32UtoF32, Iop_I64UtoF64 };
5638
5639      putQReg128(dd, mkV128(0));
5640      putQRegLO(dd, binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), getQRegLO(nn, is64 ? Ity_I64 : Ity_I32)));
5641
5642      DIP("%ccvtf %s, %s\n",
5643          isU ? 'u' : 's', nameQRegLO(dd, is64 ? Ity_F64 : Ity_F32),
5644          nameQRegLO(nn, is64 ? Ity_I64 : Ity_I32));
5645
5646      return True;
5647   }
   /* -------------- {S,U}CVTF (scalar, fixed-point) -------------- */
   /* 31  28    23 21 20 18  15     9 4                  ix
      000 11110 00 1  00 010 000000 n d  SCVTF Sd, Wn    0
      000 11110 01 1  00 010 000000 n d  SCVTF Dd, Wn    1
      100 11110 00 1  00 010 000000 n d  SCVTF Sd, Xn    2
      100 11110 01 1  00 010 000000 n d  SCVTF Dd, Xn    3

      000 11110 00 1  00 011 000000 n d  UCVTF Sd, Wn    4
      000 11110 01 1  00 011 000000 n d  UCVTF Dd, Wn    5
      100 11110 00 1  00 011 000000 n d  UCVTF Sd, Xn    6
      100 11110 01 1  00 011 000000 n d  UCVTF Dd, Xn    7

      These are signed/unsigned conversion from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR.
   */
   if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,17) == BITS5(1,0,0,0,1)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
      Bool isI64 = INSN(31,31) == 1;
      Bool isF64 = INSN(22,22) == 1;
      Bool isU   = INSN(16,16) == 1;
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      /* ix indexes ops[] as {signed,unsigned} x {I32,I64} x {F32,F64},
         matching the table in the header comment. */
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
        = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
            Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      /* I32->F64 is exact, so Iop_I32{S,U}toF64 are unops taking no
         rounding mode; every other conversion can round and is a binop. */
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src);
      /* Scalar FP write zeroes the rest of the vector register. */
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }
5685
   /* ------------ F{ADD,SUB,MUL,DIV,NMUL} (scalar) ------------ */
   /* 31        23  20 15   11 9 4
      ---------------- 0000 ------   FMUL  --------
      000 11110 001 m  0001 10 n d   FDIV  Sd,Sn,Sm
      000 11110 011 m  0001 10 n d   FDIV  Dd,Dn,Dm
      ---------------- 0010 ------   FADD  --------
      ---------------- 0011 ------   FSUB  --------
      ---------------- 1000 ------   FNMUL --------
   */
   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      Bool   isD = INSN(22,22) == 1;
      UInt   mm  = INSN(20,16);
      UInt   op  = INSN(15,12);
      UInt   nn  = INSN(9,5);
      UInt   dd  = INSN(4,0);
      IROp   iop = Iop_INVALID;
      IRType ty  = isD ? Ity_F64 : Ity_F32;
      Bool   neg = False;   /* True only for FNMUL: negate the product */
      const HChar* nm = "???";
      switch (op) {
         case BITS4(0,0,0,0): nm = "fmul";  iop = mkMULF(ty); break;
         case BITS4(0,0,0,1): nm = "fdiv";  iop = mkDIVF(ty); break;
         case BITS4(0,0,1,0): nm = "fadd";  iop = mkADDF(ty); break;
         case BITS4(0,0,1,1): nm = "fsub";  iop = mkSUBF(ty); break;
         case BITS4(1,0,0,0): nm = "fnmul"; iop = mkMULF(ty);
                              neg = True; break;
         default:             return False;
      }
      vassert(iop != Iop_INVALID);
      /* All these ops round per the FPCR-derived IR rounding mode. */
      IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()),
                           getQRegLO(nn, ty), getQRegLO(mm, ty));
      IRTemp res = newTemp(ty);
      assign(res, neg ? unop(mkNEGF(ty),resE) : resE);
      /* Scalar FP write zeroes the upper lanes of the destination. */
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, mkexpr(res));
      DIP("%s %s, %s, %s\n",
          nm, nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
      return True;
   }
5726
   /* ------------ F{MOV,ABS,NEG,SQRT} D/D or S/S ------------ */
   /* 31        23 21    16 14    9 4
      000 11110 00 10000 00 10000 n d  FMOV Sd, Sn
      000 11110 01 10000 00 10000 n d  FMOV Dd, Dn
      ------------------ 01 ---------  FABS ------
      ------------------ 10 ---------  FNEG ------
      ------------------ 11 ---------  FSQRT -----
   */
   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
       && INSN(21,17) == BITS5(1,0,0,0,0)
       && INSN(14,10) == BITS5(1,0,0,0,0)) {
      Bool   isD = INSN(22,22) == 1;
      UInt   opc = INSN(16,15);
      UInt   nn  = INSN(9,5);
      UInt   dd  = INSN(4,0);
      IRType ty  = isD ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ty);
      if (opc == BITS2(0,0)) {
         /* FMOV: straight copy, no rounding involved. */
         assign(res, getQRegLO(nn, ty));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("fmov %s, %s\n",
             nameQRegLO(dd, ty), nameQRegLO(nn, ty));
         return True;
      }
      if (opc == BITS2(1,0) || opc == BITS2(0,1)) {
         /* FABS (opc=01) / FNEG (opc=10): no rounding mode needed. */
         Bool isAbs = opc == BITS2(0,1);
         IROp op    = isAbs ? mkABSF(ty) : mkNEGF(ty);
         assign(res, unop(op, getQRegLO(nn, ty)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("%s %s, %s\n", isAbs ? "fabs" : "fneg",
             nameQRegLO(dd, ty), nameQRegLO(nn, ty));
         return True;
      }
      if (opc == BITS2(1,1)) {
         /* FSQRT rounds per the FPCR-derived IR rounding mode. */
         assign(res,
                binop(mkSQRTF(ty),
                      mkexpr(mk_get_IR_rounding_mode()), getQRegLO(nn, ty)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("fsqrt %s, %s\n", nameQRegLO(dd, ty), nameQRegLO(nn, ty));
         return True;
      }
      /* else fall through; other cases are ATC */
   }
5773
   /* ---------------- F{ABS,NEG} (vector) ---------------- */
   /* 31  28      22 21    16       9 4
      0q0 01110 1 sz 10000 01111 10 n d  FABS Vd.T, Vn.T
      0q1 01110 1 sz 10000 01111 10 n d  FNEG Vd.T, Vn.T
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,1)
       && INSN(21,17) == BITS5(1,0,0,0,0)
       && INSN(16,10) == BITS7(0,1,1,1,1,1,0)) {
      UInt bitQ   = INSN(30,30);
      UInt bitSZ  = INSN(22,22);
      Bool isFNEG = INSN(29,29) == 1;
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* ar = "??";      /* arrangement name for disassembly */
      IRType tyF    = Ity_INVALID;
      Bool   zeroHI = False;       /* True => 64-bit op; zero upper half */
      /* Decode Q:SZ into lane type/arrangement; fails for the illegal
         combination, in which case we fall through. */
      Bool   ok     = getLaneInfo_Q_SZ(NULL, &tyF, NULL, &zeroHI, &ar,
                                       (Bool)bitQ, (Bool)bitSZ);
      if (ok) {
         vassert(tyF == Ity_F64 || tyF == Ity_F32);
         IROp op = (tyF == Ity_F64) ? (isFNEG ? Iop_Neg64Fx2 : Iop_Abs64Fx2)
                                    : (isFNEG ? Iop_Neg32Fx4 : Iop_Abs32Fx4);
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(op, getQReg128(nn)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
                               : mkexpr(res));
         DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
             nameQReg128(dd), ar, nameQReg128(nn), ar);
         return True;
      }
      /* else fall through */
   }
5806
   /* -------------------- FCMP,FCMPE -------------------- */
   /* 31        23   20    15      9 4
      000 11110 01 1     m 00 1000 n 10 000  FCMPE Dn, Dm
      000 11110 01 1 00000 00 1000 n 11 000  FCMPE Dn, #0.0
      000 11110 01 1     m 00 1000 n 00 000  FCMP  Dn, Dm
      000 11110 01 1 00000 00 1000 n 01 000  FCMP  Dn, #0.0

      000 11110 00 1     m 00 1000 n 10 000  FCMPE Sn, Sm
      000 11110 00 1 00000 00 1000 n 11 000  FCMPE Sn, #0.0
      000 11110 00 1     m 00 1000 n 00 000  FCMP  Sn, Sm
      000 11110 00 1 00000 00 1000 n 01 000  FCMP  Sn, #0.0

      FCMPE generates Invalid Operation exn if either arg is any kind
      of NaN.  FCMP generates Invalid Operation exn if either arg is a
      signalling NaN.  We ignore this detail here and produce the same
      IR for both.
   */
   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) && INSN(21,21) == 1
       && INSN(15,10) == BITS6(0,0,1,0,0,0) && INSN(2,0) == BITS3(0,0,0)) {
      Bool   isD     = INSN(22,22) == 1;
      UInt   mm      = INSN(20,16);
      UInt   nn      = INSN(9,5);
      Bool   isCMPE  = INSN(4,4) == 1;   /* only affects disassembly text */
      Bool   cmpZero = INSN(3,3) == 1;
      IRType ty      = isD ? Ity_F64 : Ity_F32;
      Bool   valid   = True;
      /* The compare-with-zero forms require the m field to be 00000. */
      if (cmpZero && mm != 0) valid = False;
      if (valid) {
         IRTemp argL  = newTemp(ty);
         IRTemp argR  = newTemp(ty);
         IRTemp irRes = newTemp(Ity_I32);
         assign(argL, getQRegLO(nn, ty));
         assign(argR,
                cmpZero
                   ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
                   : getQRegLO(mm, ty));
         assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
                             mkexpr(argL), mkexpr(argR)));
         /* Map the IRCmpF64Result to ARM NZCV, then shift it to bits
            31:28, which is where setFlags_COPY expects the flags. */
         IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
         IRTemp nzcv_28x0 = newTemp(Ity_I64);
         assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
         setFlags_COPY(nzcv_28x0);
         DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ty),
             cmpZero ? "#0.0" : nameQRegLO(mm, ty));
         return True;
      }
   }
5854
   /* -------------------- F{N}M{ADD,SUB} -------------------- */
   /* 31          22   20 15 14 9 4   ix
      000 11111 0 sz 0 m  0  a  n d   0   FMADD  Fd,Fn,Fm,Fa
      000 11111 0 sz 0 m  1  a  n d   1   FMSUB  Fd,Fn,Fm,Fa
      000 11111 0 sz 1 m  0  a  n d   2   FNMADD Fd,Fn,Fm,Fa
      000 11111 0 sz 1 m  1  a  n d   3   FNMSUB Fd,Fn,Fm,Fa
      where Fx=Dx when sz=1, Fx=Sx when sz=0

               -----SPEC------    ----IMPL----
      fmadd       a +    n * m    a + n * m
      fmsub       a + (-n) * m    a - n * m
      fnmadd   (-a) + (-n) * m    -(a + n * m)
      fnmsub   (-a) +    n * m    -(a - n * m)
   */
   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,1,0)) {
      Bool    isD   = INSN(22,22) == 1;
      UInt    mm    = INSN(20,16);
      UInt    aa    = INSN(14,10);
      UInt    nn    = INSN(9,5);
      UInt    dd    = INSN(4,0);
      UInt    ix    = (INSN(21,21) << 1) | INSN(15,15);
      IRType  ty    = isD ? Ity_F64 : Ity_F32;
      IROp    opADD = mkADDF(ty);
      IROp    opSUB = mkSUBF(ty);
      IROp    opMUL = mkMULF(ty);
      IROp    opNEG = mkNEGF(ty);
      IRTemp  res   = newTemp(ty);
      IRExpr* eA    = getQRegLO(aa, ty);
      IRExpr* eN    = getQRegLO(nn, ty);
      IRExpr* eM    = getQRegLO(mm, ty);
      IRExpr* rm    = mkexpr(mk_get_IR_rounding_mode());
      /* NOTE(review): mul-then-add rounds twice, so this is not a true
         fused multiply-add; see the SPEC vs IMPL table above. */
      IRExpr* eNxM  = triop(opMUL, rm, eN, eM);
      switch (ix) {
         case 0:  assign(res, triop(opADD, rm, eA, eNxM)); break;
         case 1:  assign(res, triop(opSUB, rm, eA, eNxM)); break;
         case 2:  assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
         case 3:  assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
         default: vassert(0);
      }
      /* Scalar FP write zeroes the rest of the vector register. */
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
      DIP("%s %s, %s, %s, %s\n",
          names[ix], nameQRegLO(dd, ty), nameQRegLO(nn, ty),
                     nameQRegLO(mm, ty), nameQRegLO(aa, ty));
      return True;
   }
5902
   /* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */
   /*    30       23   20 18  15     9 4
      sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
      sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
      ---------------- 01 --------------  FCVTP-------- (round to +inf)
      ---------------- 10 --------------  FCVTM-------- (round to -inf)
      ---------------- 11 --------------  FCVTZ-------- (round to zero)

      Rd is Xd when sf==1, Wd when sf==0
      Fn is Dn when x==1, Sn when x==0
      20:19 carry the rounding mode, using the same encoding as FPCR
   */
   if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,21) == 1
       && INSN(18,17) == BITS2(0,0) && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
      Bool isI64 = INSN(31,31) == 1;
      Bool isF64 = INSN(22,22) == 1;
      UInt rm    = INSN(20,19);
      Bool isU   = INSN(16,16) == 1;
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      /* Decide on the IR rounding mode to use. */
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      switch (rm) {
         case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
         case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
         case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
         case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
         default: vassert(0);
      }
      vassert(irrm != 8);
      /* Decide on the conversion primop, based on the source size,
         dest size and signedness (8 possibilities).  Case coding:
            F32 ->s I32   0
            F32 ->u I32   1
            F32 ->s I64   2
            F32 ->u I64   3
            F64 ->s I32   4
            F64 ->u I32   5
            F64 ->s I64   6
            F64 ->u I64   7
      */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
      vassert(ix < 8);
      const IROp ops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp op = ops[ix];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      if (/* F32toI32S */
             (op == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (op == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (op == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          /* F32toI32U */
          || (op == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (op == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          /* F32toI64S */
          || (op == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          /* F32toI64U */
          || (op == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          || (op == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
          /* F64toI32S */
          || (op == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (op == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (op == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          /* F64toI32U */
          || (op == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (op == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          /* F64toI64S */
          || (op == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (op == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (op == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          /* F64toI64U */
          || (op == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (op == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
         ) {
        /* validated */
      } else {
        return False;
      }
      IRType srcTy  = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy  = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src    = newTemp(srcTy);
      IRTemp dst    = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      /* The rounding mode is encoded in the instruction, not read from
         FPCR, hence the constant mkU32(irrm) here. */
      assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }
5995
   /* -------- FCVTAS (KLUDGED) (scalar, integer) -------- */
   /*   30       23   20 18  15     9 4
      1 00 11110 0x 1 00 100 000000 n d  FCVTAS Xd, Fn
      0 00 11110 0x 1 00 100 000000 n d  FCVTAS Wd, Fn
      Fn is Dn when x==1, Sn when x==0
   */
   if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0)
       && INSN(21,16) == BITS6(1,0,0,1,0,0)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
      Bool isI64 = INSN(31,31) == 1;
      Bool isF64 = INSN(22,22) == 1;
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      /* Decide on the IR rounding mode to use. */
      /* KLUDGE: should be Irrm_NEAREST_TIE_AWAY_0 */
      IRRoundingMode irrm = Irrm_NEAREST;
      /* Decide on the conversion primop.  FCVTAS is always a signed
         conversion, hence only the S-flavoured ops appear here. */
      IROp   op    = isI64 ? (isF64 ? Iop_F64toI64S :  Iop_F32toI64S)
                           : (isF64 ? Iop_F64toI32S :  Iop_F32toI32S);
      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src   = newTemp(srcTy);
      IRTemp dst   = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvtas %s, %s (KLUDGED)\n",
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }
6026
   /* ---------------- FRINT{I,M,P,Z} (scalar) ---------------- */
   /* 31        23 21   17  14    9 4
      000 11110 0x 1001 111 10000 n d  FRINTI Fd, Fm (round per FPCR)
                        rm
      x==0 => S-registers, x==1 => D-registers
      rm (17:15) encodings:
         111 per FPCR  (FRINTI)
         001 +inf      (FRINTP)
         010 -inf      (FRINTM)
         011 zero      (FRINTZ)
         000 tieeven
         100 tieaway   (FRINTA) -- !! FIXME KLUDGED !!
         110 per FPCR + "exact = TRUE"
         101 unallocated
   */
   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
       && INSN(21,18) == BITS4(1,0,0,1) && INSN(14,10) == BITS5(1,0,0,0,0)) {
      Bool    isD   = INSN(22,22) == 1;
      UInt    rm    = INSN(17,15);
      UInt    nn    = INSN(9,5);
      UInt    dd    = INSN(4,0);
      IRType  ty    = isD ? Ity_F64 : Ity_F32;
      IRExpr* irrmE = NULL;   /* stays NULL for unhandled rm encodings */
      UChar   ch    = '?';
      switch (rm) {
         case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
         case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
         case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
         case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
         default: break;
      }
      if (irrmE) {
         IRTemp src = newTemp(ty);
         IRTemp dst = newTemp(ty);
         assign(src, getQRegLO(nn, ty));
         assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
                           irrmE, mkexpr(src)));
         /* Scalar FP write zeroes the rest of the vector register. */
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(dst));
         DIP("frint%c %s, %s\n",
             ch, nameQRegLO(dd, ty), nameQRegLO(nn, ty));
         return True;
      }
      /* else unhandled rounding mode case -- fall through */
   }
6073
   /* ------------------ FCVT (scalar) ------------------ */
   /* 31        23 21    16 14    9 4
      000 11110 11 10001 00 10000 n d   FCVT Sd, Hn (unimp)
      --------- 11 ----- 01 ---------   FCVT Dd, Hn (unimp)
      --------- 00 ----- 11 ---------   FCVT Hd, Sn (unimp)
      --------- 00 ----- 01 ---------   FCVT Dd, Sn
      --------- 01 ----- 11 ---------   FCVT Hd, Dn (unimp)
      --------- 01 ----- 00 ---------   FCVT Sd, Dn
      Rounding, when dst is smaller than src, is per the FPCR.
   */
   if (INSN(31,24) == BITS8(0,0,0,1,1,1,1,0)
       && INSN(21,17) == BITS5(1,0,0,0,1)
       && INSN(14,10) == BITS5(1,0,0,0,0)) {
      UInt b2322 = INSN(23,22);   /* source format */
      UInt b1615 = INSN(16,15);   /* destination format */
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
         /* Convert S to D: widening is exact, so no rounding mode. */
         IRTemp res = newTemp(Ity_F64);
         assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("fcvt %s, %s\n",
             nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32));
         return True;
      }
      if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
         /* Convert D to S: narrowing rounds per the FPCR. */
         IRTemp res = newTemp(Ity_F32);
         assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
                                         getQRegLO(nn, Ity_F64)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("fcvt %s, %s\n",
             nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64));
         return True;
      }
      /* else unhandled */
   }
6114
   /* ------------------ FABD (scalar) ------------------ */
   /* 31        23  20 15     9 4
      011 11110 111 m  110101 n d  FABD  Dd, Dn, Dm
      011 11110 101 m  110101 n d  FABD  Sd, Sn, Sm
   */
   if (INSN(31,23) == BITS9(0,1,1,1,1,1,1,0,1) && INSN(21,21) == 1
       && INSN(15,10) == BITS6(1,1,0,1,0,1)) {
      Bool   isD = INSN(22,22) == 1;
      UInt   mm  = INSN(20,16);
      UInt   nn  = INSN(9,5);
      UInt   dd  = INSN(4,0);
      IRType ty  = isD ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ty);
      /* Computed as abs(n - m); the subtract rounds per the FPCR. */
      assign(res, unop(mkABSF(ty),
                       triop(mkSUBF(ty),
                             mkexpr(mk_get_IR_rounding_mode()),
                             getQRegLO(nn,ty), getQRegLO(mm,ty))));
      /* Scalar FP write zeroes the rest of the vector register. */
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fabd %s, %s, %s\n",
          nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
      return True;
   }
6138
   /* -------------- {S,U}CVTF (vector, integer) -------------- */
   /* 31  28      22 21       15     9 4
      0q0 01110 0 sz 1  00001 110110 n d  SCVTF Vd, Vn
      0q1 01110 0 sz 1  00001 110110 n d  UCVTF Vd, Vn
      with laneage:
      case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,0)
       && INSN(21,16) == BITS6(1,0,0,0,0,1)
       && INSN(15,10) == BITS6(1,1,0,1,1,0)) {
      Bool isQ   = INSN(30,30) == 1;
      Bool isU   = INSN(29,29) == 1;
      Bool isF64 = INSN(22,22) == 1;
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      /* Exclude the illegal sz:Q == 10 combination (2D needs Q=1). */
      if (isQ || !isF64) {
         IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
         UInt   nLanes = 0;
         Bool   zeroHI = False;
         const HChar* arrSpec = NULL;
         Bool   ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
                                      isQ, isF64 );
         IROp   op = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
                         : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
         IRTemp rm = mk_get_IR_rounding_mode();
         UInt   i;
         vassert(ok); /* the 'if' above should ensure this */
         /* Convert lane by lane, all rounding per the FPCR. */
         for (i = 0; i < nLanes; i++) {
            putQRegLane(dd, i,
                        binop(op, mkexpr(rm), getQRegLane(nn, i, tyI)));
         }
         /* For the 64-bit (2S) form, zero the upper half of Vd. */
         if (zeroHI) {
            putQRegLane(dd, 1, mkU64(0));
         }
         DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
             nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }
6179
   /* ---------- F{ADD,SUB,MUL,DIV,MLA,MLS} (vector) ---------- */
   /* 31  28      22 21 20 15     9 4                  case
      0q0 01110 0 sz 1  m  110101 n d  FADD Vd,Vn,Vm   1
      0q0 01110 1 sz 1  m  110101 n d  FSUB Vd,Vn,Vm   2
      0q1 01110 0 sz 1  m  110111 n d  FMUL Vd,Vn,Vm   3
      0q1 01110 0 sz 1  m  111111 n d  FDIV Vd,Vn,Vm   4
      0q0 01110 0 sz 1  m  110011 n d  FMLA Vd,Vn,Vm   5
      0q0 01110 1 sz 1  m  110011 n d  FMLS Vd,Vn,Vm   6
      0q1 01110 1 sz 1  m  110101 n d  FABD Vd,Vn,Vm   7
   */
   if (INSN(31,31) == 0
       && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) {
      Bool isQ   = INSN(30,30) == 1;
      UInt b29   = INSN(29,29);
      UInt b23   = INSN(23,23);
      Bool isF64 = INSN(22,22) == 1;
      UInt mm    = INSN(20,16);
      UInt b1510 = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      /* ix selects the operation per the 'case' column above;
         0 means no match, and we fall out of this group. */
      UInt ix    = 0;
      /**/ if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,1,0,1)) ix = 1;
      else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 2;
      else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,0,1,1,1)) ix = 3;
      else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,1,1,1,1)) ix = 4;
      else if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,0,1,1)) ix = 5;
      else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,0,1,1)) ix = 6;
      else if (b29 == 1 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 7;
      IRType laneTy = Ity_INVALID;
      Bool   zeroHI = False;
      const HChar* arr = "??";
      Bool ok
         = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
      /* Skip MLA/MLS for the time being */
      if (ok && ix >= 1 && ix <= 4) {
         /* FADD/FSUB/FMUL/FDIV: one vector op, rounding per FPCR. */
         const IROp ops64[4]
            = { Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2 };
         const IROp ops32[4]
            = { Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4 };
         const HChar* names[4]
            = { "fadd", "fsub", "fmul", "fdiv" };
         IROp   op = laneTy==Ity_F64 ? ops64[ix-1] : ops32[ix-1];
         IRTemp rm = mk_get_IR_rounding_mode();
         IRTemp t1 = newTemp(Ity_V128);
         IRTemp t2 = newTemp(Ity_V128);
         assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
         assign(t2, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t1))
                           : mkexpr(t1));
         putQReg128(dd, mkexpr(t2));
         DIP("%s %s.%s, %s.%s, %s.%s\n", names[ix-1],
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      if (ok && ix >= 5 && ix <= 6) {
         /* FMLA/FMLS: d = d +/- (n * m), accumulating into dd. */
         IROp opADD = laneTy==Ity_F64 ? Iop_Add64Fx2 : Iop_Add32Fx4;
         IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
         IROp opMUL = laneTy==Ity_F64 ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
         IRTemp rm = mk_get_IR_rounding_mode();
         IRTemp t1 = newTemp(Ity_V128);
         IRTemp t2 = newTemp(Ity_V128);
         // FIXME: double rounding; use FMA primops instead
         assign(t1, triop(opMUL,
                          mkexpr(rm), getQReg128(nn), getQReg128(mm)));
         assign(t2, triop(ix == 5 ? opADD : opSUB,
                          mkexpr(rm), getQReg128(dd), mkexpr(t1)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
                               : mkexpr(t2));
         DIP("%s %s.%s, %s.%s, %s.%s\n", ix == 5 ? "fmla" : "fmls",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      if (ok && ix == 7) {
         /* FABD (vector): abs(n - m) per lane. */
         IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
         IROp opABS = laneTy==Ity_F64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
         IRTemp rm = mk_get_IR_rounding_mode();
         IRTemp t1 = newTemp(Ity_V128);
         IRTemp t2 = newTemp(Ity_V128);
         // FIXME: use Abd primop instead?
         assign(t1, triop(opSUB,
                          mkexpr(rm), getQReg128(nn), getQReg128(mm)));
         assign(t2, unop(opABS, mkexpr(t1)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
                               : mkexpr(t2));
         DIP("fabd %s.%s, %s.%s, %s.%s\n",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
   }
6268
6269   /* ------------ FCM{EQ,GE,GT}, FAC{GE,GT} (vector) ------------ */
6270   /* 31  28      22   20 15     9 4                  case
6271      0q1 01110 0 sz 1 m  111011 n d  FACGE Vd, Vn, Vm
6272      0q1 01110 1 sz 1 m  111011 n d  FACGT Vd, Vn, Vm
6273      0q0 01110 0 sz 1 m  111001 n d  FCMEQ Vd, Vn, Vm
6274      0q1 01110 0 sz 1 m  111001 n d  FCMGE Vd, Vn, Vm
6275      0q1 01110 1 sz 1 m  111001 n d  FCMGT Vd, Vn, Vm
6276   */
6277   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1
6278       && INSN(15,12) == BITS4(1,1,1,0) && INSN(10,10) == 1) {
6279      Bool isQ   = INSN(30,30) == 1;
6280      UInt U     = INSN(29,29);
6281      UInt E     = INSN(23,23);
6282      Bool isF64 = INSN(22,22) == 1;
6283      UInt ac    = INSN(11,11);
6284      UInt mm    = INSN(20,16);
6285      UInt nn    = INSN(9,5);
6286      UInt dd    = INSN(4,0);
6287      /* */
6288      UInt   EUac   = (E << 2) | (U << 1) | ac;
6289      IROp   opABS  = Iop_INVALID;
6290      IROp   opCMP  = Iop_INVALID;
6291      IRType laneTy = Ity_INVALID;
6292      Bool   zeroHI = False;
6293      Bool   swap   = True;
6294      const HChar* arr = "??";
6295      const HChar* nm  = "??";
6296      Bool ok
6297         = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
6298      if (ok) {
6299         vassert((isF64 && laneTy == Ity_F64) || (!isF64 && laneTy == Ity_F32));
6300         switch (EUac) {
6301            case BITS3(0,0,0):
6302               nm    = "fcmeq";
6303               opCMP = isF64 ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
6304               swap  = False;
6305               break;
6306            case BITS3(0,1,0):
6307               nm    = "fcmge";
6308               opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6309               break;
6310            case BITS3(0,1,1):
6311               nm    = "facge";
6312               opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6313               opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
6314               break;
6315            case BITS3(1,1,0):
6316               nm    = "fcmgt";
6317               opCMP = isF64 ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
6318               break;
6319            case BITS3(1,1,1):
6320               nm    = "fcagt";
6321               opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6322               opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
6323               break;
6324            default:
6325               break;
6326         }
6327      }
6328      if (opCMP != Iop_INVALID) {
6329         IRExpr* argN = getQReg128(nn);
6330         IRExpr* argM = getQReg128(mm);
6331         if (opABS != Iop_INVALID) {
6332            argN = unop(opABS, argN);
6333            argM = unop(opABS, argM);
6334         }
6335         IRExpr* res = swap ? binop(opCMP, argM, argN)
6336                            : binop(opCMP, argN, argM);
6337         if (zeroHI) {
6338            res = unop(Iop_ZeroHI64ofV128, res);
6339         }
6340         putQReg128(dd, res);
6341         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
6342             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
6343         return True;
6344      }
6345      /* else fall through */
6346   }
6347
   /* -------------------- FCVTN -------------------- */
   /* 31  28    23  20    15     9 4
      0q0 01110 0s1 00001 011010 n d  FCVTN Vd, Vn
      where case q:s of 00: 16Fx4(lo) <- 32Fx4
                        01: 32Fx2(lo) <- 64Fx2
                        10: 16Fx4(hi) <- 32Fx4
                        11: 32Fx2(hi) <- 64Fx2
      Only deals with the 32Fx2 <- 64Fx2 version (s==1)
   */
   if (INSN(31,31) == 0 && INSN(29,23) == BITS7(0,0,1,1,1,0,0)
       && INSN(21,10) == BITS12(1,0,0,0,0,1,0,1,1,0,1,0)) {
      UInt bQ = INSN(30,30);
      UInt bS = INSN(22,22);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      if (bS == 1) {
         /* Narrow the two F64 lanes of Vn to F32, rounding per the
            current FPCR rounding mode.  bQ selects the destination
            half: 0 = F32 lanes 0..1 (FCVTN), 1 = lanes 2..3 (FCVTN2). */
         IRTemp  rm    = mk_get_IR_rounding_mode();
         IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
         IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
         putQRegLane(dd, 2 * bQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
         putQRegLane(dd, 2 * bQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
         if (bQ == 0) {
            /* FCVTN zeroes the upper 64 bits of Vd (I64 lane 1);
               FCVTN2 leaves the lower half intact. */
            putQRegLane(dd, 1, mkU64(0));
         }
         DIP("fcvtn%s %s.%s, %s.2d\n", bQ ? "2" : "",
             nameQReg128(dd), bQ ? "4s" : "2s", nameQReg128(nn));
         return True;
      }
      /* else fall through */
   }
6378
6379   /* ---------------- ADD/SUB (vector) ---------------- */
6380   /* 31  28    23   21 20 15     9 4
6381      0q0 01110 size 1  m  100001 n d  ADD Vd.T, Vn.T, Vm.T
6382      0q1 01110 size 1  m  100001 n d  SUB Vd.T, Vn.T, Vm.T
6383   */
6384   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
6385       && INSN(21,21) == 1 && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
6386      Bool isQ    = INSN(30,30) == 1;
6387      UInt szBlg2 = INSN(23,22);
6388      Bool isSUB  = INSN(29,29) == 1;
6389      UInt mm     = INSN(20,16);
6390      UInt nn     = INSN(9,5);
6391      UInt dd     = INSN(4,0);
6392      Bool zeroHI = False;
6393      const HChar* arrSpec = "";
6394      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
6395      if (ok) {
6396         const IROp opsADD[4]
6397            = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
6398         const IROp opsSUB[4]
6399            = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
6400         vassert(szBlg2 < 4);
6401         IROp   op = isSUB ? opsSUB[szBlg2] : opsADD[szBlg2];
6402         IRTemp t  = newTemp(Ity_V128);
6403         assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
6404         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
6405                               : mkexpr(t));
6406         const HChar* nm = isSUB ? "sub" : "add";
6407         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
6408             nameQReg128(dd), arrSpec,
6409             nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
6410         return True;
6411      }
6412      /* else fall through */
6413   }
6414
6415   /* ---------------- ADD/SUB (scalar) ---------------- */
6416   /* 31  28    23 21 20 15     9 4
6417      010 11110 11 1  m  100001 n d  ADD Dd, Dn, Dm
6418      011 11110 11 1  m  100001 n d  SUB Dd, Dn, Dm
6419   */
6420   if (INSN(31,30) == BITS2(0,1) && INSN(28,21) == BITS8(1,1,1,1,0,1,1,1)
6421       && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
6422      Bool isSUB = INSN(29,29) == 1;
6423      UInt mm    = INSN(20,16);
6424      UInt nn    = INSN(9,5);
6425      UInt dd    = INSN(4,0);
6426      IRTemp res = newTemp(Ity_I64);
6427      assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
6428                        getQRegLane(nn, 0, Ity_I64),
6429                        getQRegLane(mm, 0, Ity_I64)));
6430      putQRegLane(dd, 0, mkexpr(res));
6431      putQRegLane(dd, 1, mkU64(0));
6432      DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
6433          nameQRegLO(dd, Ity_I64),
6434          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
6435      return True;
6436   }
6437
   /* ------------ MUL/PMUL/MLA/MLS (vector) ------------ */
   /* 31  28    23   21 20 15     9 4
      0q0 01110 size 1  m  100111 n d  MUL  Vd.T, Vn.T, Vm.T  B/H/S only
      0q1 01110 size 1  m  100111 n d  PMUL Vd.T, Vn.T, Vm.T  B only
      0q0 01110 size 1  m  100101 n d  MLA  Vd.T, Vn.T, Vm.T  B/H/S only
      0q1 01110 size 1  m  100101 n d  MLS  Vd.T, Vn.T, Vm.T  B/H/S only
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,21) == 1
       && (INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(1,0,0,1,0,1)) {
      Bool isQ    = INSN(30,30) == 1;
      UInt szBlg2 = INSN(23,22);
      UInt bit29  = INSN(29,29);
      UInt mm     = INSN(20,16);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      /* bit 11 == 0 selects the accumulating forms (MLA/MLS). */
      Bool isMLAS = INSN(11,11) == 0;
      /* Iop_INVALID entries mark lane sizes the instruction does not
         support (D lanes everywhere; H/S/D for PMUL); selecting one
         rejects the decode below. */
      const IROp opsADD[4]
         = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_INVALID };
      const IROp opsSUB[4]
         = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_INVALID };
      const IROp opsMUL[4]
         = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      /* Set opMUL and, if necessary, opACC.  A result value of
         Iop_INVALID for opMUL indicates that the instruction is
         invalid. */
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
      vassert(szBlg2 < 4);
      IROp opACC = Iop_INVALID;
      IROp opMUL = Iop_INVALID;
      if (ok) {
         /* PMUL is the U==1 non-accumulating form; everything else
            multiplies with the ordinary integer Mul ops. */
         opMUL = (bit29 == 1 && !isMLAS) ? opsPMUL[szBlg2]
                                         : opsMUL[szBlg2];
         opACC = isMLAS ? (bit29 == 1 ? opsSUB[szBlg2] : opsADD[szBlg2])
                        : Iop_INVALID;
      }
      if (ok && opMUL != Iop_INVALID) {
         /* t1 = Vn * Vm; t2 = plain product, or Vd +/- product. */
         IRTemp t1 = newTemp(Ity_V128);
         assign(t1, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         IRTemp t2 = newTemp(Ity_V128);
         assign(t2, opACC == Iop_INVALID
                       ? mkexpr(t1)
                       : binop(opACC, getQReg128(dd), mkexpr(t1)));
         /* For the 64-bit (q==0) form, zero the upper half of Vd. */
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
                               : mkexpr(t2));
         const HChar* nm = isMLAS ? (bit29 == 1 ? "mls" : "mla")
                                  : (bit29 == 1 ? "pmul" : "mul");
         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
             nameQReg128(dd), arrSpec,
             nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         return True;
      }
      /* else fall through */
   }
6496
6497   /* ---------------- {S,U}{MIN,MAX} (vector) ---------------- */
6498   /* 31  28    23   21 20 15     9 4
6499      0q0 01110 size 1  m  011011 n d  SMIN Vd.T, Vn.T, Vm.T
6500      0q1 01110 size 1  m  011011 n d  UMIN Vd.T, Vn.T, Vm.T
6501      0q0 01110 size 1  m  011001 n d  SMAX Vd.T, Vn.T, Vm.T
6502      0q1 01110 size 1  m  011001 n d  UMAX Vd.T, Vn.T, Vm.T
6503   */
6504   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
6505       && INSN(21,21) == 1
6506       && ((INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(0,1,1,0,0,1))) {
6507      Bool isQ    = INSN(30,30) == 1;
6508      Bool isU    = INSN(29,29) == 1;
6509      UInt szBlg2 = INSN(23,22);
6510      Bool isMAX  = INSN(11,11) == 0;
6511      UInt mm     = INSN(20,16);
6512      UInt nn     = INSN(9,5);
6513      UInt dd     = INSN(4,0);
6514      Bool zeroHI = False;
6515      const HChar* arrSpec = "";
6516      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
6517      if (ok) {
6518         const IROp opMINS[4]
6519            = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
6520         const IROp opMINU[4]
6521            = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
6522         const IROp opMAXS[4]
6523            = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
6524         const IROp opMAXU[4]
6525            = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
6526         vassert(szBlg2 < 4);
6527         IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
6528                         : (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
6529         IRTemp t = newTemp(Ity_V128);
6530         assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
6531         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
6532                               : mkexpr(t));
6533         const HChar* nm = isMAX ? (isU ? "umax" : "smax")
6534                                 : (isU ? "umin" : "smin");
6535         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
6536             nameQReg128(dd), arrSpec,
6537             nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
6538         return True;
6539      }
6540      /* else fall through */
6541   }
6542
   /* -------------------- {S,U}{MIN,MAX}V -------------------- */
   /* 31  28    23   21    16 15     9 4
      0q0 01110 size 11000 1  101010 n d  SMINV Vd, Vn.T
      0q1 01110 size 11000 1  101010 n d  UMINV Vd, Vn.T
      0q0 01110 size 11000 0  101010 n d  SMAXV Vd, Vn.T
      0q1 01110 size 11000 0  101010 n d  UMAXV Vd, Vn.T
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,17) == BITS5(1,1,0,0,0)
       && INSN(15,10) == BITS6(1,0,1,0,1,0)) {
      Bool isQ    = INSN(30,30) == 1;
      Bool isU    = INSN(29,29) == 1;
      UInt szBlg2 = INSN(23,22);
      Bool isMAX  = INSN(16,16) == 0;
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2);
      if (ok) {
         /* Reject the combinations the architecture does not define:
            D lanes entirely, and S lanes in the half-width (q==0) form. */
         if (szBlg2 == 3)         ok = False;
         if (szBlg2 == 2 && !isQ) ok = False;
      }
      if (ok) {
         const IROp opMINS[3]
            = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
         const IROp opMINU[3]
            = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
         const IROp opMAXS[3]
            = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
         const IROp opMAXU[3]
            = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
         vassert(szBlg2 < 3);
         IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
                         : (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
         IRTemp tN1 = newTemp(Ity_V128);
         assign(tN1, getQReg128(nn));
         /* If Q == 0, we're just folding lanes in the lower half of
            the value.  In which case, copy the lower half of the
            source into the upper half, so we can then treat it the
            same as the full width case. */
         IRTemp tN2 = newTemp(Ity_V128);
         assign(tN2, zeroHI ? mk_CatEvenLanes64x2(tN1,tN1) : mkexpr(tN1));
         /* Fold all lanes down to a single min/max in lane 0 (the
            helper also zeroes the remaining lanes). */
         IRTemp res = math_MINMAXV(tN2, op);
         if (res == IRTemp_INVALID)
            return False; /* means math_MINMAXV
                             doesn't handle this case yet */
         putQReg128(dd, mkexpr(res));
         const HChar* nm = isMAX ? (isU ? "umaxv" : "smaxv")
                                 : (isU ? "uminv" : "sminv");
         const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
         IRType laneTy = tys[szBlg2];
         DIP("%s %s, %s.%s\n", nm,
             nameQRegLO(dd, laneTy), nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }
   /* ------------ UMULL (vector) ------------ */
   /* 31  28    23 21 20 15     9 4
      001 01110 sz 1  m  110000 n d UMULL Vd.Ta, Vn.Tb, Vm.Tb

   */
   /* Note: bit 30 is fixed at 0 in the match below, so only UMULL
      (operating on the low halves) is handled here, not UMULL2. */
   if (INSN(31,24) == BITS8(0,0,1,0,1,1,1,0) && INSN(23,22) != BITS2(1,1)
       && INSN(21,21) == 1 && INSN(15,10) == BITS6(1,1,0,0,0,0)) {
      UInt mm = INSN(20,16);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      UInt sz = INSN(23,22);   /* 0..2 only; sz == 3 excluded by the guard */

      const HChar* nameTa[3] = { "8h", "4s", "2d" };
      const HChar* nameTb[3] = { "8b", "4h", "2s" };
      const IROp ops[3] = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2 };

      /* Widening unsigned multiply of the low 64-bit halves; the
         Iop_Mull* ops produce a full 128-bit result, so the whole of
         Vd is written in one go. */
      putQReg128(dd, binop(ops[sz], getQRegLO(nn, Ity_I64), getQRegLO(mm, Ity_I64)));

      DIP("umull %s.%s, %s.%s, %s.%s\n", nameQReg128(dd), nameTa[sz],
          nameQReg128(nn), nameTb[sz], nameQReg128(mm), nameTb[sz]);
      return True;
   }
6623
6624
6625   /* ------------ {AND,BIC,ORR,ORN} (vector) ------------ */
6626   /* 31  28    23  20 15     9 4
6627      0q0 01110 001 m  000111 n d  AND Vd.T, Vn.T, Vm.T
6628      0q0 01110 011 m  000111 n d  BIC Vd.T, Vn.T, Vm.T
6629      0q0 01110 101 m  000111 n d  ORR Vd.T, Vn.T, Vm.T
6630      0q0 01110 111 m  000111 n d  ORN Vd.T, Vn.T, Vm.T
6631      T is 16b when q==1, 8b when q==0
6632   */
6633   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
6634       && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
6635      Bool   isQ    = INSN(30,30) == 1;
6636      Bool   isORR  = INSN(23,23) == 1;
6637      Bool   invert = INSN(22,22) == 1;
6638      UInt   mm     = INSN(20,16);
6639      UInt   nn     = INSN(9,5);
6640      UInt   dd     = INSN(4,0);
6641      IRTemp res    = newTemp(Ity_V128);
6642      assign(res, binop(isORR ? Iop_OrV128 : Iop_AndV128,
6643                        getQReg128(nn),
6644                        invert ? unop(Iop_NotV128, getQReg128(mm))
6645                               : getQReg128(mm)));
6646      putQReg128(dd, isQ ? mkexpr(res)
6647                         : unop(Iop_ZeroHI64ofV128, mkexpr(res)));
6648      const HChar* names[4] = { "and", "bic", "orr", "orn" };
6649      const HChar* ar = isQ ? "16b" : "8b";
6650      DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
6651          nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
6652      return True;
6653   }
6654
   /* ---------- CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector) ---------- */
   /* 31  28    23   21     15     9 4                          ix
      0q1 01110 size 1  m   100011 n d  CMEQ  Vd.T, Vn.T, Vm.T  (1) ==
      0q0 01110 size 1  m   100011 n d  CMTST Vd.T, Vn.T, Vm.T  (2) &, != 0

      0q1 01110 size 1  m   001101 n d  CMHI Vd.T, Vn.T, Vm.T   (3) >u
      0q0 01110 size 1  m   001101 n d  CMGT Vd.T, Vn.T, Vm.T   (4) >s

      0q1 01110 size 1  m   001111 n d  CMHS Vd.T, Vn.T, Vm.T   (5) >=u
      0q0 01110 size 1  m   001111 n d  CMGE Vd.T, Vn.T, Vm.T   (6) >=s

      0q1 01110 size 100000 100010 n d  CMGE Vd.T, Vn.T, #0     (7) >=s 0
      0q0 01110 size 100000 100010 n d  CMGT Vd.T, Vn.T, #0     (8) >s 0

      0q1 01110 size 100000 100110 n d  CMLE Vd.T, Vn.T, #0     (9) <=s 0
      0q0 01110 size 100000 100110 n d  CMEQ Vd.T, Vn.T, #0     (10) == 0

      0q0 01110 size 100000 101010 n d  CMLT Vd.T, Vn.T, #0     (11) <s 0
   */
   if (INSN(31,31) == 0
       && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) {
      Bool isQ    = INSN(30,30) == 1;
      UInt bit29  = INSN(29,29);
      UInt szBlg2 = INSN(23,22);
      UInt mm     = INSN(20,16);
      UInt b1510  = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const IROp opsEQ[4]
         = { Iop_CmpEQ8x16,  Iop_CmpEQ16x8,  Iop_CmpEQ32x4,  Iop_CmpEQ64x2 };
      const IROp opsGTS[4]
         = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
      const IROp opsGTU[4]
         = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
      Bool zeroHI = False;
      const HChar* arrSpec = "??";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2);
      /* Map bits 15:10 (and, for the compare-against-#0 forms, the
         requirement mm == 0) to the case index from the comment above.
         ix == 0 means no match, in which case we fall through. */
      UInt ix = 0;
      if (ok) {
         switch (b1510) {
            case BITS6(1,0,0,0,1,1): ix = bit29 ? 1 : 2; break;
            case BITS6(0,0,1,1,0,1): ix = bit29 ? 3 : 4; break;
            case BITS6(0,0,1,1,1,1): ix = bit29 ? 5 : 6; break;
            case BITS6(1,0,0,0,1,0):
               if (mm == 0) { ix = bit29 ? 7 : 8; }; break;
            case BITS6(1,0,0,1,1,0):
               if (mm == 0) { ix = bit29 ? 9 : 10; }; break;
            case BITS6(1,0,1,0,1,0):
               if (mm == 0 && bit29 == 0) { ix = 11; }; break;
            default: break;
         }
      }
      if (ix != 0) {
         vassert(ok && szBlg2 < 4);
         IRExpr* argL = getQReg128(nn);
         /* Cases 7..11 compare against an all-zero vector. */
         IRExpr* argR = (ix <= 6) ? getQReg128(mm) : mkV128(0x0000);
         IRExpr* res  = NULL;
         /* Some useful identities:
               x >  y   can be expressed directly
               x <  y   ==   y > x
               x <= y   ==   not (x > y)
               x >= y   ==   not (y > x)
         */
         switch (ix) {
            case 1: res = binop(opsEQ[szBlg2], argL, argR); break;
            /* CMTST: (argL & argR) != 0, lanewise */
            case 2: res = unop(Iop_NotV128, binop(opsEQ[szBlg2],
                                            binop(Iop_AndV128, argL, argR),
                                                  mkV128(0x0000)));
                    break;
            case 3: res = binop(opsGTU[szBlg2], argL, argR); break;
            case 4: res = binop(opsGTS[szBlg2], argL, argR); break;
            case 5: res = unop(Iop_NotV128, binop(opsGTU[szBlg2], argR, argL));
                    break;
            case 6: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
                    break;
            case 7: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
                    break;
            case 8: res = binop(opsGTS[szBlg2], argL, argR); break;
            case 9: res = unop(Iop_NotV128,
                               binop(opsGTS[szBlg2], argL, argR));
                    break;
            case 10: res = binop(opsEQ[szBlg2],  argL, argR); break;
            case 11: res = binop(opsGTS[szBlg2], argR, argL); break;
            default: vassert(0);
         }
         vassert(res);
         /* For the 64-bit (q==0) form, zero the upper half of Vd. */
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, res) : res);
         /* Mnemonic suffixes, indexed by ix-1. */
         const HChar* nms[11] = { "eq", "tst", "hi", "gt", "hs", "ge",
                                  "ge", "gt", "le", "eq", "lt" };
         if (ix <= 6) {
            DIP("cm%s %s.%s, %s.%s, %s.%s\n", nms[ix-1],
                nameQReg128(dd), arrSpec,
                nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         } else {
            DIP("cm%s %s.%s, %s.%s, #0\n", nms[ix-1],
                nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         }
         return True;
      }
      /* else fall through */
   }
6756
   /* -------------- {EOR,BSL,BIT,BIF} (vector) -------------- */
   /* 31  28    23   20 15     9 4
      0q1 01110 00 1 m  000111 n d  EOR Vd.T, Vm.T, Vn.T
      0q1 01110 01 1 m  000111 n d  BSL Vd.T, Vm.T, Vn.T
      0q1 01110 10 1 m  000111 n d  BIT Vd.T, Vm.T, Vn.T
      0q1 01110 11 1 m  000111 n d  BIF Vd.T, Vm.T, Vn.T
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
       && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
      Bool   isQ  = INSN(30,30) == 1;
      UInt   op   = INSN(23,22);
      UInt   mm   = INSN(20,16);
      UInt   nn   = INSN(9,5);
      UInt   dd   = INSN(4,0);
      IRTemp argD = newTemp(Ity_V128);
      IRTemp argN = newTemp(Ity_V128);
      IRTemp argM = newTemp(Ity_V128);
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
      IRExpr* res = NULL;
      /* The three insert forms are expressed via the bit-select
         identity  sel(c, x, y) = y ^ ((y ^ x) & c), which picks bits
         of x where c is 1 and bits of y where c is 0. */
      switch (op) {
         case BITS2(0,0): /* EOR */
            res = binop(opXOR, mkexpr(argM), mkexpr(argN));
            break;
         case BITS2(0,1): /* BSL: Vd selects between Vn (1) and Vm (0) */
            res = binop(opXOR, mkexpr(argM),
                               binop(opAND,
                                     binop(opXOR, mkexpr(argM), mkexpr(argN)),
                                     mkexpr(argD)));
            break;
         case BITS2(1,0): /* BIT: insert Vn bits into Vd where Vm is 1 */
            res = binop(opXOR, mkexpr(argD),
                               binop(opAND,
                                     binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                     mkexpr(argM)));
            break;
         case BITS2(1,1): /* BIF: insert Vn bits into Vd where Vm is 0 */
            res = binop(opXOR, mkexpr(argD),
                               binop(opAND,
                                     binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                     unop(opNOT, mkexpr(argM))));
            break;
         default:
            vassert(0);
      }
      vassert(res);
      /* The 8b (q==0) form zeroes the upper half of Vd. */
      putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr = isQ ? "16b" : "8b";
      vassert(op < 4);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[op],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
6815
6816   /* ------------ USHR (scalar, immediate) ------------ */
6817   /* 31  28     22   18   15     9 4
6818      011 111110 immh immb 000001 n d  USHR Vd, Vn, #shift
6819   */
6820   if (INSN(31,23) == BITS9(0,1,1, 1,1,1,1,1,0)
6821       && INSN(15,10) == BITS6(0,0,0,0,0,1)) {
6822      UInt immh = INSN(22,19);
6823      UInt immb = INSN(18,16);
6824      UInt nn   = INSN(9,5);
6825      UInt dd   = INSN(4,0);
6826
6827      UInt szBlg2 = 0;
6828      UInt shift  = 0;
6829      Bool ok     = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb);
6830
6831      if (szBlg2 == 3) {
6832         putQRegHI64(dd, mkU64(0x0));
6833         putQRegLO(dd, binop(Iop_Shr64, getQRegLO(nn, Ity_I64), mkU8(shift)));
6834         DIP("ushr %s, %s\n", nameQRegLO(dd, Ity_I64), nameQRegLO(nn, Ity_I64));
6835         return True;
6836      }
6837   }
   /* ------------ {USHR,SSHR,SHL} (vector, immediate) ------------ */
   /* 31  28     22   18   15     9 4
      0q1 011110 immh immb 000001 n d  USHR Vd.T, Vn.T, #shift (1)
      0q1 011110 immh immb 010001 n d  SRI  Vd.T, Vn.T, #shift (1)
      0q0 011110 immh immb 000001 n d  SSHR Vd.T, Vn.T, #shift (2)
      0q0 011110 immh immb 010101 n d  SHL  Vd.T, Vn.T, #shift (3)
      0q1 011110 immh immb 010101 n d  SLI  Vd.T, Vn.T, #shift (3)
      laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx,    SHL:xxx
                         001x:xxx -> H, SHR:16-xxxx   SHL:xxxx
                         01xx:xxx -> S, SHR:32-xxxxx  SHL:xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx SHL:xxxxxx
                         other    -> invalid
      As usual the case laneTy==D && q==0 is not allowed.
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
       && INSN(10,10) == 1) {
      /* ix: 1 = logical right shift (USHR/SRI), 2 = arithmetic right
         shift (SSHR), 3 = left shift (SHL/SLI); 0 = no match. */
      UInt ix = 0;
      /**/ if (INSN(29,29) == 1 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 1;
      else if (INSN(29,29) == 1 && INSN(15,11) == BITS5(0,1,0,0,0)) ix = 1;
      else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 2;
      else if (                    INSN(15,11) == BITS5(0,1,0,1,0)) ix = 3;
      if (ix > 0) {
         Bool isQ  = INSN(30,30) == 1;
         UInt immh = INSN(22,19);
         UInt immb = INSN(18,16);
         UInt nn   = INSN(9,5);
         UInt dd   = INSN(4,0);
         /* SRI (U==1, bits 15:11 == 01000) and SLI (U==1 in the ix==3
            pattern) insert the shifted bits into Vd instead of
            overwriting it. */
         Bool isInsert = (ix == 3 && INSN(29,29) == 1)
                         || (INSN(29,29) == 1 && INSN(15,11) == BITS5(0,1,0,0,0));

         const IROp opsSHRN[4]
            = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
         const IROp opsSARN[4]
            = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
         const IROp opsSHLN[4]
            = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
         UInt szBlg2 = 0;
         UInt shift  = 0;
         Bool ok     = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb);
         if (ix == 3) {
            /* The shift encoding has opposite sign for the leftwards
               case.  Adjust shift to compensate. */
            shift = (8 << szBlg2) - shift;
         }
         /* NOTE(review): 'shift >= 0' is always true since shift is
            UInt.  Also, shift == (8 << szBlg2) -- a full-lane-width
            right shift -- can reach the Iop_Sh{r,a}N ops below; confirm
            those accept a shift amount equal to the lane width. */
         if (ok && szBlg2 < 4 && shift >= 0 && shift <= (8 << szBlg2)
             && !(szBlg2 == 3/*64bit*/ && !isQ)) {
            IROp op = Iop_INVALID;
            const HChar* nm = NULL;
            switch (ix) {
               case 1: op = opsSHRN[szBlg2]; nm = isInsert ? "sri" : "ushr"; break;
               case 2: op = opsSARN[szBlg2]; nm = "sshr"; break;
               case 3: op = opsSHLN[szBlg2]; nm = isInsert ? "sli" : "shl";  break;
               default: vassert(0);
            }
            IRTemp mask = newTemp(Ity_V128);
            IRTemp res;
            IRTemp candidate  = newTemp(Ity_V128);

            /* candidate = Vn shifted by the immediate. */
            assign(candidate, binop(op, getQReg128(nn), mkU8(shift)));

            if (isInsert) {
              /* mask has 1-bits exactly at the positions the shift
                 writes (all-ones shifted by the same amount); the
                 remaining positions keep their old Vd bits, giving
                 SRI/SLI insert semantics. */
              assign(mask, binop(op,
                                 binop(Iop_64HLtoV128,
                                       mkU64(0xFFFFFFFFFFFFFFFFULL),
                                       mkU64(0xFFFFFFFFFFFFFFFFULL)),
                                 mkU8(shift)));
              res = newTemp(Ity_V128);

              assign(res, binop(Iop_OrV128,
                                binop(Iop_AndV128,
                                      unop(Iop_NotV128, mkexpr(mask)),
                                      getQReg128(dd)),
                                mkexpr(candidate)));
            } else {
               res = candidate;
            }

            /* The 64-bit (q==0) forms zero the upper half of Vd. */
            putQReg128(dd, isQ ? mkexpr(res) : unop(Iop_ZeroHI64ofV128, mkexpr(res)));
            HChar laneCh = "bhsd"[szBlg2];
            UInt  nLanes = (isQ ? 128 : 64) / (8 << szBlg2);
            DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
                nameQReg128(dd), nLanes, laneCh,
                nameQReg128(nn), nLanes, laneCh, shift);
            return True;
         }
         /* else fall through */
      }
   }
6927
6928   /* -------------------- SHRN{,2} -------------------- */
6929   /* 31  28     22   18   15     9 4
6930      0q0 011110 immh immb 100001 n d  SHRN  Vd.Tb, Vn.Ta, #sh
6931
6932      where Ta,Tb,sh
6933        = case immh of 1xxx -> invalid
6934                       01xx -> 2d, 2s(q0)/4s(q1),  64 - immh:immb (0..31)
6935                       001x -> 4s, 4h(q0)/8h(q1),  32 - immh:immb (0..15)
6936                       0001 -> 8h, 8b(q0)/16b(q1),  8 - immh:immb  (0..7)
6937                       0000 -> AdvSIMD modified immediate (???)
6938   */
6939
6940   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
6941       && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
6942      Bool isQ = INSN(30,30) == 1;
6943      UInt immh  = INSN(22,19);
6944      UInt immb  = INSN(18,16);
6945      UInt nn    = INSN(9,5);
6946      UInt dd    = INSN(4,0);
6947      IRTemp  src  = newTemp(Ity_V128);
6948      IRTemp  zero = newTemp(Ity_V128);
6949      IRExpr* res  = NULL;
6950      const HChar* ta = "??";
6951      const HChar* tb = "??";
6952
6953      UInt szBlg2 = 0;
6954      UInt shift  = 0;
6955      Bool ok     = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb);
6956
6957      if (ok && shift >= 0 && szBlg2 < 3 && shift <= (8 << szBlg2)) {
6958         const IROp opsSHR[3] = { Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
6959         const HChar* tas[3] = { "8h", "4s", "2d" };
6960         const HChar* tbs_q0[3] = { "8b", "4h", "2s" };
6961         const HChar* tbs_q1[3] = { "16b", "8h", "4s" };
6962         assign(src, binop(opsSHR[szBlg2], getQReg128(nn), mkU8(shift)));
6963         assign(zero, mkV128(0x0000));
6964         switch(szBlg2) {
6965            case 0:
6966               res = mk_CatEvenLanes8x16(zero, src);
6967               break;
6968            case 1:
6969               res = mk_CatEvenLanes16x8(zero, src);
6970               break;
6971            case 2:
6972               res = mk_CatEvenLanes32x4(zero, src);
6973               break;
6974            default:
6975               break;
6976         }
6977
6978         if (res != NULL) {
6979            if (isQ) {
6980               putQRegHI64(dd, unop(Iop_V128to64, res));
6981            } else {
6982               putQReg128(dd, res);
6983            }
6984            DIP("shrn%s %s.%s, %s.%s, #%d\n",
6985                isQ ? "2" : "", nameQReg128(dd), isQ ? tbs_q1[szBlg2] : tbs_q0[szBlg2],
6986                nameQReg128(nn), tas[szBlg2], shift);
6987            return True;
6988         }
6989      }
6990   }
6991
6992   /* -------------------- {U,S}SHLL{,2} -------------------- */
6993   /* 31  28     22   18   15     9 4
6994      0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
6995      0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
6996      where Ta,Tb,sh
6997        = case immh of 1xxx -> invalid
6998                       01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
6999                       001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
7000                       0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
7001                       0000 -> AdvSIMD modified immediate (???)
7002   */
7003   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
7004       && INSN(15,10) == BITS6(1,0,1,0,0,1)) {
7005      Bool isQ   = INSN(30,30) == 1;
7006      Bool isU   = INSN(29,29) == 1;
7007      UInt immh  = INSN(22,19);
7008      UInt immb  = INSN(18,16);
7009      UInt nn    = INSN(9,5);
7010      UInt dd    = INSN(4,0);
7011      UInt immhb = (immh << 3) | immb;
7012      IRTemp  src  = newTemp(Ity_V128);
7013      IRTemp  zero = newTemp(Ity_V128);
7014      IRExpr* res  = NULL;
7015      UInt    sh   = 0;
7016      const HChar* ta = "??";
7017      const HChar* tb = "??";
7018      assign(src, getQReg128(nn));
7019      assign(zero, mkV128(0x0000));
7020      if (immh & 8) {
7021         /* invalid; don't assign to res */
7022      }
7023      else if (immh & 4) {
7024         sh = immhb - 32;
7025         vassert(sh < 32); /* so 32-sh is 1..32 */
7026         ta = "2d";
7027         tb = isQ ? "4s" : "2s";
7028         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
7029                           : mk_InterleaveLO32x4(src, zero);
7030         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
7031      }
7032      else if (immh & 2) {
7033         sh = immhb - 16;
7034         vassert(sh < 16); /* so 16-sh is 1..16 */
7035         ta = "4s";
7036         tb = isQ ? "8h" : "4h";
7037         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
7038                           : mk_InterleaveLO16x8(src, zero);
7039         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
7040      }
7041      else if (immh & 1) {
7042         sh = immhb - 8;
7043         vassert(sh < 8); /* so 8-sh is 1..8 */
7044         ta = "8h";
7045         tb = isQ ? "16b" : "8b";
7046         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
7047                           : mk_InterleaveLO8x16(src, zero);
7048         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
7049      } else {
7050         vassert(immh == 0);
7051         /* invalid; don't assign to res */
7052      }
7053      /* */
7054      if (res) {
7055         putQReg128(dd, res);
7056         DIP("%cshll%s %s.%s, %s.%s, #%d\n",
7057             isU ? 'u' : 's', isQ ? "2" : "",
7058             nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
7059         return True;
7060      }
7061      /* else fall through */
7062   }
7063
   /* -------------------- XTN{,2} -------------------- */
   /* 31  28    23   21     15     9 4  XTN{,2} Vd.Tb, Vn.Ta
      0q0 01110 size 100001 001010 n d
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
       && INSN(21,16) == BITS6(1,0,0,0,0,1)
       && INSN(15,10) == BITS6(0,0,1,0,1,0)) {
      Bool isQ  = INSN(30,30) == 1;
      UInt size = INSN(23,22);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      IROp op   = Iop_INVALID;
      const HChar* tb = NULL;
      const HChar* ta = NULL;
      /* Pick the narrowing op and arrangement names from size:Q.
         size == BITS2(1,1) (cases 6 and 7) is invalid: 'op' stays
         Iop_INVALID and we fall through to further decoders. */
      switch ((size << 1) | (isQ ? 1 : 0)) {
         case 0: tb = "8b";  ta = "8h"; op = Iop_NarrowUn16to8x8;  break;
         case 1: tb = "16b"; ta = "8h"; op = Iop_NarrowUn16to8x8;  break;
         case 2: tb = "4h";  ta = "4s"; op = Iop_NarrowUn32to16x4; break;
         case 3: tb = "8h";  ta = "4s"; op = Iop_NarrowUn32to16x4; break;
         case 4: tb = "2s";  ta = "2d"; op = Iop_NarrowUn64to32x2; break;
         case 5: tb = "4s";  ta = "2d"; op = Iop_NarrowUn64to32x2; break;
         case 6: break;
         case 7: break;
         default: vassert(0);
      }
      if (op != Iop_INVALID) {
         if (!isQ) {
            /* XTN zeroes the upper 64 bits of Vd ... */
            putQRegLane(dd, 1, mkU64(0));
         }
         /* ... and writes the narrowed result to the lower 64 bits.
            XTN2 instead writes the upper 64 bits, leaving the lower
            half of Vd untouched. */
         putQRegLane(dd, isQ ? 1 : 0, unop(op, getQReg128(nn)));
         DIP("xtn%s %s.%s, %s.%s\n", isQ ? "2" : "",
             nameQReg128(dd), tb, nameQReg128(nn), ta);
         return True;
      }
      /* else fall through */
   }

   /* ---------------- CNT (vector) ---------------- */
   /* 31 29     23 21           9 4
      0q 001110 00 100000010110 n d  CNT Vd.T, Vn.T
   */

  if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
      && INSN(23,22) == BITS2(0,0)
      && INSN(21,10) == BITS12(1,0,0,0,0,0,0,1,0,1,1,0) ) {
     Bool isQ = INSN(30,30) == 1;
     UInt nn  = INSN(9,5);
     UInt dd  = INSN(4,0);
     const HChar* name = isQ ? "16b" : "8b";

     /* Per-byte population count.  For the 64-bit (q==0) variant the
        upper half of Vd is zeroed. */
     IRExpr* res = unop(Iop_Cnt8x16, getQReg128(nn));
     putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));

     DIP("cnt %s.%s, %s.%s\n", nameQReg128(dd), name, nameQReg128(nn), name);
     return True;
  }
7120
7121
   /* ---------------- DUP (element, vector) ---------------- */
   /* 31  28       20   15     9 4
      0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   */
   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
       && INSN(15,10) == BITS6(0,0,0,0,0,1)) {
      Bool   isQ  = INSN(30,30) == 1;
      UInt   imm5 = INSN(20,16);
      UInt   nn   = INSN(9,5);
      UInt   dd   = INSN(4,0);
      IRTemp w0   = newTemp(Ity_I64);
      const HChar* arT  = "??";
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt   laneNo = 16; /* invalid */
      /* The lowest set bit of imm5 selects the lane size; the bits
         above it give the source lane index.  In each case, fetch the
         selected lane zero-extended into a 64-bit temp (w0). */
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         arTs   = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         arTs   = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         arTs   = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if ((imm5 & 8) && isQ) {
         /* D-lane dup is only valid in the 128-bit (q==1) form. */
         arT  = "2d";
         arTs = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         /* Replicate the lane across a 64-bit value, then write it to
            both halves of Vd (q==1) or just the lower half (q==0). */
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s.%s[%u]\n",
             nameQReg128(dd), arT, nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }

   /* ---------------- DUP (general, vector) ---------------- */
   /* 31  28    23  20   15     9 4
      0q0 01110 000 imm5 000011 n d  DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1  8B(q=0)      or 16b(q=1),     R=W
            xxx10  4H(q=0)      or 8H(q=1),      R=W
            xx100  2S(q=0)      or 4S(q=1),      R=W
            x1000  Invalid(q=0) or 2D(q=1),      R=X
            x0000  Invalid(q=0) or Invalid(q=1)
   */
   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
       && INSN(15,10) == BITS6(0,0,0,0,1,1)) {
      Bool   isQ  = INSN(30,30) == 1;
      UInt   imm5 = INSN(20,16);
      UInt   nn   = INSN(9,5);
      UInt   dd   = INSN(4,0);
      IRTemp w0   = newTemp(Ity_I64);
      const HChar* arT = "??";
      IRType laneTy = Ity_INVALID;
      /* As for DUP (element): lowest set bit of imm5 gives the lane
         size; the source here is the low laneTy-sized chunk of Rn. */
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         arT    = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
      /* else fall through */
   }
7234
   /* --------------------- {S,U}ADDLV --------------------- */
   /* 31  28    23 21           9 4
      0qu 01110 sz 110000001110 n d  {U,S}ADDLV Vd, Vn.T

      sz V T(q=1/0)
      -- - ----
      00 h 16/8b
      01 s 8/4h
      10 d 4s (q can't be 0)
      11 invalid
   */
   if (INSN(31,31) == 0 && INSN(28, 24) == BITS5(0,1,1,1,0)
       && INSN(21, 10) == BITS12(1,1,0,0,0,0,0,0,1,1,1,0)) {
      UInt bitQ = INSN(30,30);
      UInt bitU = INSN(29,29);
      UInt sz   = INSN(23,22);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);

      /* sz == 11 is always invalid; sz == 10 (4s source) requires the
         128-bit form. */
      Bool valid = !((sz == BITS2(1,1)) || (bitQ == 0 && sz == BITS2(1,0)));
      if (valid) {
        const IRType ddTypes[3] = { Ity_I16, Ity_I32, Ity_I64 };
        const HChar* suffixesQ[3] = { "16b", "8h", "4s" };
        const HChar* suffixesq[3] = { "8b", "4h", "invalid" };

        /* For the 64-bit (q==0) form, mask off the upper 64 bits of Vn
           so the across-lanes sum only sees the lower half. */
        IRTemp src = newTemp(Ity_V128);
        IRExpr* half = mkU64(0xFFFFFFFFFFFFFFFFULL);
        IRExpr* zero = mkU64(0x0);

        IRExpr* mask = binop(Iop_64HLtoV128, zero, half);
        assign(src, bitQ ? getQReg128(nn) : binop(Iop_AndV128, getQReg128(nn), mask));

        /* Widening (signed or unsigned) add-across-lanes. */
        IROp op;
        switch (sz) {
        case BITS2(0,0): op = bitU ? Iop_AddLV8Ux16 : Iop_AddLV8Sx16; break;
        case BITS2(0,1): op = bitU ? Iop_AddLV16Ux8 : Iop_AddLV16Sx8; break;
        case BITS2(1,0): op = bitU ? Iop_AddLV32Ux4 : Iop_AddLV32Sx4; break;
        default: vassert(0);
        }

        putQReg128(dd, unop(op, mkexpr(src)));

        DIP("%saddlv %s,%s.%s\n", bitU ? "u" : "s", nameQRegLO(dd, ddTypes[sz]),
            nameQReg128(nn), bitQ ? suffixesQ[sz] : suffixesq[sz]);

        return True;
      }
      /* else fall through */
   }
   /* ---------------------- {S,U}MOV ---------------------- */
   /* 31  28        20   15     9 4
      0q0 01110 000 imm5 001111 n d  UMOV Xd/Wd, Vn.Ts[index]
      0q0 01110 000 imm5 001011 n d  SMOV Xd/Wd, Vn.Ts[index]
      dest is Xd when q==1, Wd when q==0
      UMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, 8Uto64
                          1:xxxx1 -> invalid
                          0:xxx10 -> H, xxx,  16Uto64
                          1:xxx10 -> invalid
                          0:xx100 -> S, xx,   32Uto64
                          1:xx100 -> invalid
                          1:x1000 -> D, x,    copy64
                          other   -> invalid
      SMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
                          1:xxxx1 -> B, xxxx, 8Sto64
                          0:xxx10 -> H, xxx,  (32Uto64 . 16Sto32)
                          1:xxx10 -> H, xxx,  16Sto64
                          0:xx100 -> invalid
                          1:xx100 -> S, xx,   32Sto64
                          1:x1000 -> invalid
                          other   -> invalid
   */
   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
       && (INSN(15,10) & BITS6(1,1,1,0,1,1)) == BITS6(0,0,1,0,1,1)) {
      UInt bitQ = INSN(30,30) == 1;
      UInt imm5 = INSN(20,16);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      Bool isU  = INSN(12,12) == 1;  /* bit 12 distinguishes UMOV/SMOV */
      const HChar* arTs = "??";
      UInt    laneNo = 16; /* invalid */
      // Setting 'res' to non-NULL determines valid/invalid
      IRExpr* res    = NULL;
      if (!bitQ && (imm5 & 1)) { // 0:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? unop(Iop_8Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
         arTs = "b";
      }
      else if (bitQ && (imm5 & 1)) { // 1:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? NULL
                   : unop(Iop_8Sto64, lane);
         arTs = "b";
      }
      else if (!bitQ && (imm5 & 2)) { // 0:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? unop(Iop_16Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
         arTs = "h";
      }
      else if (bitQ && (imm5 & 2)) { // 1:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? NULL
                   : unop(Iop_16Sto64, lane);
         arTs = "h";
      }
      else if (!bitQ && (imm5 & 4)) { // 0:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? unop(Iop_32Uto64, lane)
                   : NULL;
         arTs = "s";
      }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? NULL
                   : unop(Iop_32Sto64, lane);
         arTs = "s";
      }
      else if (bitQ && (imm5 & 8)) { // 1:x1000
         laneNo = (imm5 >> 4) & 1;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
         res = isU ? lane
                   : NULL;
         arTs = "d";
      }
      /* */
      if (res) {
         vassert(laneNo < 16);
         putIReg64orZR(dd, res);
         DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
             nameIRegOrZR(bitQ == 1, dd),
             nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }
7381
   /* -------------------- INS (general) -------------------- */
   /* 31  28       20   15     9 4
      010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
                                 xxx10 -> H, xxx
                                 xx100 -> S, xx
                                 x1000 -> D, x
   */
   if (INSN(31,21) == BITS11(0,1,0,0,1,1,1,0,0,0,0)
       && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
      UInt    imm5   = INSN(20,16);
      UInt    nn     = INSN(9,5);
      UInt    dd     = INSN(4,0);
      HChar   ts     = '?';
      UInt    laneNo = 16;
      IRExpr* src    = NULL;
      /* The lowest set bit of imm5 selects the lane size; the bits
         above it give the destination lane index.  'src' is the low
         laneTy-sized chunk of Rn. */
      if (imm5 & 1) {
         src    = unop(Iop_64to8, getIReg64orZR(nn));
         laneNo = (imm5 >> 1) & 15;
         ts     = 'b';
      }
      else if (imm5 & 2) {
         src    = unop(Iop_64to16, getIReg64orZR(nn));
         laneNo = (imm5 >> 2) & 7;
         ts     = 'h';
      }
      else if (imm5 & 4) {
         src    = unop(Iop_64to32, getIReg64orZR(nn));
         laneNo = (imm5 >> 3) & 3;
         ts     = 's';
      }
      else if (imm5 & 8) {
         src    = getIReg64orZR(nn);
         laneNo = (imm5 >> 4) & 1;
         ts     = 'd';
      }
      /* */
      if (src) {
         vassert(laneNo < 16);
         /* Only the selected lane of Vd is written; the rest is
            preserved. */
         putQRegLane(dd, laneNo, src);
         DIP("ins %s.%c[%u], %s\n",
             nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
         return True;
      }
      /* else invalid; fall through */
   }
7428
7429   /* -------------------- INS (element) -------------------- */
7430   /* 31  28       20   15 14   10 9 4
7431      011 01110000 imm5 0  imm4 1  n d  INS Vd.Ts[ix1], Vn.Ts[ix2]
7432
7433      where Ts, ix1, ix2 = case imm5 of xxxx1 -> B, imm5<4:1>, imm4<3:0>
7434                                        xxx10 -> H, imm5<4:2>, imm4<3:1>
7435                                        xx100 -> S, imm5<4:3>, imm4<3:2>
7436                                        x1000 -> D, imm5<4:4>, imm4<3:3>
7437   */
7438   if (INSN(31,21) == BITS11(0,1,1,0,1,1,1,0,0,0,0)
7439       && INSN(15,15) == 0 && INSN(10,10) == 1 ) {
7440      UInt   imm5      = INSN(20,16);
7441      UInt   imm4      = INSN(14,11);
7442      UInt   nn        = INSN(9,5);
7443      UInt   dd        = INSN(4,0);
7444      HChar  ts        = '?';
7445      IRType ty        = Ity_INVALID;
7446      UInt   srcLaneNo = 16;
7447      UInt   dstLaneNo = 16;
7448
7449      if (imm5 & 1) {
7450         srcLaneNo = imm4;
7451         dstLaneNo = imm5 >> 1;
7452         ty = Ity_I8;
7453         ts = 'b';
7454      } else if (imm5 & 2) {
7455         srcLaneNo = imm4 >> 1;
7456         dstLaneNo = imm5 >> 2;
7457         ty = Ity_I16;
7458         ts = 'h';
7459      } else if (imm5 & 4) {
7460         srcLaneNo = imm4 >> 2;
7461         dstLaneNo = imm5 >> 3;
7462         ty = Ity_I32;
7463         ts = 's';
7464      } else if (imm5 & 8) {
7465         srcLaneNo = imm4 >> 3;
7466         dstLaneNo = imm5 >> 4;
7467         ty = Ity_I64;
7468         ts = 'd';
7469      }
7470
7471      if (ty != Ity_INVALID) {
7472         vassert(srcLaneNo < 16);
7473         vassert(dstLaneNo < 16);
7474         putQRegLane(dd, dstLaneNo, getQRegLane(nn, srcLaneNo, ty));
7475         DIP("ins %s.%c[%u], %s.%c[%u]\n",
7476             nameQReg128(dd), ts, dstLaneNo, nameQReg128(nn), ts, dstLaneNo);
7477         return True;
7478      }
7479
7480   }
7481
   /* -------------------- NEG (vector) -------------------- */
   /* 31  28    23 21    16      9 4
      0q1 01110 sz 10000 0101110 n d  NEG Vd, Vn
      sz is laneSz, q:sz == 011 is disallowed, as usual
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
       && INSN(21,10) == BITS12(1,0,0,0,0,0,1,0,1,1,1,0)) {
      Bool isQ    = INSN(30,30) == 1;
      UInt szBlg2 = INSN(23,22);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
      if (ok) {
         const IROp opSUB[4]
            = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
         IRTemp res = newTemp(Ity_V128);
         vassert(szBlg2 < 4);
         /* Negate by computing 0 - Vn, lane-wise; for the 64-bit form
            the upper half of Vd is zeroed. */
         assign(res, binop(opSUB[szBlg2], mkV128(0x0000), getQReg128(nn)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
                               : mkexpr(res));
         DIP("neg %s.%s, %s.%s\n",
             nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }

   /* -------------------- TBL, TBX -------------------- */
   /* 31  28        20 15 14  12  9 4
      0q0 01110 000 m  0  len 000 n d  TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
      0q0 01110 000 m  0  len 100 n d  TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
      where Ta = 16b(q=1) or 8b(q=0)
   */
   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
       && INSN(15,15) == 0 && INSN(11,10) == BITS2(0,0)) {
      Bool isQ   = INSN(30,30) == 1;
      Bool isTBX = INSN(12,12) == 1;
      UInt mm    = INSN(20,16);
      UInt len   = INSN(14,13);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      /* The out-of-range values to use.  TBX keeps the existing Vd
         bytes for out-of-range indices; TBL substitutes zero. */
      IRTemp oor_values = newTemp(Ity_V128);
      assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
      /* src value */
      IRTemp src = newTemp(Ity_V128);
      assign(src, getQReg128(mm));
      /* The table values: len+1 consecutive registers, wrapping at
         V31 -> V0. */
      IRTemp tab[4];
      UInt   i;
      for (i = 0; i <= len; i++) {
         vassert(i < 4);
         tab[i] = newTemp(Ity_V128);
         assign(tab[i], getQReg128((nn + i) % 32));
      }
      IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
      putQReg128(dd, isQ ? mkexpr(res)
                         : unop(Iop_ZeroHI64ofV128, mkexpr(res)) );
      const HChar* Ta = isQ ? "16b" : "8b";
      const HChar* nm = isTBX ? "tbx" : "tbl";
      DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n",
          nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
      return True;
   }
   /* FIXME Temporary hacks to get through ld.so FIXME */
   /* The following four decoders are ad-hoc special cases, kept only
      until the general decoders cover these encodings. */

   /* ------------------ movi vD.4s, #0x0 ------------------ */
   /* 0x4F 0x00 0x04 000 vD */
   if ((insn & 0xFFFFFFE0) == 0x4F000400) {
      UInt vD = INSN(4,0);
      putQReg128(vD, mkV128(0x0000));
      DIP("movi v%u.4s, #0x0\n", vD);
      return True;
   }

   /* ---------------- MOV vD.16b, vN.16b ---------------- */
   /* 31        23  20 15     9 4
      010 01110 101 m  000111 n d   ORR vD.16b, vN.16b, vM.16b
      This only handles the N == M case.
   */
   if (INSN(31,24) == BITS8(0,1,0,0,1,1,1,0)
       && INSN(23,21) == BITS3(1,0,1) && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
      UInt mm = INSN(20,16);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      if (mm == nn) {
         /* ORR with identical sources is a plain register move. */
         putQReg128(dd, getQReg128(nn));
         DIP("mov v%u.16b, v%u.16b\n", dd, nn);
         return True;
      }
      /* else it's really an ORR; fall through. */
   }

   /* ---------------- CMEQ_d_d_#0 ---------------- */
   /*
      010 11110 11 10000 0100 110 n d   CMEQ Dd, Dn, #0
   */
   if ((INSN(31,0) & 0xFFFFFC00) == 0x5EE09800) {
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      /* Compare as 64x2 and zero the (unused) upper half of Dd. */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_CmpEQ64x2, getQReg128(nn),
                                mkV128(0x0000))));
      DIP("cmeq d%u, d%u, #0\n", dd, nn);
      return True;
   }

   /* ---------------- SHL_d_d_#imm ---------------- */
   /* 31         22 21  18 15     9 4
      010 111110 1  ih3 ib 010101 n d  SHL Dd, Dn, #(ih3:ib)
   */
   if (INSN(31,22) == BITS10(0,1,0,1,1,1,1,1,0,1)
       && INSN(15,10) == BITS6(0,1,0,1,0,1)) {
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      UInt sh = INSN(21,16);
      vassert(sh < 64);  /* guaranteed: sh is a 6-bit field */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
      DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }
7606   vex_printf("ARM64 front end: simd_and_fp\n");
7607   return False;
7608#  undef INSN
7609}
7610
7611
7612/*------------------------------------------------------------*/
7613/*--- Disassemble a single ARM64 instruction               ---*/
7614/*------------------------------------------------------------*/
7615
7616/* Disassemble a single ARM64 instruction into IR.  The instruction
7617   has is located at |guest_instr| and has guest IP of
7618   |guest_PC_curr_instr|, which will have been set before the call
7619   here.  Returns True iff the instruction was decoded, in which case
7620   *dres will be set accordingly, or False, in which case *dres should
7621   be ignored by the caller. */
7622
static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        UChar*       guest_instr,
        VexArchInfo* archinfo,
        VexAbiInfo*  abiinfo
     )
{
   /* Worker for disInstr_ARM64: decode the single instruction at
      |guest_instr| into IR.  First checks for the Valgrind "Special"
      instruction preamble, then dispatches on insn[28:25] to the
      per-group decoders.  Returns True iff the instruction was
      decoded, in which case *dres is valid; on False, *dres must be
      ignored by the caller. */

   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      UChar* code = (UChar*)guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it?
            The fifth word -- a no-op "orr" -- selects the request. */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /*  branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}
7795
7796
7797/*------------------------------------------------------------*/
7798/*--- Top-level fn                                         ---*/
7799/*------------------------------------------------------------*/
7800
7801/* Disassemble a single instruction into IR.  The instruction
7802   is located in host memory at &guest_code[delta]. */
7803
7804DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
7805                           Bool         (*resteerOkFn) ( void*, Addr64 ),
7806                           Bool         resteerCisOk,
7807                           void*        callback_opaque,
7808                           UChar*       guest_code_IN,
7809                           Long         delta_IN,
7810                           Addr64       guest_IP,
7811                           VexArch      guest_arch,
7812                           VexArchInfo* archinfo,
7813                           VexAbiInfo*  abiinfo,
7814                           Bool         host_bigendian_IN,
7815                           Bool         sigill_diag_IN )
7816{
7817   DisResult dres;
7818   vex_bzero(&dres, sizeof(dres));
7819
7820   /* Set globals (see top of this file) */
7821   vassert(guest_arch == VexArchARM64);
7822
7823   irsb                = irsb_IN;
7824   host_is_bigendian   = host_bigendian_IN;
7825   guest_PC_curr_instr = (Addr64)guest_IP;
7826
7827   /* Sanity checks */
7828   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
7829   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
7830   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
7831
7832   /* Try to decode */
7833   Bool ok = disInstr_ARM64_WRK( &dres,
7834                                 resteerOkFn, resteerCisOk, callback_opaque,
7835                                 (UChar*)&guest_code_IN[delta_IN],
7836                                 archinfo, abiinfo );
7837   if (ok) {
7838      /* All decode successes end up here. */
7839      vassert(dres.len == 4 || dres.len == 20);
7840      switch (dres.whatNext) {
7841         case Dis_Continue:
7842            putPC( mkU64(dres.len + guest_PC_curr_instr) );
7843            break;
7844         case Dis_ResteerU:
7845         case Dis_ResteerC:
7846            putPC(mkU64(dres.continueAt));
7847            break;
7848         case Dis_StopHere:
7849            break;
7850         default:
7851            vassert(0);
7852      }
7853      DIP("\n");
7854   } else {
7855      /* All decode failures end up here. */
7856      if (sigill_diag_IN) {
7857         Int   i, j;
7858         UChar buf[64];
7859         UInt  insn
7860                  = getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] );
7861         vex_bzero(buf, sizeof(buf));
7862         for (i = j = 0; i < 32; i++) {
7863            if (i > 0) {
7864              if ((i & 7) == 0) buf[j++] = ' ';
7865              else if ((i & 3) == 0) buf[j++] = '\'';
7866            }
7867            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
7868         }
7869         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
7870         vex_printf("disInstr(arm64): %s\n", buf);
7871      }
7872
7873      /* Tell the dispatcher that this insn cannot be decoded, and so
7874         has not been executed, and (is currently) the next to be
7875         executed.  PC should be up-to-date since it is made so at the
7876         start of each insn, but nevertheless be paranoid and update
7877         it again right now. */
7878      putPC( mkU64(guest_PC_curr_instr) );
7879      dres.whatNext    = Dis_StopHere;
7880      dres.len         = 0;
7881      dres.continueAt  = 0;
7882      dres.jk_StopHere = Ijk_NoDecode;
7883   }
7884   return dres;
7885}
7886
7887////////////////////////////////////////////////////////////////////////
7888////////////////////////////////////////////////////////////////////////
7889
7890/* Spare code for doing reference implementations of various 128-bit
7891   SIMD interleaves/deinterleaves/concatenation ops.  For 64-bit
7892   equivalents see the end of guest_arm_toIR.c. */
7893
7894////////////////////////////////////////////////////////////////
7895// 64x2 operations
7896//
7897static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 )
7898{
7899  // returns a0 b0
7900  return binop(Iop_64HLtoV128, unop(Iop_V128to64, mkexpr(a10)),
7901                               unop(Iop_V128to64, mkexpr(b10)));
7902}
7903
7904static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 )
7905{
7906  // returns a1 b1
7907  return binop(Iop_64HLtoV128, unop(Iop_V128HIto64, mkexpr(a10)),
7908                               unop(Iop_V128HIto64, mkexpr(b10)));
7909}
7910
7911
7912////////////////////////////////////////////////////////////////
7913// 32x4 operations
7914//
7915
7916// Split a 128 bit value into 4 32 bit ones, in 64-bit IRTemps with
7917// the top halves guaranteed to be zero.
7918static void breakV128to32s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
7919                             IRTemp* out0, IRTemp v128 )
7920{
7921  if (out3) *out3 = newTemp(Ity_I64);
7922  if (out2) *out2 = newTemp(Ity_I64);
7923  if (out1) *out1 = newTemp(Ity_I64);
7924  if (out0) *out0 = newTemp(Ity_I64);
7925  IRTemp hi64 = newTemp(Ity_I64);
7926  IRTemp lo64 = newTemp(Ity_I64);
7927  assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7928  assign(lo64, unop(Iop_V128to64,   mkexpr(v128)) );
7929  if (out3) assign(*out3, binop(Iop_Shr64, mkexpr(hi64), mkU8(32)));
7930  if (out2) assign(*out2, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFFFFFF)));
7931  if (out1) assign(*out1, binop(Iop_Shr64, mkexpr(lo64), mkU8(32)));
7932  if (out0) assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFFFFFF)));
7933}
7934
7935// Make a V128 bit value from 4 32 bit ones, each of which is in a 64 bit
7936// IRTemp.
7937static IRTemp mkV128from32s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7938{
7939  IRTemp hi64 = newTemp(Ity_I64);
7940  IRTemp lo64 = newTemp(Ity_I64);
7941  assign(hi64,
7942         binop(Iop_Or64,
7943               binop(Iop_Shl64, mkexpr(in3), mkU8(32)),
7944               binop(Iop_And64, mkexpr(in2), mkU64(0xFFFFFFFF))));
7945  assign(lo64,
7946         binop(Iop_Or64,
7947               binop(Iop_Shl64, mkexpr(in1), mkU8(32)),
7948               binop(Iop_And64, mkexpr(in0), mkU64(0xFFFFFFFF))));
7949  IRTemp res = newTemp(Ity_V128);
7950  assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
7951  return res;
7952}
7953
7954static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 )
7955{
7956  // returns a2 a0 b2 b0
7957  IRTemp a2, a0, b2, b0;
7958  breakV128to32s(NULL, &a2, NULL, &a0, a3210);
7959  breakV128to32s(NULL, &b2, NULL, &b0, b3210);
7960  return mkexpr(mkV128from32s(a2, a0, b2, b0));
7961}
7962
7963static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 )
7964{
7965  // returns a3 a1 b3 b1
7966  IRTemp a3, a1, b3, b1;
7967  breakV128to32s(&a3, NULL, &a1, NULL, a3210);
7968  breakV128to32s(&b3, NULL, &b1, NULL, b3210);
7969  return mkexpr(mkV128from32s(a3, a1, b3, b1));
7970}
7971
7972static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 )
7973{
7974  // returns a1 b1 a0 b0
7975  IRTemp a1, a0, b1, b0;
7976  breakV128to32s(NULL, NULL, &a1, &a0, a3210);
7977  breakV128to32s(NULL, NULL, &b1, &b0, b3210);
7978  return mkexpr(mkV128from32s(a1, b1, a0, b0));
7979}
7980
7981static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 )
7982{
7983  // returns a3 b3 a2 b2
7984  IRTemp a3, a2, b3, b2;
7985  breakV128to32s(&a3, &a2, NULL, NULL, a3210);
7986  breakV128to32s(&b3, &b2, NULL, NULL, b3210);
7987  return mkexpr(mkV128from32s(a3, b3, a2, b2));
7988}
7989
7990////////////////////////////////////////////////////////////////
7991// 16x8 operations
7992//
7993
7994static void breakV128to16s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
7995                             IRTemp* out4, IRTemp* out3, IRTemp* out2,
7996                             IRTemp* out1,IRTemp* out0, IRTemp v128 )
7997{
7998  if (out7) *out7 = newTemp(Ity_I64);
7999  if (out6) *out6 = newTemp(Ity_I64);
8000  if (out5) *out5 = newTemp(Ity_I64);
8001  if (out4) *out4 = newTemp(Ity_I64);
8002  if (out3) *out3 = newTemp(Ity_I64);
8003  if (out2) *out2 = newTemp(Ity_I64);
8004  if (out1) *out1 = newTemp(Ity_I64);
8005  if (out0) *out0 = newTemp(Ity_I64);
8006  IRTemp hi64 = newTemp(Ity_I64);
8007  IRTemp lo64 = newTemp(Ity_I64);
8008  assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
8009  assign(lo64, unop(Iop_V128to64,   mkexpr(v128)) );
8010  if (out7)
8011    assign(*out7, binop(Iop_And64,
8012                        binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
8013                        mkU64(0xFFFF)));
8014  if (out6)
8015    assign(*out6, binop(Iop_And64,
8016                        binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
8017                        mkU64(0xFFFF)));
8018  if (out5)
8019    assign(*out5, binop(Iop_And64,
8020                        binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
8021                        mkU64(0xFFFF)));
8022  if (out4)
8023    assign(*out4, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFF)));
8024  if (out3)
8025    assign(*out3, binop(Iop_And64,
8026                        binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
8027                        mkU64(0xFFFF)));
8028  if (out2)
8029    assign(*out2, binop(Iop_And64,
8030                        binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
8031                        mkU64(0xFFFF)));
8032  if (out1)
8033    assign(*out1, binop(Iop_And64,
8034                        binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
8035                        mkU64(0xFFFF)));
8036  if (out0)
8037    assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFF)));
8038}
8039
8040static IRTemp mkV128from16s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
8041                              IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
8042{
8043  IRTemp hi64 = newTemp(Ity_I64);
8044  IRTemp lo64 = newTemp(Ity_I64);
8045  assign(hi64,
8046         binop(Iop_Or64,
8047               binop(Iop_Or64,
8048                     binop(Iop_Shl64,
8049                           binop(Iop_And64, mkexpr(in7), mkU64(0xFFFF)),
8050                           mkU8(48)),
8051                     binop(Iop_Shl64,
8052                           binop(Iop_And64, mkexpr(in6), mkU64(0xFFFF)),
8053                           mkU8(32))),
8054               binop(Iop_Or64,
8055                     binop(Iop_Shl64,
8056                           binop(Iop_And64, mkexpr(in5), mkU64(0xFFFF)),
8057                           mkU8(16)),
8058                     binop(Iop_And64,
8059                           mkexpr(in4), mkU64(0xFFFF)))));
8060  assign(lo64,
8061         binop(Iop_Or64,
8062               binop(Iop_Or64,
8063                     binop(Iop_Shl64,
8064                           binop(Iop_And64, mkexpr(in3), mkU64(0xFFFF)),
8065                           mkU8(48)),
8066                     binop(Iop_Shl64,
8067                           binop(Iop_And64, mkexpr(in2), mkU64(0xFFFF)),
8068                           mkU8(32))),
8069               binop(Iop_Or64,
8070                     binop(Iop_Shl64,
8071                           binop(Iop_And64, mkexpr(in1), mkU64(0xFFFF)),
8072                           mkU8(16)),
8073                     binop(Iop_And64,
8074                           mkexpr(in0), mkU64(0xFFFF)))));
8075  IRTemp res = newTemp(Ity_V128);
8076  assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
8077  return res;
8078}
8079
8080static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
8081{
8082  // returns a6 a4 a2 a0 b6 b4 b2 b0
8083  IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
8084  breakV128to16s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
8085  breakV128to16s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
8086  return mkexpr(mkV128from16s(a6, a4, a2, a0, b6, b4, b2, b0));
8087}
8088
8089static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
8090{
8091  // returns a7 a5 a3 a1 b7 b5 b3 b1
8092  IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
8093  breakV128to16s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
8094  breakV128to16s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
8095  return mkexpr(mkV128from16s(a7, a5, a3, a1, b7, b5, b3, b1));
8096}
8097
8098static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 )
8099{
8100  // returns a3 b3 a2 b2 a1 b1 a0 b0
8101  IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
8102  breakV128to16s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
8103  breakV128to16s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
8104  return mkexpr(mkV128from16s(a3, b3, a2, b2, a1, b1, a0, b0));
8105}
8106
8107static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 )
8108{
8109  // returns a7 b7 a6 b6 a5 b5 a4 b4
8110  IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
8111  breakV128to16s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
8112  breakV128to16s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
8113  return mkexpr(mkV128from16s(a7, b7, a6, b6, a5, b5, a4, b4));
8114}
8115
8116////////////////////////////////////////////////////////////////
8117// 8x16 operations
8118//
8119
8120static void breakV128to8s ( IRTemp* outF, IRTemp* outE, IRTemp* outD,
8121                            IRTemp* outC, IRTemp* outB, IRTemp* outA,
8122                            IRTemp* out9, IRTemp* out8,
8123                            IRTemp* out7, IRTemp* out6, IRTemp* out5,
8124                            IRTemp* out4, IRTemp* out3, IRTemp* out2,
8125                            IRTemp* out1,IRTemp* out0, IRTemp v128 )
8126{
8127  if (outF) *outF = newTemp(Ity_I64);
8128  if (outE) *outE = newTemp(Ity_I64);
8129  if (outD) *outD = newTemp(Ity_I64);
8130  if (outC) *outC = newTemp(Ity_I64);
8131  if (outB) *outB = newTemp(Ity_I64);
8132  if (outA) *outA = newTemp(Ity_I64);
8133  if (out9) *out9 = newTemp(Ity_I64);
8134  if (out8) *out8 = newTemp(Ity_I64);
8135  if (out7) *out7 = newTemp(Ity_I64);
8136  if (out6) *out6 = newTemp(Ity_I64);
8137  if (out5) *out5 = newTemp(Ity_I64);
8138  if (out4) *out4 = newTemp(Ity_I64);
8139  if (out3) *out3 = newTemp(Ity_I64);
8140  if (out2) *out2 = newTemp(Ity_I64);
8141  if (out1) *out1 = newTemp(Ity_I64);
8142  if (out0) *out0 = newTemp(Ity_I64);
8143  IRTemp hi64 = newTemp(Ity_I64);
8144  IRTemp lo64 = newTemp(Ity_I64);
8145  assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
8146  assign(lo64, unop(Iop_V128to64,   mkexpr(v128)) );
8147  if (outF)
8148    assign(*outF, binop(Iop_And64,
8149                        binop(Iop_Shr64, mkexpr(hi64), mkU8(56)),
8150                        mkU64(0xFF)));
8151  if (outE)
8152    assign(*outE, binop(Iop_And64,
8153                        binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
8154                        mkU64(0xFF)));
8155  if (outD)
8156    assign(*outD, binop(Iop_And64,
8157                        binop(Iop_Shr64, mkexpr(hi64), mkU8(40)),
8158                        mkU64(0xFF)));
8159  if (outC)
8160    assign(*outC, binop(Iop_And64,
8161                        binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
8162                        mkU64(0xFF)));
8163  if (outB)
8164    assign(*outB, binop(Iop_And64,
8165                        binop(Iop_Shr64, mkexpr(hi64), mkU8(24)),
8166                        mkU64(0xFF)));
8167  if (outA)
8168    assign(*outA, binop(Iop_And64,
8169                        binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
8170                        mkU64(0xFF)));
8171  if (out9)
8172    assign(*out9, binop(Iop_And64,
8173                        binop(Iop_Shr64, mkexpr(hi64), mkU8(8)),
8174                        mkU64(0xFF)));
8175  if (out8)
8176    assign(*out8, binop(Iop_And64,
8177                        binop(Iop_Shr64, mkexpr(hi64), mkU8(0)),
8178                        mkU64(0xFF)));
8179  if (out7)
8180    assign(*out7, binop(Iop_And64,
8181                        binop(Iop_Shr64, mkexpr(lo64), mkU8(56)),
8182                        mkU64(0xFF)));
8183  if (out6)
8184    assign(*out6, binop(Iop_And64,
8185                        binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
8186                        mkU64(0xFF)));
8187  if (out5)
8188    assign(*out5, binop(Iop_And64,
8189                        binop(Iop_Shr64, mkexpr(lo64), mkU8(40)),
8190                        mkU64(0xFF)));
8191  if (out4)
8192    assign(*out4, binop(Iop_And64,
8193                        binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
8194                        mkU64(0xFF)));
8195  if (out3)
8196    assign(*out3, binop(Iop_And64,
8197                        binop(Iop_Shr64, mkexpr(lo64), mkU8(24)),
8198                        mkU64(0xFF)));
8199  if (out2)
8200    assign(*out2, binop(Iop_And64,
8201                        binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
8202                        mkU64(0xFF)));
8203  if (out1)
8204    assign(*out1, binop(Iop_And64,
8205                        binop(Iop_Shr64, mkexpr(lo64), mkU8(8)),
8206                        mkU64(0xFF)));
8207  if (out0)
8208    assign(*out0, binop(Iop_And64,
8209                        binop(Iop_Shr64, mkexpr(lo64), mkU8(0)),
8210                        mkU64(0xFF)));
8211}
8212
8213static IRTemp mkV128from8s ( IRTemp inF, IRTemp inE, IRTemp inD, IRTemp inC,
8214                             IRTemp inB, IRTemp inA, IRTemp in9, IRTemp in8,
8215                             IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
8216                             IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
8217{
8218  IRTemp vFE = newTemp(Ity_I64);
8219  IRTemp vDC = newTemp(Ity_I64);
8220  IRTemp vBA = newTemp(Ity_I64);
8221  IRTemp v98 = newTemp(Ity_I64);
8222  IRTemp v76 = newTemp(Ity_I64);
8223  IRTemp v54 = newTemp(Ity_I64);
8224  IRTemp v32 = newTemp(Ity_I64);
8225  IRTemp v10 = newTemp(Ity_I64);
8226  assign(vFE, binop(Iop_Or64,
8227                    binop(Iop_Shl64,
8228                          binop(Iop_And64, mkexpr(inF), mkU64(0xFF)), mkU8(8)),
8229                    binop(Iop_And64, mkexpr(inE), mkU64(0xFF))));
8230  assign(vDC, binop(Iop_Or64,
8231                    binop(Iop_Shl64,
8232                          binop(Iop_And64, mkexpr(inD), mkU64(0xFF)), mkU8(8)),
8233                    binop(Iop_And64, mkexpr(inC), mkU64(0xFF))));
8234  assign(vBA, binop(Iop_Or64,
8235                    binop(Iop_Shl64,
8236                          binop(Iop_And64, mkexpr(inB), mkU64(0xFF)), mkU8(8)),
8237                    binop(Iop_And64, mkexpr(inA), mkU64(0xFF))));
8238  assign(v98, binop(Iop_Or64,
8239                    binop(Iop_Shl64,
8240                          binop(Iop_And64, mkexpr(in9), mkU64(0xFF)), mkU8(8)),
8241                    binop(Iop_And64, mkexpr(in8), mkU64(0xFF))));
8242  assign(v76, binop(Iop_Or64,
8243                    binop(Iop_Shl64,
8244                          binop(Iop_And64, mkexpr(in7), mkU64(0xFF)), mkU8(8)),
8245                    binop(Iop_And64, mkexpr(in6), mkU64(0xFF))));
8246  assign(v54, binop(Iop_Or64,
8247                    binop(Iop_Shl64,
8248                          binop(Iop_And64, mkexpr(in5), mkU64(0xFF)), mkU8(8)),
8249                    binop(Iop_And64, mkexpr(in4), mkU64(0xFF))));
8250  assign(v32, binop(Iop_Or64,
8251                    binop(Iop_Shl64,
8252                          binop(Iop_And64, mkexpr(in3), mkU64(0xFF)), mkU8(8)),
8253                    binop(Iop_And64, mkexpr(in2), mkU64(0xFF))));
8254  assign(v10, binop(Iop_Or64,
8255                    binop(Iop_Shl64,
8256                          binop(Iop_And64, mkexpr(in1), mkU64(0xFF)), mkU8(8)),
8257                    binop(Iop_And64, mkexpr(in0), mkU64(0xFF))));
8258  return mkV128from16s(vFE, vDC, vBA, v98, v76, v54, v32, v10);
8259}
8260
8261static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
8262                                     IRTemp bFEDCBA9876543210 )
8263{
8264  // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
8265  IRTemp aE, aC, aA, a8, a6, a4, a2, a0, bE, bC, bA, b8, b6, b4, b2, b0;
8266  breakV128to8s(NULL, &aE, NULL, &aC, NULL, &aA, NULL, &a8,
8267                NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0,
8268                aFEDCBA9876543210);
8269  breakV128to8s(NULL, &bE, NULL, &bC, NULL, &bA, NULL, &b8,
8270                NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0,
8271                bFEDCBA9876543210);
8272  return mkexpr(mkV128from8s(aE, aC, aA, a8, a6, a4, a2, a0,
8273                             bE, bC, bA, b8, b6, b4, b2, b0));
8274}
8275
8276static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
8277                                    IRTemp bFEDCBA9876543210 )
8278{
8279  // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
8280  IRTemp aF, aD, aB, a9, a7, a5, a3, a1, bF, bD, bB, b9, b7, b5, b3, b1;
8281  breakV128to8s(&aF, NULL, &aD, NULL, &aB, NULL, &a9, NULL,
8282                &a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL,
8283                aFEDCBA9876543210);
8284
8285  breakV128to8s(&bF, NULL, &bD, NULL, &bB, NULL, &b9, NULL,
8286                &b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL,
8287                aFEDCBA9876543210);
8288
8289  return mkexpr(mkV128from8s(aF, aD, aB, a9, a7, a5, a3, a1,
8290                             bF, bD, bB, b9, b7, b5, b3, b1));
8291}
8292
8293static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
8294                                     IRTemp bFEDCBA9876543210 )
8295{
8296  // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
8297  IRTemp a7, b7, a6, b6, a5, b5, a4, b4, a3, b3, a2, b2, a1, b1, a0, b0;
8298  breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
8299                &a7,  &a6,  &a5,  &a4,  &a3,  &a2,  &a1,  &a0,
8300                aFEDCBA9876543210);
8301  breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
8302                &b7,  &b6,  &b5,  &b4,  &b3,  &b2,  &b1,  &b0,
8303                bFEDCBA9876543210);
8304  return mkexpr(mkV128from8s(a7, b7, a6, b6, a5, b5, a4, b4,
8305                             a3, b3, a2, b2, a1, b1, a0, b0));
8306}
8307
8308static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
8309                                     IRTemp bFEDCBA9876543210 )
8310{
8311  // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
8312  IRTemp aF, bF, aE, bE, aD, bD, aC, bC, aB, bB, aA, bA, a9, b9, a8, b8;
8313  breakV128to8s(&aF,  &aE,  &aD,  &aC,  &aB,  &aA,  &a9,  &a8,
8314                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
8315                aFEDCBA9876543210);
8316  breakV128to8s(&bF,  &bE,  &bD,  &bC,  &bB,  &bA,  &b9,  &b8,
8317                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
8318                bFEDCBA9876543210);
8319  return mkexpr(mkV128from8s(aF, bF, aE, bE, aD, bD, aC, bC,
8320                             aB, bB, aA, bA, a9, b9, a8, b8));
8321}
8322
8323/*--------------------------------------------------------------------*/
8324/*--- end                                       guest_arm64_toIR.c ---*/
8325/*--------------------------------------------------------------------*/
8326