/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                     guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2013 OpenWorks
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* KNOWN LIMITATIONS 2014-Nov-16

   * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.

     Also FP comparison "unordered" .. is implemented as normal FP
     comparison.

     Both should be fixed.  They behave incorrectly in the presence of
     NaNs.

     FMULX is treated the same as FMUL.  That's also not correct.

   * Floating multiply-add (etc) insns are split into a multiply and
     an add, and so suffer double rounding; hence the least
     significant mantissa bit is sometimes incorrect.  Fix: use the
     IR multiply-add IROps instead.

   * FRINTA, FRINTN are kludged .. they just round to nearest.  No special
     handling for the "ties" case.  FRINTX might be dubious too.

   * Ditto FCVTXN.  No idea what "round to odd" means.  This implementation
     just rounds to nearest.
*/

/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
      (ror x12, x12, #3;   ror x12, x12, #13
       ror x12, x12, #51;  ror x12, x12, #61)

   Following that, one of the following 4 is allowed
   (standard interpretation in parentheses):

      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
      AA090129 (orr x9,x9,x9)      IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/

/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction.
*/

/* CONST: what is the host's endianness?  We need to know this in
   order to do sub-register accesses to the SIMD/FP registers
   correctly. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated.  */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- arm insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( const UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}
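
/* Editor's illustrative sketch (not part of the original translator):
   one plausible way to recognise the 16-byte "special instruction"
   preamble described at the top of this file, using the loader above.
   The helper name is hypothetical and nothing here calls it; the real
   decoder may well test the words differently. */
static inline Bool example_isSpecialPreamble ( const UChar* code )
{
   return toBool(   getUIntLittleEndianly(code +  0) == 0x93CC0D8CU
                 && getUIntLittleEndianly(code +  4) == 0x93CC358CU
                 && getUIntLittleEndianly(code +  8) == 0x93CCCD8CU
                 && getUIntLittleEndianly(code + 12) == 0x93CCF58CU);
}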

/* Sign extend an N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   Long r = (Long)x;
   r = (r << (64-n)) >> (64-n);
   return (ULong)r;
}
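
/* For example (editor's note): sx_to_64(0x1FF, 9) yields
   0xFFFFFFFFFFFFFFFF, since bit 8 is set, whereas sx_to_64(0xFF, 9)
   yields 0xFF, since bit 8 is clear. */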

//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host. */
//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
//ZZ {
//ZZ    UShort w = 0;
//ZZ    w = (w << 8) | p[1];
//ZZ    w = (w << 8) | p[0];
//ZZ    return w;
//ZZ }
//ZZ
//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
//ZZ    vassert(sh >= 0 && sh < 32);
//ZZ    if (sh == 0)
//ZZ       return x;
//ZZ    else
//ZZ       return (x << (32-sh)) | (x >> sh);
//ZZ }
//ZZ
//ZZ static Int popcount32 ( UInt x )
//ZZ {
//ZZ    Int res = 0, i;
//ZZ    for (i = 0; i < 32; i++) {
//ZZ       res += (x & 1);
//ZZ       x >>= 1;
//ZZ    }
//ZZ    return res;
//ZZ }
//ZZ
//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
//ZZ {
//ZZ    UInt mask = 1 << ix;
//ZZ    x &= ~mask;
//ZZ    x |= ((b << ix) & mask);
//ZZ    return x;
//ZZ }

#define BITS2(_b1,_b0)  \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0)  \
   (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0)  \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)  \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b10) << 10)  \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11,_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b11) << 11)  \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define X00 BITS2(0,0)
#define X01 BITS2(0,1)
#define X10 BITS2(1,0)
#define X11 BITS2(1,1)

// produces _uint[_bMax:_bMin]
#define SLICE_UInt(_uint,_bMax,_bMin)  \
   (( ((UInt)(_uint)) >> (_bMin))  \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
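
/* Editor's illustrative sketch (not part of the original decoder):
   how these helpers pick instruction fields apart.  The insn word
   0x8B010002 encodes "add x2, x0, x1"; its register fields sit at
   the usual A64 positions Rd=[4:0], Rn=[9:5], Rm=[20:16].  The
   helper is hypothetical and unused. */
static inline void example_sliceFields ( void )
{
   UInt insn = 0x8B010002;
   vassert(SLICE_UInt(insn, 4, 0)   == 2);  /* Rd = x2 */
   vassert(SLICE_UInt(insn, 9, 5)   == 0);  /* Rn = x0 */
   vassert(SLICE_UInt(insn, 20, 16) == 1);  /* Rm = x1 */
}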


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.    ---*/
/*------------------------------------------------------------*/

static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* This is used in many places, so the brevity is an advantage. */
static IRTemp newTempV128(void)
{
   return newTemp(Ity_V128);
}

/* Initialise V128 temporaries en masse. */
static
void newTempsV128_2(IRTemp* t1, IRTemp* t2)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
}

static
void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
}

static
void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
}

static
void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
                    IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   vassert(t5 && *t5 == IRTemp_INVALID);
   vassert(t6 && *t6 == IRTemp_INVALID);
   vassert(t7 && *t7 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
   *t5 = newTempV128();
   *t6 = newTempV128();
   *t7 = newTempV128();
}

//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
//ZZ    IRRoundingMode. */
//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
//ZZ {
//ZZ    return mkU32(Irrm_NEAREST);
//ZZ }
//ZZ
//ZZ /* Generate an expression for SRC rotated right by ROT. */
//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
//ZZ {
//ZZ    vassert(rot >= 0 && rot < 32);
//ZZ    if (rot == 0)
//ZZ       return mkexpr(src);
//ZZ    return
//ZZ       binop(Iop_Or32,
//ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
//ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
//ZZ }
//ZZ
//ZZ static IRExpr* mkU128 ( ULong i )
//ZZ {
//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
//ZZ }
//ZZ
//ZZ /* Generate a 4-aligned version of the given expression if
//ZZ    the given condition is true.  Else return it unchanged. */
//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
//ZZ {
//ZZ    if (b)
//ZZ       return binop(Iop_And32, e, mkU32(~3));
//ZZ    else
//ZZ       return e;
//ZZ }

/* Other IR construction helpers. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}

static IROp mkDIVF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_DivF32;
      case Ity_F64: return Iop_DivF64;
      default: vpanic("mkDIVF");
   }
}

static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}

static IROp mkABSF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AbsF32;
      case Ity_F64: return Iop_AbsF64;
      default: vpanic("mkABSF");
   }
}

static IROp mkSQRTF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SqrtF32;
      case Ity_F64: return Iop_SqrtF64;
      default: vpanic("mkSQRTF");
   }
}

static IROp mkVecADD ( UInt size ) {
   const IROp ops[4]
      = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
          Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
          Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSUB ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSARN ( UInt size ) {
   const IROp ops[4]
      = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHRN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHLN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATEVENLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
          Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATODDLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
          Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVELO ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
          Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVEHI ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
          Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMUL ( UInt size ) {
   const IROp ops[4]
      = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
   vassert(size < 3);
   return ops[size];
}

static IROp mkVecMULLU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecCMPEQ ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTU ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTS ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecABS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
   const IROp ops[4]
      = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
          Iop_ZeroHI96ofV128,  Iop_ZeroHI64ofV128 };
   vassert(size < 4);
   return ops[size];
}

static IRExpr* mkU ( IRType ty, ULong imm ) {
   switch (ty) {
      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
      case Ity_I64: return mkU64(imm);
      default: vpanic("mkU");
   }
}

static IROp mkVecQDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQRDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
          Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
          Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
          Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
          Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
          Iop_NarrowUn64to32x2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Ux8,  Iop_QNarrowUn32Sto16Ux4,
          Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Sx8,  Iop_QNarrowUn32Sto16Sx4,
          Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Uto8Ux8,  Iop_QNarrowUn32Uto16Ux4,
          Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQShrNnarrow16Uto8Ux8,  Iop_QandQShrNnarrow32Uto16Ux4,
          Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Sx8,  Iop_QandQSarNnarrow32Sto16Sx4,
          Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Ux8,  Iop_QandQSarNnarrow32Sto16Ux4,
          Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRShrNnarrow16Uto8Ux8,  Iop_QandQRShrNnarrow32Uto16Ux4,
          Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Sx8,  Iop_QandQRSarNnarrow32Sto16Sx4,
          Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Ux8,  Iop_QandQRSarNnarrow32Sto16Ux4,
          Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQSHLNSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
          Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
          Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
          Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecADDF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
   vassert(size < 4);
   return ops[size];
}

/* Generate IR to create 'arg rotated right by imm', for sane values
   of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   if (imm == 0) {
      return arg;
   }
   IRTemp res = newTemp(ty);
   assign(res, binop(mkOR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
   return res;
}

/* Generate IR to set the returned temp to either all-zeroes or
   all ones, as a copy of arg<imm>. */
static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   IRTemp res = newTemp(ty);
   assign(res, binop(mkSAR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
                     mkU8(w - 1)));
   return res;
}
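
/* For example (editor's note): with ty == Ity_I32, arg holding
   0x00000010 and imm == 4, bit 4 is 1, so the result is 0xFFFFFFFF;
   with arg holding 0 the result is 0x00000000. */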

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}

/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8:  return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)

#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG   offsetof(VexGuestARM64State,guest_QCFLAG)

#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)


/* ---------------- Integer registers ---------------- */

static Int offsetIReg64 ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed. */
   switch (iregNo) {
      case 0:  return OFFB_X0;
      case 1:  return OFFB_X1;
      case 2:  return OFFB_X2;
      case 3:  return OFFB_X3;
      case 4:  return OFFB_X4;
      case 5:  return OFFB_X5;
      case 6:  return OFFB_X6;
      case 7:  return OFFB_X7;
      case 8:  return OFFB_X8;
      case 9:  return OFFB_X9;
      case 10: return OFFB_X10;
      case 11: return OFFB_X11;
      case 12: return OFFB_X12;
      case 13: return OFFB_X13;
      case 14: return OFFB_X14;
      case 15: return OFFB_X15;
      case 16: return OFFB_X16;
      case 17: return OFFB_X17;
      case 18: return OFFB_X18;
      case 19: return OFFB_X19;
      case 20: return OFFB_X20;
      case 21: return OFFB_X21;
      case 22: return OFFB_X22;
      case 23: return OFFB_X23;
      case 24: return OFFB_X24;
      case 25: return OFFB_X25;
      case 26: return OFFB_X26;
      case 27: return OFFB_X27;
      case 28: return OFFB_X28;
      case 29: return OFFB_X29;
      case 30: return OFFB_X30;
      /* but not 31 */
      default: vassert(0);
   }
}

static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
}

static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}

static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}

static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}


/* ---------------- Vector (Q) registers ---------------- */

static Int offsetQReg128 ( UInt qregNo )
{
   /* We don't care about endianness at this point.  It only becomes
      relevant when dealing with sections of these registers. */
   switch (qregNo) {
      case 0:  return OFFB_Q0;
      case 1:  return OFFB_Q1;
      case 2:  return OFFB_Q2;
      case 3:  return OFFB_Q3;
      case 4:  return OFFB_Q4;
      case 5:  return OFFB_Q5;
      case 6:  return OFFB_Q6;
      case 7:  return OFFB_Q7;
      case 8:  return OFFB_Q8;
      case 9:  return OFFB_Q9;
      case 10: return OFFB_Q10;
      case 11: return OFFB_Q11;
      case 12: return OFFB_Q12;
      case 13: return OFFB_Q13;
      case 14: return OFFB_Q14;
      case 15: return OFFB_Q15;
      case 16: return OFFB_Q16;
      case 17: return OFFB_Q17;
      case 18: return OFFB_Q18;
      case 19: return OFFB_Q19;
      case 20: return OFFB_Q20;
      case 21: return OFFB_Q21;
      case 22: return OFFB_Q22;
      case 23: return OFFB_Q23;
      case 24: return OFFB_Q24;
      case 25: return OFFB_Q25;
      case 26: return OFFB_Q26;
      case 27: return OFFB_Q27;
      case 28: return OFFB_Q28;
      case 29: return OFFB_Q29;
      case 30: return OFFB_Q30;
      case 31: return OFFB_Q31;
      default: vassert(0);
   }
}

/* Write to a complete Qreg. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}

/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on. */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1:  return Ity_I8;
      case 2:  return Ity_I16;
      case 4:  return Ity_I32; //Ity_F32;
      case 8:  return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}

/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(host_endness == VexEndnessLE);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8:                 laneSzB = 1;  break;
      case Ity_F16: case Ity_I16:  laneSzB = 2;  break;
      case Ity_F32: case Ity_I32:  laneSzB = 4;  break;
      case Ity_F64: case Ity_I64:  laneSzB = 8;  break;
      case Ity_V128:               laneSzB = 16; break;
      default: break;
   }
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   vassert(maxOff < 16);
   return base + minOff;
}

/* Put to the least significant lane of a Qreg. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg. */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_F16: case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}

static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}

static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}

/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}

static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}

static void putQRegHI64 ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegHI64(qregNo);
   switch (ty) {
      case Ity_I64: case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are plain wrong
   }
   stmt(IRStmt_Put(off, e));
}

/* Put to a specified lane of a Qreg. */
static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
{
   IRType laneTy  = typeOfIRExpr(irsb->tyenv, e);
   Int    off     = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_F64: case Ity_I64:
      case Ity_I32: case Ity_F32:
      case Ity_I16: case Ity_F16:
      case Ity_I8:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from a specified lane of a Qreg. */
static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
{
   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
      case Ity_F64: case Ity_F32: case Ity_F16:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, laneTy);
}


//ZZ /* ---------------- Misc registers ---------------- */
//ZZ
//ZZ static void putMiscReg32 ( UInt    gsoffset,
//ZZ                            IRExpr* e, /* :: Ity_I32 */
//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
//ZZ {
//ZZ    switch (gsoffset) {
//ZZ       case OFFB_FPSCR:   break;
//ZZ       case OFFB_QFLAG32: break;
//ZZ       case OFFB_GEFLAG0: break;
//ZZ       case OFFB_GEFLAG1: break;
//ZZ       case OFFB_GEFLAG2: break;
//ZZ       case OFFB_GEFLAG3: break;
//ZZ       default: vassert(0); /* awaiting more cases */
//ZZ    }
//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
//ZZ
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional write */
//ZZ       stmt(IRStmt_Put(gsoffset, e));
//ZZ    } else {
//ZZ       stmt(IRStmt_Put(
//ZZ          gsoffset,
//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
//ZZ       ));
//ZZ    }
//ZZ }
//ZZ
//ZZ static IRTemp get_ITSTATE ( void )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_ITSTATE ( IRTemp t )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
//ZZ }
//ZZ
//ZZ static IRTemp get_QFLAG32 ( void )
//ZZ {
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
//ZZ {
//ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
//ZZ }
//ZZ
//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
//ZZ    Status Register) to indicate that overflow or saturation occurred.
//ZZ    Nb: t must be zero to denote no saturation, and any nonzero
//ZZ    value to indicate saturation. */
//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
//ZZ {
//ZZ    IRTemp old = get_QFLAG32();
//ZZ    IRTemp nyu = newTemp(Ity_I32);
//ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
//ZZ    put_QFLAG32(nyu, condT);
//ZZ }


/* ---------------- FPCR stuff ---------------- */

/* Generate IR to get hold of the rounding mode bits in FPCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
   /* Now swap them. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
         ));
   return swapped;
}
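
/* Editor's illustrative sketch (not part of the original translator):
   the same bit-0/bit-1 swap in plain C, for cross-checking the IR
   above.  example_swapRMBits(0) == 0 (nearest), (1) == 2 (+infinity),
   (2) == 1 (-infinity), (3) == 3 (zero).  The helper is hypothetical
   and unused. */
static inline UInt example_swapRMBits ( UInt armEncd )
{
   return ((armEncd << 1) & 2) | ((armEncd >> 1) & 1);
}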


/*------------------------------------------------------------*/
/*--- Helpers for flag handling and conditional insns      ---*/
/*------------------------------------------------------------*/

static const HChar* nameARM64Condcode ( ARM64Condcode cond )
{
   switch (cond) {
      case ARM64CondEQ:  return "eq";
      case ARM64CondNE:  return "ne";
      case ARM64CondCS:  return "cs";  // or 'hs'
      case ARM64CondCC:  return "cc";  // or 'lo'
      case ARM64CondMI:  return "mi";
      case ARM64CondPL:  return "pl";
      case ARM64CondVS:  return "vs";
      case ARM64CondVC:  return "vc";
      case ARM64CondHI:  return "hi";
      case ARM64CondLS:  return "ls";
      case ARM64CondGE:  return "ge";
      case ARM64CondLT:  return "lt";
      case ARM64CondGT:  return "gt";
      case ARM64CondLE:  return "le";
      case ARM64CondAL:  return "al";
      case ARM64CondNV:  return "nv";
      default: vpanic("nameARM64Condcode");
   }
}

/* and a handy shorthand for it */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I64 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM64_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_condition", &arm64g_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
1750
1751
1752/* Build IR to calculate some particular condition from stored
1753   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1754   Ity_I64, suitable for narrowing.  Although the return type is
1755   Ity_I64, the returned value is either 0 or 1.
1756*/
1757static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1758{
1759  /* First arg is "(cond << 4) | condition".  This requires that the
1760     ARM64_CC_OP_ values all fit in 4 bits.  Hence we are passing a
1761     (COND, OP) pair in the lowest 8 bits of the first argument. */
1762   vassert(cond >= 0 && cond <= 15);
1763   return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
1764}
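
/* Example use (as in the CSEL and CCMP handling later in this file):
   narrow the result to a 1-bit guard, suitable for IRExpr_ITE:

      IRExpr* guard
         = unop(Iop_64to1, mk_arm64g_calculate_condition(ARM64CondEQ));
*/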


/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_arm64g_calculate_flag_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


//ZZ /* Build IR to calculate just the overflow flag from stored
//ZZ    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
//ZZ    Ity_I32. */
//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
//ZZ {
//ZZ    IRExpr** args
//ZZ       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
//ZZ    IRExpr* call
//ZZ       = mkIRExprCCall(
//ZZ            Ity_I32,
//ZZ            0/*regparm*/,
//ZZ            "armg_calculate_flag_v", &armg_calculate_flag_v,
//ZZ            args
//ZZ         );
//ZZ    /* Exclude OP and NDEP from definedness checking.  We're only
//ZZ       interested in DEP1 and DEP2. */
//ZZ    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
//ZZ    return call;
//ZZ }


/* Build IR to calculate N Z C V in bits 31:28 of the
   returned word. */
static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to set the flags thunk, in the most general case. */
static
void setFlags_D1_D2_ND ( UInt cc_op,
                         IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
{
   vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
   vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(cc_op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
}

/* Build IR to set the flags thunk after ADD or SUB. */
static
void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp z64    = newTemp(Ity_I64);
   if (is64) {
      argL64 = argL;
      argR64 = argR;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
   }
   assign(z64, mkU64(0));
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { cc_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { cc_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
   else                      { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
}

/* Build IR to set the flags thunk after ADC or SBC. */
static
void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
                        IRTemp argL, IRTemp argR, IRTemp oldC )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp oldC64 = IRTemp_INVALID;
   if (is64) {
      argL64 = argL;
      argR64 = argR;
      oldC64 = oldC;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      oldC64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
      assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
   }
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSBC &&  is64) { cc_op = ARM64G_CC_OP_SBC64; }
   else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
   else if (!isSBC &&  is64) { cc_op = ARM64G_CC_OP_ADC64; }
   else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
   else                      { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
}

/* Build IR to set the flags thunk after ADD or SUB, if the given
   condition evaluates to True at run time.  If not, the flags are set
   to the specified NZCV value. */
static
void setFlags_ADD_SUB_conditionally (
        Bool is64, Bool isSUB,
        IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
     )
{
   /* Generate IR as follows:
        CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
        CC_DEP1 = ITE(cond, argL64, nzcv << 28)
        CC_DEP2 = ITE(cond, argR64, 0)
        CC_NDEP = 0
   */

   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));

   /* Establish the operation and operands for the True case. */
   IRTemp t_dep1 = IRTemp_INVALID;
   IRTemp t_dep2 = IRTemp_INVALID;
   UInt   t_op   = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
   else                      { vassert(0); }
   /* */
   if (is64) {
      t_dep1 = argL;
      t_dep2 = argR;
   } else {
      t_dep1 = newTemp(Ity_I64);
      t_dep2 = newTemp(Ity_I64);
      assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
      assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
   }

   /* Establish the operation and operands for the False case. */
   IRTemp f_dep1 = newTemp(Ity_I64);
   IRTemp f_dep2 = z64;
   UInt   f_op   = ARM64G_CC_OP_COPY;
   assign(f_dep1, mkU64(nzcv << 28));

   /* Final thunk values */
   IRTemp dep1 = newTemp(Ity_I64);
   IRTemp dep2 = newTemp(Ity_I64);
   IRTemp op   = newTemp(Ity_I64);

   assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
   assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
   assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));

   /* finally .. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
}

/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
static
void setFlags_LOGIC ( Bool is64, IRTemp res )
{
   IRTemp res64 = IRTemp_INVALID;
   IRTemp z64   = newTemp(Ity_I64);
   UInt   cc_op = ARM64G_CC_OP_NUMBER;
   if (is64) {
      res64 = res;
      cc_op = ARM64G_CC_OP_LOGIC64;
   } else {
      res64 = newTemp(Ity_I64);
      assign(res64, unop(Iop_32Uto64, mkexpr(res)));
      cc_op = ARM64G_CC_OP_LOGIC32;
   }
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(cc_op, res64, z64, z64);
}

/* Build IR to set the flags thunk to a given NZCV value.  NZCV is
   located in bits 31:28 of the supplied value. */
static
void setFlags_COPY ( IRTemp nzcv_28x0 )
{
   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
}


//ZZ /* Minor variant of the above that sets NDEP to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_dep2,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
//ZZ }
//ZZ
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_ndep,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
//ZZ }
//ZZ
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
//ZZ    sets them at all) */
//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
//ZZ                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
//ZZ }


/*------------------------------------------------------------*/
/*--- Misc math helpers                                    ---*/
/*------------------------------------------------------------*/

/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
{
   IRTemp maskT = newTemp(Ity_I64);
   IRTemp res   = newTemp(Ity_I64);
   vassert(sh >= 1 && sh <= 63);
   assign(maskT, mkU64(mask));
   assign( res,
           binop(Iop_Or64,
                 binop(Iop_Shr64,
                       binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
                       mkU8(sh)),
                 binop(Iop_And64,
                       binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
                       mkexpr(maskT))
                 )
           );
   return res;
}
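
/* For example, with mask 0xFF00FF00FF00FF00 and sh 8, the helper
   above swaps adjacent bytes: 0x1122334455667788 becomes
   0x2211443366558877. */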

/* Generates byte swaps within 32-bit lanes. */
static IRTemp math_UINTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   return res;
}

/* Generates byte swaps within 16-bit lanes. */
static IRTemp math_USHORTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   return res;
}

/* Generates a 64-bit byte swap. */
static IRTemp math_BYTESWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
   return res;
}

/* Generates a 64-bit bit swap. */
static IRTemp math_BITSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
   res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
   res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
   return math_BYTESWAP64(res);
}
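
/* The bit swap above uses the classic divide-and-conquer scheme:
   swap adjacent bits, then 2-bit pairs, then nibbles, then reverse
   the bytes.  For example, 0x0000000000000001 maps to
   0x8000000000000000. */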

/* Duplicates the bits at the bottom of the given word to fill the
   whole word.  src :: Ity_I64 is assumed to have zeroes everywhere
   except for the bottom bits. */
static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
{
   if (srcTy == Ity_I8) {
      IRTemp t16 = newTemp(Ity_I64);
      assign(t16, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(8))));
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(t16),
                                  binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I16) {
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I32) {
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I64) {
      return src;
   }
   vassert(0);
}
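
/* For example, with srcTy == Ity_I8 and the bottom byte of src equal
   to 0x5A, the result is 0x5A5A5A5A5A5A5A5A. */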


/* Duplicates the src element exactly so as to fill a V128 value. */
static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
{
   IRTemp res = newTempV128();
   if (srcTy == Ity_F64) {
      IRTemp i64 = newTemp(Ity_I64);
      assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
      return res;
   }
   if (srcTy == Ity_F32) {
      IRTemp i64a = newTemp(Ity_I64);
      assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
      IRTemp i64b = newTemp(Ity_I64);
      assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
                                   mkexpr(i64a)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
      return res;
   }
   if (srcTy == Ity_I64) {
      assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
      return res;
   }
   if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
      IRTemp t1 = newTemp(Ity_I64);
      assign(t1, widenUto64(srcTy, mkexpr(src)));
      IRTemp t2 = math_DUP_TO_64(t1, srcTy);
      assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
      return res;
   }
   vassert(0);
}


/* |fullWidth| is a full V128 width result.  Depending on bitQ,
   zero out the upper half. */
static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
{
   if (bitQ == 1) return mkexpr(fullWidth);
   if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
   vassert(0);
}

/* The same, but from an expression instead. */
static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
{
   IRTemp fullWidthT = newTempV128();
   assign(fullWidthT, fullWidth);
   return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
}


/*------------------------------------------------------------*/
/*--- FP comparison helpers                                ---*/
/*------------------------------------------------------------*/

/* irRes32 :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix       = newTemp(Ity_I64);
   IRTemp termL    = newTemp(Ity_I64);
   IRTemp termR    = newTemp(Ity_I64);
   IRTemp nzcv     = newTemp(Ity_I64);
   IRTemp irRes    = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
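   /* As a plain-C model of what follows (illustrative only):

         ULong ix    = ((irRes >> 5) & 3) | (irRes & 1);
         ULong termL = ((((ix ^ 1) << 62) - 1) >> 61) + 1;
         ULong termR = (ix & (ix >> 1)) & 1;
         ULong nzcv  = termL - termR;

      giving ix=0 (GT) -> 0010, ix=1 (LT) -> 1000, ix=2 (EQ) -> 0110
      and ix=3 (UN) -> 0011, as per the table above. */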
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}


/*------------------------------------------------------------*/
/*--- Data processing (immediate)                          ---*/
/*------------------------------------------------------------*/

/* Helper functions for supporting "DecodeBitMasks" */

static ULong dbm_ROR ( Int width, ULong x, Int rot )
{
   vassert(width > 0 && width <= 64);
   vassert(rot >= 0 && rot < width);
   if (rot == 0) return x;
   ULong res = x >> rot;
   res |= (x << (width - rot));
   if (width < 64)
      res &= ((1ULL << width) - 1);
   return res;
}

static ULong dbm_RepTo64( Int esize, ULong x )
{
   switch (esize) {
      case 64:
         return x;
      case 32:
         x &= 0xFFFFFFFF; x |= (x << 32);
         return x;
      case 16:
         x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
         return x;
      case 8:
         x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
         return x;
      case 4:
         x &= 0xF; x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      case 2:
         x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      default:
         break;
   }
   vpanic("dbm_RepTo64");
   /*NOTREACHED*/
   return 0;
}
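
/* For example, dbm_RepTo64(16, 0x1234) == 0x1234123412341234. */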

static Int dbm_highestSetBit ( ULong x )
{
   Int i;
   for (i = 63; i >= 0; i--) {
      if (x & (1ULL << i))
         return i;
   }
   vassert(x == 0);
   return -1;
}

static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   Int   diff = S - R;
   diff &= 63;
   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}
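
/* Worked example (immediate case, illustrative): immN=0,
   imms=0b110011, immr=0, M=64.  Then immN:NOT(imms) = 0b0001100, so
   len=3, esize=8, levels=7, S=3 and R=0.  Hence elem_s = Ones(4) =
   0xF and wmask = dbm_RepTo64(8, 0xF) = 0x0F0F0F0F0F0F0F0F. */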


static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */

   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64   = INSN(31,31) == 1;
      Bool isSub  = INSN(30,30) == 1;
      Bool setCC  = INSN(29,29) == 1;
      UInt sh     = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through */
      } else {
         vassert(sh <= 1);
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL  = newTemp(Ity_I64);
            IRTemp argR  = newTemp(Ity_I64);
            IRTemp res   = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL  = newTemp(Ity_I32);
            IRTemp argR  = newTemp(Ity_I32);
            IRTemp res   = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }

   /* -------------------- ADR/ADRP -------------------- */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt  bP    = INSN(31,31);
      UInt  immLo = INSN(30,29);
      UInt  immHi = INSN(23,5);
      UInt  rD    = INSN(4,0);
      ULong uimm  = (immHi << 2) | immLo;
      ULong simm  = sx_to_64(uimm, 21);
      ULong val;
      if (bP) {
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
      return True;
   }
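
   /* For example, if the ADRP above executes at PC 0x400123 with
      simm == 1, the result is (0x400123 & ~0xFFFULL) + (1ULL << 12)
      == 0x401000. */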

   /* -------------------- LOGIC(imm) -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
      /* 31 30 28     22 21   15   9  4
         sf op 100100 N  immr imms Rn Rd
           op=00: AND  Rd|SP, Rn, #imm
           op=01: ORR  Rd|SP, Rn, #imm
           op=10: EOR  Rd|SP, Rn, #imm
           op=11: ANDS Rd|ZR, Rn, #imm
      */
      Bool  is64 = INSN(31,31) == 1;
      UInt  op   = INSN(30,29);
      UInt  N    = INSN(22,22);
      UInt  immR = INSN(21,16);
      UInt  immS = INSN(15,10);
      UInt  nn   = INSN(9,5);
      UInt  dd   = INSN(4,0);
      ULong imm  = 0;
      Bool  ok;
      if (N == 1 && !is64)
         goto after_logic_imm; /* not allowed; fall through */
      ok = dbm_DecodeBitMasks(&imm, NULL,
                              N, immS, immR, True, is64 ? 64 : 32);
      if (!ok)
         goto after_logic_imm;

      const HChar* names[4] = { "and", "orr", "eor", "ands" };
      const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
      const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };

      vassert(op < 4);
      if (is64) {
         IRExpr* argL = getIReg64orZR(nn);
         IRExpr* argR = mkU64(imm);
         IRTemp  res  = newTemp(Ity_I64);
         assign(res, binop(ops64[op], argL, argR));
         if (op < 3) {
            putIReg64orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
         } else {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_LOGIC(True/*is64*/, res);
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
         }
      } else {
         IRExpr* argL = getIReg32orZR(nn);
         IRExpr* argR = mkU32((UInt)imm);
         IRTemp  res  = newTemp(Ity_I32);
         assign(res, binop(ops32[op], argL, argR));
         if (op < 3) {
            putIReg32orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
         } else {
            putIReg32orZR(dd, mkexpr(res));
            setFlags_LOGIC(False/*!is64*/, res);
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
         }
      }
      return True;
   }
   after_logic_imm:

   /* -------------------- MOV{Z,N,K} -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
      /* 31 30 28      22 20    4
         |  |  |       |  |     |
         sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
         sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
         sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
      */
      Bool is64   = INSN(31,31) == 1;
      UInt subopc = INSN(30,29);
      UInt hw     = INSN(22,21);
      UInt imm16  = INSN(20,5);
      UInt dd     = INSN(4,0);
      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
         /* invalid; fall through */
      } else {
         ULong imm64 = ((ULong)imm16) << (16 * hw);
         if (!is64)
            vassert(imm64 < 0x100000000ULL);
         switch (subopc) {
            case BITS2(1,0): // MOVZ
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(0,0): // MOVN
               imm64 = ~imm64;
               if (!is64)
                  imm64 &= 0xFFFFFFFFULL;
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(1,1): // MOVK
               /* This is more complex.  We are inserting a slice into
                  the destination register, so we need to have the old
                  value of it. */
               if (is64) {
                  IRTemp old = newTemp(Ity_I64);
                  assign(old, getIReg64orZR(dd));
                  ULong mask = 0xFFFFULL << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or64,
                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
                             mkU64(imm64));
                  putIReg64orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg64orZR(dd), imm16, 16*hw);
               } else {
                  IRTemp old = newTemp(Ity_I32);
                  assign(old, getIReg32orZR(dd));
                  vassert(hw <= 1);
                  UInt mask = 0xFFFF << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or32,
                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
                             mkU32((UInt)imm64));
                  putIReg32orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg32orZR(dd), imm16, 16*hw);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

   /* -------------------- {U,S,}BFM -------------------- */
   /*    30 28     22 21   15   9  4

      sf 10 100110 N  immr imms nn dd
         UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         UBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 00 100110 N  immr imms nn dd
         SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         SBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 01 100110 N  immr imms nn dd
         BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   */
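   /* Note the usual assembler aliases map onto this scheme: for
      example, LSR Xd, Xn, #sh is UBFM Xd, Xn, #sh, #63, and
      UBFX Wd, Wn, #lsb, #width is UBFM Wd, Wn, #lsb, #(lsb+width-1). */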
   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
      UInt sf     = INSN(31,31);
      UInt opc    = INSN(30,29);
      UInt N      = INSN(22,22);
      UInt immR   = INSN(21,16);
      UInt immS   = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool inZero = False;
      Bool extend = False;
      const HChar* nm = "???";
      /* skip invalid combinations */
      switch (opc) {
         case BITS2(0,0):
            inZero = True; extend = True; nm = "sbfm"; break;
         case BITS2(0,1):
            inZero = False; extend = False; nm = "bfm"; break;
         case BITS2(1,0):
            inZero = True; extend = False; nm = "ubfm"; break;
         case BITS2(1,1):
            goto after_bfm; /* invalid */
         default:
            vassert(0);
      }
      if (sf == 1 && N != 1) goto after_bfm;
      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
                             || ((immS >> 5) & 1) != 0)) goto after_bfm;
      ULong wmask = 0, tmask = 0;
      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                   N, immS, immR, False, sf == 1 ? 64 : 32);
      if (!ok) goto after_bfm; /* hmmm */

      Bool   is64 = sf == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;

      IRTemp dst = newTemp(ty);
      IRTemp src = newTemp(ty);
      IRTemp bot = newTemp(ty);
      IRTemp top = newTemp(ty);
      IRTemp res = newTemp(ty);
      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
      assign(src, getIRegOrZR(is64, nn));
      /* perform bitfield move on low bits */
      assign(bot, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
                                         mkU(ty, wmask))));
      /* determine extension bits (sign, zero or dest register) */
      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
      /* combine extension bits and result bits */
      assign(res, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("%s %s, %s, immR=%u, immS=%u\n",
          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
      return True;
   }
   after_bfm:

   /* ---------------------- EXTR ---------------------- */
   /*   30 28     22 20 15   9 4
      1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
      0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   */
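   /* For example, EXTR X0, X1, X2, #8 computes
      X0 = (X1 << 56) | (X2 >>u 8), as per the code below. */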
   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      UInt imm6  = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      Bool valid = True;
      if (INSN(31,31) != INSN(22,22))
         valid = False;
      if (!is64 && imm6 >= 32)
         valid = False;
      if (!valid) goto after_extr;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
         assign(res, mkexpr(srcLo));
      } else {
         UInt szBits = 8 * sizeofIRType(ty);
         vassert(imm6 > 0 && imm6 < szBits);
         assign(res, binop(mkOR(ty),
                           binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                           binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
   after_extr:

   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Data processing (register) instructions              ---*/
/*------------------------------------------------------------*/

static const HChar* nameSH ( UInt sh ) {
   switch (sh) {
      case 0: return "lsl";
      case 1: return "lsr";
      case 2: return "asr";
      case 3: return "ror";
      default: vassert(0);
   }
}

/* Generate IR to get a register value, possibly shifted by an
   immediate.  Returns either a 32- or 64-bit temporary holding the
   result.  After the shift, the value can optionally be NOT-ed
   too.

   sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
   in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
   isn't allowed, but it's the job of the caller to check that.
*/
static IRTemp getShiftedIRegOrZR ( Bool is64,
                                   UInt sh_how, UInt sh_amt, UInt regNo,
                                   Bool invert )
{
   vassert(sh_how < 4);
   vassert(sh_amt < (is64 ? 64 : 32));
   IRType ty = is64 ? Ity_I64 : Ity_I32;
   IRTemp t0 = newTemp(ty);
   assign(t0, getIRegOrZR(is64, regNo));
   IRTemp t1 = newTemp(ty);
   switch (sh_how) {
      case BITS2(0,0):
         assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(0,1):
         assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,0):
         assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,1):
         assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
         break;
      default:
         vassert(0);
   }
   if (invert) {
      IRTemp t2 = newTemp(ty);
      assign(t2, unop(mkNOT(ty), mkexpr(t1)));
      return t2;
   } else {
      return t1;
   }
}


static
Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
                                        UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------------- ADD/SUB(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)

      31 30 29 28    23 21 20 15   9  4
      |  |  |  |     |  |  |  |    |  |
      x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
      x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
      x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
      x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
   */
   if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
      UInt   bS    = INSN(29,29); /* set flags? */
      UInt   sh    = INSN(23,22);
      UInt   rM    = INSN(20,16);
      UInt   imm6  = INSN(15,10);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);
      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
         IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
         IRTemp res  = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
         if (bS) {
            setFlags_ADD_SUB(is64, isSUB, argL, argR);
         }
         DIP("%s%s %s, %s, %s, %s #%u\n",
             bOP ? "sub" : "add", bS ? "s" : "",
             nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
             nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         return True;
      }
   }

   /* ------------------- ADC/SBC(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op

      31 30 29 28    23 21 20 15     9  4
      |  |  |  |     |  |  |  |      |  |
      x  0  0  11010 00 0  Rm 000000 Rn Rd   ADC  Rd,Rn,Rm
      x  0  1  11010 00 0  Rm 000000 Rn Rd   ADCS Rd,Rn,Rm
      x  1  0  11010 00 0  Rm 000000 Rn Rd   SBC  Rd,Rn,Rm
      x  1  1  11010 00 0  Rm 000000 Rn Rd   SBCS Rd,Rn,Rm
   */

   if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADC, 1: SBC */
      UInt   bS    = INSN(29,29); /* set flags */
      UInt   rM    = INSN(20,16);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);

      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;

      IRTemp oldC = newTemp(ty);
      assign(oldC,
             is64 ? mk_arm64g_calculate_flag_c()
                  : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );

      IRTemp argL = newTemp(ty);
      assign(argL, getIRegOrZR(is64, rN));
      IRTemp argR = newTemp(ty);
      assign(argR, getIRegOrZR(is64, rM));

      IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
      IRTemp res  = newTemp(ty);
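      /* SBC computes Rn - Rm - NOT(C).  Since oldC is known to be
         either 0 or 1, that is realised here as (Rn - Rm) - (oldC ^ 1). */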
      if (isSUB) {
         IRExpr* one = is64 ? mkU64(1) : mkU32(1);
         IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
         assign(res,
                binop(op,
                      binop(op, mkexpr(argL), mkexpr(argR)),
                      binop(xorOp, mkexpr(oldC), one)));
      } else {
         assign(res,
                binop(op,
                      binop(op, mkexpr(argL), mkexpr(argR)),
                      mkexpr(oldC)));
      }

      if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));

      if (bS) {
         setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
      }

      DIP("%s%s %s, %s, %s\n",
          bOP ? "sbc" : "adc", bS ? "s" : "",
          nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
          nameIRegOrZR(is64, rM));
      return True;
   }

   /* -------------------- LOGIC(reg) -------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      N==0 => inv? is no-op (no inversion)
      N==1 => inv? is NOT
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR

      31 30 28    23 21 20 15   9  4
      |  |  |     |  |  |  |    |  |
      x  00 01010 sh N  Rm imm6 Rn Rd  AND  Rd,Rn, inv?(sh(Rm,imm6))
      x  01 01010 sh N  Rm imm6 Rn Rd  ORR  Rd,Rn, inv?(sh(Rm,imm6))
      x  10 01010 sh N  Rm imm6 Rn Rd  EOR  Rd,Rn, inv?(sh(Rm,imm6))
      x  11 01010 sh N  Rm imm6 Rn Rd  ANDS Rd,Rn, inv?(sh(Rm,imm6))
      With N=1, the names are: BIC ORN EON BICS
   */
   if (INSN(28,24) == BITS5(0,1,0,1,0)) {
      UInt   bX   = INSN(31,31);
      UInt   sh   = INSN(23,22);
      UInt   bN   = INSN(21,21);
      UInt   rM   = INSN(20,16);
      UInt   imm6 = INSN(15,10);
      UInt   rN   = INSN(9,5);
      UInt   rD   = INSN(4,0);
      Bool   is64 = bX == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      if (!is64 && imm6 > 31) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
         IROp   op   = Iop_INVALID;
         switch (INSN(30,29)) {
            case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
            case BITS2(0,1):                  op = mkOR(ty);  break;
            case BITS2(1,0):                  op = mkXOR(ty); break;
            default: vassert(0);
         }
         IRTemp res = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (INSN(30,29) == BITS2(1,1)) {
            setFlags_LOGIC(is64, res);
         }
         putIRegOrZR(is64, rD, mkexpr(res));

         static const HChar* names_op[8]
            = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
         vassert(((bN << 2) | INSN(30,29)) < 8);
         const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
         /* Special-case the printing of "MOV" */
         if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
            DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
                                nameIRegOrZR(is64, rM));
         } else {
            DIP("%s %s, %s, %s, %s #%u\n", nm_op,
                nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
                nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         }
         return True;
      }
   }

   /* -------------------- {U,S}MULH -------------------- */
   /* 31       23 22 20 15     9   4
      10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
      10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
       && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
      Bool isU = INSN(23,23) == 1;
      UInt mm  = INSN(20,16);
      UInt nn  = INSN(9,5);
      UInt dd  = INSN(4,0);
      putIReg64orZR(dd, unop(Iop_128HIto64,
                             binop(isU ? Iop_MullU64 : Iop_MullS64,
                                   getIReg64orZR(nn), getIReg64orZR(mm))));
      DIP("%cmulh %s, %s, %s\n",
          isU ? 'u' : 's',
          nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
      return True;
   }

   /* -------------------- M{ADD,SUB} -------------------- */
   /* 31 30           20 15 14 9 4
      sf 00 11011 000 m  0  a  n d   MADD Rd,Rn,Rm,Ra  d = a+m*n
      sf 00 11011 000 m  1  a  n d   MSUB Rd,Rn,Rm,Ra  d = a-m*n
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      Bool isAdd = INSN(15,15) == 0;
      UInt aa    = INSN(14,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      if (is64) {
         putIReg64orZR(
            dd,
            binop(isAdd ? Iop_Add64 : Iop_Sub64,
                  getIReg64orZR(aa),
                  binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
      } else {
         putIReg32orZR(
            dd,
            binop(isAdd ? Iop_Add32 : Iop_Sub32,
                  getIReg32orZR(aa),
                  binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
      }
      DIP("%s %s, %s, %s, %s\n",
          isAdd ? "madd" : "msub",
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
      return True;
   }

   /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
   /* 31 30 28        20 15   11 9  4
      sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
      sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
      sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
      sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
      In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
   */
   if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
      Bool    is64 = INSN(31,31) == 1;
      UInt    b30  = INSN(30,30);
      UInt    mm   = INSN(20,16);
      UInt    cond = INSN(15,12);
      UInt    b10  = INSN(10,10);
      UInt    nn   = INSN(9,5);
      UInt    dd   = INSN(4,0);
      UInt    op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
      IRType  ty   = is64 ? Ity_I64 : Ity_I32;
      IRExpr* argL = getIRegOrZR(is64, nn);
      IRExpr* argR = getIRegOrZR(is64, mm);
      switch (op) {
         case BITS2(0,0):
            break;
         case BITS2(0,1):
            argR = binop(mkADD(ty), argR, mkU(ty,1));
            break;
         case BITS2(1,0):
            argR = unop(mkNOT(ty), argR);
            break;
         case BITS2(1,1):
            argR = binop(mkSUB(ty), mkU(ty,0), argR);
            break;
         default:
            vassert(0);
      }
      putIRegOrZR(
         is64, dd,
         IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                    argL, argR)
      );
      const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
      DIP("%s %s, %s, %s, %s\n", op_nm[op],
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameCC(cond));
      return True;
   }

   /* -------------- ADD/SUB(extended reg) -------------- */
   /*     28         20 15  12   9 4
      000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
      100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld

      001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
      101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld

      010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
      110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld

      011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
      111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld

      The 'm' operand is extended per opt, thusly:

        000   Xm & 0xFF           UXTB
        001   Xm & 0xFFFF         UXTH
        010   Xm & (2^32)-1       UXTW
        011   Xm                  UXTX

        100   Xm sx from bit 7    SXTB
        101   Xm sx from bit 15   SXTH
        110   Xm sx from bit 31   SXTW
        111   Xm                  SXTX

      In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
      operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
      are the identity operation on Wm.

      After extension, the value is shifted left by imm3 bits, which
      may only be in the range 0 .. 4 inclusive.
   */
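   /* For example, with opt == 000 (UXTB) and imm3 == 2, the 64-bit
      case computes Xd|SP = Xn|SP + ((Xm & 0xFF) << 2). */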
   if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSub = INSN(30,30) == 1;
      Bool setCC = INSN(29,29) == 1;
      UInt mm    = INSN(20,16);
      UInt opt   = INSN(15,13);
      UInt imm3  = INSN(12,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
                                  "sxtb", "sxth", "sxtw", "sxtx" };
      /* Do almost the same thing in the 32- and 64-bit cases. */
      IRTemp xN = newTemp(Ity_I64);
      IRTemp xM = newTemp(Ity_I64);
      assign(xN, getIReg64orSP(nn));
      assign(xM, getIReg64orZR(mm));
      IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
      Int     shSX = 0;
      /* widen Xm .. */
      switch (opt) {
         case BITS3(0,0,0): // UXTB
            xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
         case BITS3(0,0,1): // UXTH
            xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
         case BITS3(0,1,0): // UXTW -- noop for the 32bit case
            if (is64) {
               xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
            }
            break;
         case BITS3(0,1,1): // UXTX -- always a noop
            break;
         case BITS3(1,0,0): // SXTB
            shSX = 56; goto sxTo64;
         case BITS3(1,0,1): // SXTH
            shSX = 48; goto sxTo64;
         case BITS3(1,1,0): // SXTW -- noop for the 32bit case
            if (is64) {
               shSX = 32; goto sxTo64;
            }
            break;
         case BITS3(1,1,1): // SXTX -- always a noop
            break;
         sxTo64:
            vassert(shSX >= 32);
            xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
                        mkU8(shSX));
            break;
         default:
            vassert(0);
      }
      /* and now shift */
      IRTemp argL = xN;
      IRTemp argR = newTemp(Ity_I64);
      assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
      IRTemp res = newTemp(Ity_I64);
      assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
                        mkexpr(argL), mkexpr(argR)));
      if (is64) {
         if (setCC) {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
         } else {
            putIReg64orSP(dd, mkexpr(res));
         }
      } else {
         if (setCC) {
            IRTemp argL32 = newTemp(Ity_I32);
            IRTemp argR32 = newTemp(Ity_I32);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
            assign(argL32, unop(Iop_64to32, mkexpr(argL)));
            assign(argR32, unop(Iop_64to32, mkexpr(argR)));
            setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
         } else {
            putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
         }
      }
      DIP("%s%s %s, %s, %s %s lsl %u\n",
          isSub ? "sub" : "add", setCC ? "s" : "",
          setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
          nameIRegOrSP(is64, nn), nameIRegOrZR(is64, mm),
          nameExt[opt], imm3);
      return True;
   }
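
   /* Worked example (hand-assembled from the layout above; worth
      cross-checking with a disassembler):
         0x8B230841 = add x1, x2, w3, uxtb #2
      i.e. sf=1 op=0 S=0, Rm=3, opt=000 (UXTB), imm3=2, Rn=2, Rd=1.
      The decoder above computes X1 = X2 + ((X3 & 0xFF) << 2). */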

   /* ---------------- CCMP/CCMN(imm) ---------------- */
   /* Bizarrely, these appear in the "data processing register"
      category, even though they are operations against an
      immediate. */
   /* 31   29        20   15   11 9    3
      sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
      sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond

      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt imm5  = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, mkU64(imm5));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, mkU32(imm5));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, #%u, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          imm5, nzcv, nameCC(cond));
      return True;
   }
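
   /* The point of CCMP/CCMN is to build conditional comparison
      chains.  For example, in
         cmp  x1, x2
         ccmp x3, #4, #0, eq
         b.eq both
      the ccmp computes the flags from (x3 - 4) only if the first
      comparison was equal; otherwise it forces NZCV to 0000, which
      makes the subsequent eq test fail.  So the branch is taken
      only if x1 == x2 and x3 == 4. */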

   /* ---------------- CCMP/CCMN(reg) ---------------- */
   /* 31   29        20 15   11 9    3
      sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
      sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond
      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt mm    = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, getIReg64orZR(mm));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, getIReg32orZR(mm));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, %s, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
      return True;
   }


   /* -------------- REV/REV16/REV32/RBIT -------------- */
   /* 31 30 28       20    15   11 9 4

      1  10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
      0  10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn

      1  10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
      0  10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn

      1  10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
      0  10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn

      1  10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
      UInt b31 = INSN(31,31);
      UInt opc = INSN(11,10);

      UInt ix = 0;
      /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
      else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
      else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
      else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
      else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
      else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
      else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
      if (ix >= 1 && ix <= 7) {
         Bool   is64  = ix == 1 || ix == 3 || ix == 5 || ix == 7;
         UInt   nn    = INSN(9,5);
         UInt   dd    = INSN(4,0);
         IRTemp src   = newTemp(Ity_I64);
         IRTemp dst   = IRTemp_INVALID;
         IRTemp (*math)(IRTemp) = NULL;
         switch (ix) {
            case 1: case 2: math = math_BYTESWAP64;   break;
            case 3: case 4: math = math_BITSWAP64;    break;
            case 5: case 6: math = math_USHORTSWAP64; break;
            case 7:         math = math_UINTSWAP64;   break;
            default: vassert(0);
         }
         const HChar* names[7]
           = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
         const HChar* nm = names[ix-1];
         vassert(math);
         if (ix == 6) {
            /* This has to be special cased, since the logic below doesn't
               handle it correctly. */
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd,
                          unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
         } else if (is64) {
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd, mkexpr(dst));
         } else {
            assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
            dst = math(src);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
         }
         DIP("%s %s, %s\n", nm,
             nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
         return True;
      }
      /* else fall through */
   }
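
   /* As a concrete reference for the four transforms, with
      W0 = 0x11223344:
         rev   w0, w0  gives 0x44332211 (all four bytes reversed)
         rev16 w0, w0  gives 0x22114433 (bytes swapped within each
                                         16-bit half)
         rbit  w0, w0  gives 0x22CC4488 (all 32 bits reversed)
      and rev32 x0, x0 reverses the bytes within each 32-bit half
      of the 64-bit register. */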

   /* -------------------- CLZ/CLS -------------------- */
   /*    30 28   24   20    15      9 4
      sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
      sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
      Bool   is64  = INSN(31,31) == 1;
      Bool   isCLS = INSN(10,10) == 1;
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp src   = newTemp(Ity_I64);
      IRTemp srcZ  = newTemp(Ity_I64);
      IRTemp dst   = newTemp(Ity_I64);
      /* Get the argument, widened out to 64 bit */
      if (is64) {
         assign(src, getIReg64orZR(nn));
      } else {
         assign(src, binop(Iop_Shl64,
                           unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
      }
      /* If this is CLS, mash the arg around accordingly */
      if (isCLS) {
         IRExpr* one = mkU8(1);
         assign(srcZ,
         binop(Iop_Xor64,
               binop(Iop_Shl64, mkexpr(src), one),
               binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one)));
      } else {
         assign(srcZ, mkexpr(src));
      }
      /* And compute CLZ. */
      if (is64) {
         assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
                                mkU64(isCLS ? 63 : 64),
                                unop(Iop_Clz64, mkexpr(srcZ))));
         putIReg64orZR(dd, mkexpr(dst));
      } else {
         assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
                                mkU64(isCLS ? 31 : 32),
                                unop(Iop_Clz64, mkexpr(srcZ))));
         putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
      }
      DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
      return True;
   }
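
   /* The CLS mashing above is worth a note: srcZ is (src << 1) XOR
      (src with bit 0 cleared), so bit i of srcZ is src[i] ^ src[i-1],
      marking the places where adjacent bits differ.  Clz64(srcZ)
      therefore counts the leading run of bits that equal the sign
      bit, excluding the sign bit itself, which is exactly CLS.
      E.g. for cls x0, x0 with X0 = 0xFFFF000000000000, the first
      adjacent-bit difference is between bits 48 and 47, so
      srcZ == 0x0001000000000000 and the result is Clz64(srcZ) == 15. */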

   /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
   /*    30 28        20 15   11 9 4
      sf 00 1101 0110 m  0010 00 n d   LSLV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 01 n d   LSRV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 10 n d   ASRV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 11 n d   RORV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,12) == BITS4(0,0,1,0)) {
      Bool   is64 = INSN(31,31) == 1;
      UInt   mm   = INSN(20,16);
      UInt   op   = INSN(11,10);
      UInt   nn   = INSN(9,5);
      UInt   dd   = INSN(4,0);
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcL = newTemp(ty);
      IRTemp srcR = newTemp(Ity_I64);
      IRTemp res  = newTemp(ty);
      IROp   iop  = Iop_INVALID;
      assign(srcL, getIRegOrZR(is64, nn));
      assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
                                    mkU64(is64 ? 63 : 31)));
      if (op < 3) {
         // LSLV, LSRV, ASRV
         switch (op) {
            case BITS2(0,0): iop = mkSHL(ty); break;
            case BITS2(0,1): iop = mkSHR(ty); break;
            case BITS2(1,0): iop = mkSAR(ty); break;
            default: vassert(0);
         }
         assign(res, binop(iop, mkexpr(srcL),
                                unop(Iop_64to8, mkexpr(srcR))));
      } else {
         // RORV
         IROp opSHL = mkSHL(ty);
         IROp opSHR = mkSHR(ty);
         IROp opOR  = mkOR(ty);
         IRExpr* width = mkU64(is64 ? 64 : 32);
         assign(
            res,
            IRExpr_ITE(
               binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
               mkexpr(srcL),
               binop(opOR,
                     binop(opSHL,
                           mkexpr(srcL),
                           unop(Iop_64to8, binop(Iop_Sub64, width,
                                                            mkexpr(srcR)))),
                     binop(opSHR,
                           mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
         ));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      vassert(op < 4);
      const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
      DIP("%s %s, %s, %s\n",
          names[op], nameIRegOrZR(is64,dd),
                     nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
      return True;
   }
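
   /* The RORV expansion uses the identity
         ror(x, n) == (x << (W - n)) | (x >> n)   for 0 < n < W
      where W is the register width.  The ITE is needed for n == 0,
      since that would otherwise require a left shift by W, and IR
      shifts by amounts >= the operand width are undefined.  E.g.
      rorv w0, w1, w2 with W1 = 0x000000FF and W2 = 8 gives
      (0xFF << 24) | (0xFF >> 8) = 0xFF000000. */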

   /* -------------------- SDIV/UDIV -------------------- */
   /*    30 28        20 15    10 9 4
      sf 00 1101 0110 m  00001  1 n d  SDIV Rd,Rn,Rm
      sf 00 1101 0110 m  00001  0 n d  UDIV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,11) == BITS5(0,0,0,0,1)) {
      Bool is64 = INSN(31,31) == 1;
      UInt mm   = INSN(20,16);
      Bool isS  = INSN(10,10) == 1;
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      if (isS) {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      } else {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      }
      DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
          nameIRegOrZR(is64, dd),
          nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
      return True;
   }
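
   /* Note that, architecturally, AArch64 integer division never
      traps: division by zero yields 0, and SDIV of INT_MIN by -1
      yields INT_MIN.  Whether the Iop_Div* uses here reproduce
      those edge cases is a property of the IR semantics and the
      back ends, not of this decoder. */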

   /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
   /* 31        23  20 15 14 9 4
      1001 1011 101 m  0  a  n d   UMADDL Xd,Wn,Wm,Xa
      1001 1011 001 m  0  a  n d   SMADDL Xd,Wn,Wm,Xa
      1001 1011 101 m  1  a  n d   UMSUBL Xd,Wn,Wm,Xa
      1001 1011 001 m  1  a  n d   SMSUBL Xd,Wn,Wm,Xa
      with operation
         Xd = Xa +/- (Wn *u/s Wm)
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
      Bool   isU   = INSN(23,23) == 1;
      UInt   mm    = INSN(20,16);
      Bool   isAdd = INSN(15,15) == 0;
      UInt   aa    = INSN(14,10);
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp wN    = newTemp(Ity_I32);
      IRTemp wM    = newTemp(Ity_I32);
      IRTemp xA    = newTemp(Ity_I64);
      IRTemp muld  = newTemp(Ity_I64);
      IRTemp res   = newTemp(Ity_I64);
      assign(wN, getIReg32orZR(nn));
      assign(wM, getIReg32orZR(mm));
      assign(xA, getIReg64orZR(aa));
      assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
                         mkexpr(wN), mkexpr(wM)));
      assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
                        mkexpr(xA), mkexpr(muld)));
      putIReg64orZR(dd, mkexpr(res));
      DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
          nameIReg64orZR(dd), nameIReg32orZR(nn),
          nameIReg32orZR(mm), nameIReg64orZR(aa));
      return True;
   }
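
   /* The widening matters here: the product is the full 64-bit
      result of the 32x32 multiply (Iop_MullU32/Iop_MullS32), not a
      32-bit product widened afterwards.  E.g. umaddl x0, w1, w2, x3
      with W1 = 0xFFFFFFFF, W2 = 2, X3 = 1 gives
      X0 = 1 + 0x1FFFFFFFE = 0x1FFFFFFFF, whereas smaddl computes
      the product as (-1) * 2 = -2, giving X0 = -1. */
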
   vex_printf("ARM64 front end: data_processing_register\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Math helpers for vector interleave/deinterleave      ---*/
/*------------------------------------------------------------*/

#define EX(_tmp) \
           mkexpr(_tmp)
#define SL(_hi128,_lo128,_nbytes) \
           ( (_nbytes) == 0 \
                ? (_lo128) \
                : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
#define ROR(_v128,_nbytes) \
           SL((_v128),(_v128),(_nbytes))
#define ROL(_v128,_nbytes) \
           SL((_v128),(_v128),16-(_nbytes))
#define SHR(_v128,_nbytes) \
           binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
#define SHL(_v128,_nbytes) \
           binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
#define ILO64x2(_argL,_argR) \
           binop(Iop_InterleaveLO64x2,(_argL),(_argR))
#define IHI64x2(_argL,_argR) \
           binop(Iop_InterleaveHI64x2,(_argL),(_argR))
#define ILO32x4(_argL,_argR) \
           binop(Iop_InterleaveLO32x4,(_argL),(_argR))
#define IHI32x4(_argL,_argR) \
           binop(Iop_InterleaveHI32x4,(_argL),(_argR))
#define ILO16x8(_argL,_argR) \
           binop(Iop_InterleaveLO16x8,(_argL),(_argR))
#define IHI16x8(_argL,_argR) \
           binop(Iop_InterleaveHI16x8,(_argL),(_argR))
#define ILO8x16(_argL,_argR) \
           binop(Iop_InterleaveLO8x16,(_argL),(_argR))
#define IHI8x16(_argL,_argR) \
           binop(Iop_InterleaveHI8x16,(_argL),(_argR))
#define CEV32x4(_argL,_argR) \
           binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
#define COD32x4(_argL,_argR) \
           binop(Iop_CatOddLanes32x4,(_argL),(_argR))
#define COD16x8(_argL,_argR) \
           binop(Iop_CatOddLanes16x8,(_argL),(_argR))
#define COD8x16(_argL,_argR) \
           binop(Iop_CatOddLanes8x16,(_argL),(_argR))
#define CEV8x16(_argL,_argR) \
           binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
#define AND(_arg1,_arg2) \
           binop(Iop_AndV128,(_arg1),(_arg2))
#define OR2(_arg1,_arg2) \
           binop(Iop_OrV128,(_arg1),(_arg2))
#define OR3(_arg1,_arg2,_arg3) \
           binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
#define OR4(_arg1,_arg2,_arg3,_arg4) \
           binop(Iop_OrV128, \
                 binop(Iop_OrV128,(_arg1),(_arg2)), \
                 binop(Iop_OrV128,(_arg3),(_arg4)))
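
/* A note on reading these macros: all shift/rotate amounts are in
   bytes, not bits.  As used here, SL(hi,lo,n) takes bytes [n+15 : n]
   of the 256-bit concatenation hi:lo, so ROR(v,n) moves byte i of v
   to position (i-n) & 15, and ROL(v,n) is the inverse rotation.
   For example, if v holds bytes F E D C B A 9 8 7 6 5 4 3 2 1 0
   (high to low) then ROR(v,4) holds 3 2 1 0 F E D C B A 9 8 7 6 5 4. */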


/* Do interleaving for 1 128 bit vector, for ST1 insns. */
static
void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
                           UInt laneSzBlg2, IRTemp u0 )
{
   assign(*i0, mkexpr(u0));
}


/* Do interleaving for 2 128 bit vectors, for ST2 insns. */
static
void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
                           UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
{
   /* This is pretty easy, since we have primitives directly to
      hand. */
   if (laneSzBlg2 == 3) {
      // 64x2
      // u1 == B1 B0, u0 == A1 A0
      // i1 == B1 A1, i0 == B0 A0
      assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
      // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
      assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // u1 == B{7..0}, u0 == A{7..0}
      // i0 == B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == B7 A7 B6 A6 B5 A5 B4 A4
      assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // u1 == B{f..0}, u0 == A{f..0}
      // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
      assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}
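
/* To relate this to memory: for st2 {v0.4s, v1.4s}, [x0], with
   v0 == A3 A2 A1 A0 and v1 == B3 B2 B1 B0, ascending memory from
   [x0] receives A0 B0 A1 B1 A2 B2 A3 B3 -- that is, i0
   (== B1 A1 B0 A0) is stored first, then i1 (== B3 A3 B2 A2). */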


/* Do interleaving for 3 128 bit vectors, for ST3 insns. */
static
void math_INTERLEAVE3_128(
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
      // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
      assign(*i2, IHI64x2( EX(u2), EX(u1) ));
      assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
      assign(*i0, ILO64x2( EX(u1), EX(u0) ));
      return;
   }

   if (laneSzBlg2 == 2) {
      // 32x4
      // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
      // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
      // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
      IRTemp p0    = newTempV128();
      IRTemp p1    = newTempV128();
      IRTemp p2    = newTempV128();
      IRTemp c1100 = newTempV128();
      IRTemp c0011 = newTempV128();
      IRTemp c0110 = newTempV128();
      assign(c1100, mkV128(0xFF00));
      assign(c0011, mkV128(0x00FF));
      assign(c0110, mkV128(0x0FF0));
      // First interleave them at 64x2 granularity,
      // generating partial ("p") values.
      math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
      // And more shuffling around for the final answer
      assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
                       AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
      assign(*i1, OR3( SHL(EX(p2),12),
                       AND(EX(p1),EX(c0110)),
                       SHR(EX(p0),12) ));
      assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
                       AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
      return;
   }

   if (laneSzBlg2 == 1) {
      // 16x8
      // u2 == C7 C6 C5 C4 C3 C2 C1 C0
      // u1 == B7 B6 B5 B4 B3 B2 B1 B0
      // u0 == A7 A6 A5 A4 A3 A2 A1 A0
      //
      // p2 == C7 C6 B7 B6 A7 A6 C5 C4
      // p1 == B5 B4 A5 A4 C3 C2 B3 B2
      // p0 == A3 A2 C1 C0 B1 B0 A1 A0
      //
      // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
      // i0 == B2 A2 C1 B1 A1 C0 B0 A0
      IRTemp p0    = newTempV128();
      IRTemp p1    = newTempV128();
      IRTemp p2    = newTempV128();
      IRTemp c1000 = newTempV128();
      IRTemp c0100 = newTempV128();
      IRTemp c0010 = newTempV128();
      IRTemp c0001 = newTempV128();
      assign(c1000, mkV128(0xF000));
      assign(c0100, mkV128(0x0F00));
      assign(c0010, mkV128(0x00F0));
      assign(c0001, mkV128(0x000F));
      // First interleave them at 32x4 granularity,
      // generating partial ("p") values.
      math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
      // And more shuffling around for the final answer
      assign(*i2,
             OR4( AND( IHI16x8( EX(p2),        ROL(EX(p2),4) ), EX(c1000) ),
                  AND( IHI16x8( ROL(EX(p2),6), EX(p2)        ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
                  AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
      ));
      assign(*i1,
             OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
                  AND( IHI16x8( EX(p1),        ROL(EX(p1),4) ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
                  AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
      ));
      assign(*i0,
             OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
                  AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
                  AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
      ));
      return;
   }

   if (laneSzBlg2 == 0) {
      // 8x16.  It doesn't seem worth the hassle of first doing a
      // 16x8 interleave, so just generate all 24 partial results
      // directly :-(
      // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
      // i2 == Cf Bf Af Ce .. Bb Ab Ca
      // i1 == Ba Aa C9 B9 .. A6 C5 B5
      // i0 == A5 C4 B4 A4 .. C0 B0 A0

      IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
      IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
      IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
      IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
      IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
      IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
      IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
      IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
      IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();
      // eg XXXX(qqq, CC, 0xF, BB, 0xA) sets qqq to be a vector
      // of the form 14 bytes junk : CC[0xF] : BB[0xA]
      //
#     define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
         IRTemp t_##_tempName = newTempV128(); \
         assign(t_##_tempName, \
                ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
                         ROR(EX(_srcVec2),(_srcShift2)) ) )

      // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
      IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;

      // The slicing and reassembly are done as interleavedly as possible,
      // so as to minimise the demand for registers in the back end, which
      // was observed to be a problem in testing.

      XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
      XXXX(AfCe, AA, 0xf, CC, 0xe);
      assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));

      XXXX(BeAe, BB, 0xe, AA, 0xe);
      XXXX(CdBd, CC, 0xd, BB, 0xd);
      assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
      assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));

      XXXX(AdCc, AA, 0xd, CC, 0xc);
      XXXX(BcAc, BB, 0xc, AA, 0xc);
      assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));

      XXXX(CbBb, CC, 0xb, BB, 0xb);
      XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
      assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
      assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
      assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));

      XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
      XXXX(C9B9, CC, 0x9, BB, 0x9);
      assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));

      XXXX(A9C8, AA, 0x9, CC, 0x8);
      XXXX(B8A8, BB, 0x8, AA, 0x8);
      assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
      assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));

      XXXX(C7B7, CC, 0x7, BB, 0x7);
      XXXX(A7C6, AA, 0x7, CC, 0x6);
      assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));

      XXXX(B6A6, BB, 0x6, AA, 0x6);
      XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
      assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
      assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
      assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));

      XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
      XXXX(B4A4, BB, 0x4, AA, 0x4);
      assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));

      XXXX(C3B3, CC, 0x3, BB, 0x3);
      XXXX(A3C2, AA, 0x3, CC, 0x2);
      assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
      assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));

      XXXX(B2A2, BB, 0x2, AA, 0x2);
      XXXX(C1B1, CC, 0x1, BB, 0x1);
      assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));

      XXXX(A1C0, AA, 0x1, CC, 0x0);
      XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
      assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
      assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
      assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));

#     undef XXXX
      return;
   }

   /*NOTREACHED*/
   vassert(0);
}


/* Do interleaving for 4 128 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_128(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*i0, ILO64x2(EX(u1), EX(u0)));
      assign(*i1, ILO64x2(EX(u3), EX(u2)));
      assign(*i2, IHI64x2(EX(u1), EX(u0)));
      assign(*i3, IHI64x2(EX(u3), EX(u2)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // First, interleave at the 64-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
      // And interleave (cat) at the 32 bit size.
      assign(*i0, CEV32x4(EX(p1), EX(p0)));
      assign(*i1, COD32x4(EX(p1), EX(p0)));
      assign(*i2, CEV32x4(EX(p3), EX(p2)));
      assign(*i3, COD32x4(EX(p3), EX(p2)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // First, interleave at the 32-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 16 bit lanes.
      assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
      assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
      assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
      assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // First, interleave at the 16-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 8 bit lanes.
      assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
      assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
      assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
      assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}
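
/* The same memory view applies here: for st4 {v0.2d .. v3.2d}, [x0],
   with v0..v3 == A1 A0 / B1 B0 / C1 C0 / D1 D0, ascending memory
   receives A0 B0 C0 D0 A1 B1 C1 D1, which is exactly i0, i1, i2, i3
   stored in that order. */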


/* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
                             UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}


/* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                             UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   /* This is pretty easy, since we have primitives directly to
      hand. */
   if (laneSzBlg2 == 3) {
      // 64x2
      // i1 == B1 A1, i0 == B0 A0
      // u1 == B1 B0, u0 == A1 A0
      assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
      // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
      assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // i0 == B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == B7 A7 B6 A6 B5 A5 B4 A4
      // u1 == B{7..0}, u0 == A{7..0}
      assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes16x8,  mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
      // u1 == B{f..0}, u0 == A{f..0}
      assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes8x16,  mkexpr(i1), mkexpr(i0)));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_128(
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
      // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
      assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1)        ));
      assign(*u1, ILO64x2( EX(i2),        ROL(EX(i0),8) ));
      assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0)        ));
      return;
   }

   if (laneSzBlg2 == 2) {
      // 32x4
      // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
      // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
      // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
      IRTemp t_a1c0b0a0 = newTempV128();
      IRTemp t_a2c1b1a1 = newTempV128();
      IRTemp t_a3c2b2a2 = newTempV128();
      IRTemp t_a0c3b3a3 = newTempV128();
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      // Compute some intermediate values.
      assign(t_a1c0b0a0, EX(i0));
      assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
      assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
      assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
      // First deinterleave into lane-pairs
      assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
      assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
                         IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
      assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
      // Then deinterleave at 64x2 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
      return;
   }

   if (laneSzBlg2 == 1) {
      // 16x8
      // u2 == C7 C6 C5 C4 C3 C2 C1 C0
      // u1 == B7 B6 B5 B4 B3 B2 B1 B0
      // u0 == A7 A6 A5 A4 A3 A2 A1 A0
      //
      // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
      // i0 == B2 A2 C1 B1 A1 C0 B0 A0
      //
      // p2 == C7 C6 B7 B6 A7 A6 C5 C4
      // p1 == B5 B4 A5 A4 C3 C2 B3 B2
      // p0 == A3 A2 C1 C0 B1 B0 A1 A0

      IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
      s0 = s1 = s2 = s3
         = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
      newTempsV128_4(&s0, &s1, &s2, &s3);
      newTempsV128_4(&t0, &t1, &t2, &t3);
      newTempsV128_4(&p0, &p1, &p2, &c00111111);

      // s0 == b2a2 c1b1a1 c0b0a0
      // s1 == b4a4 c3b3a3 c2b2a2
      // s2 == b6a6 c5b5a5 c4b4a4
      // s3 == b0a0 c7b7a7 c6b6a6
      assign(s0, EX(i0));
      assign(s1, SL(EX(i1),EX(i0),6*2));
      assign(s2, SL(EX(i2),EX(i1),4*2));
      assign(s3, SL(EX(i0),EX(i2),2*2));

      // t0 == 0 0 c1c0 b1b0 a1a0
      // t1 == 0 0 c3c2 b3b2 a3a2
      // t2 == 0 0 c5c4 b5b4 a5a4
      // t3 == 0 0 c7c6 b7b6 a7a6
      assign(c00111111, mkV128(0x0FFF));
      assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
      assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
      assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
      assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));

      assign(p0, OR2(EX(t0),          SHL(EX(t1),6*2)));
      assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
      assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));

      // Then deinterleave at 32x4 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
      return;
   }

   if (laneSzBlg2 == 0) {
      // 8x16.  This is the same scheme as for 16x8, with twice the
      // number of intermediate values.
      //
      // u2 == C{f..0}
      // u1 == B{f..0}
      // u0 == A{f..0}
      //
      // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
      // i1 ==  BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
      // i0 ==   A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      //
      // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
      // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
      // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
      //
      IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
             t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
      s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
         = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
         = IRTemp_INVALID;
      newTempsV128_4(&s0, &s1, &s2, &s3);
      newTempsV128_4(&s4, &s5, &s6, &s7);
      newTempsV128_4(&t0, &t1, &t2, &t3);
      newTempsV128_4(&t4, &t5, &t6, &t7);
      newTempsV128_4(&p0, &p1, &p2, &cMASK);

      // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
      // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
      // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
      // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
      // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
      // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
      // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
      assign(s0, SL(EX(i1),EX(i0), 0));
      assign(s1, SL(EX(i1),EX(i0), 6));
      assign(s2, SL(EX(i1),EX(i0),12));
      assign(s3, SL(EX(i2),EX(i1), 2));
      assign(s4, SL(EX(i2),EX(i1), 8));
      assign(s5, SL(EX(i2),EX(i1),14));
      assign(s6, SL(EX(i0),EX(i2), 4));
      assign(s7, SL(EX(i0),EX(i2),10));

      // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
      // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
      // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
      // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
      // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
      // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
      // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
      // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
      assign(cMASK, mkV128(0x003F));
      assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
      assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
      assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
      assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
      assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
      assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
      assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
      assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));

      assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
      assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
                 SHL(EX(t3),2), SHR(EX(t2),4) ));
      assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));

      // Then deinterleave at 16x8 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
      return;
   }

   /*NOTREACHED*/
   vassert(0);
}


/* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_128(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*u0, ILO64x2(EX(i2), EX(i0)));
      assign(*u1, IHI64x2(EX(i2), EX(i0)));
      assign(*u2, ILO64x2(EX(i3), EX(i1)));
      assign(*u3, IHI64x2(EX(i3), EX(i1)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      IRTemp p0 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, ILO32x4(EX(i1), EX(i0)));
      assign(p1, IHI32x4(EX(i1), EX(i0)));
      assign(p2, ILO32x4(EX(i3), EX(i2)));
      assign(p3, IHI32x4(EX(i3), EX(i2)));
      // And now do what we did for the 64-bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // Deinterleave into 32-bit chunks, then do as the 32-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
      assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
      assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
      assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
      // From here on is like the 32 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // Deinterleave into 16-bit chunks, then do as the 16-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
                          ILO8x16(EX(i0),ROL(EX(i0),4)) ));
      assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
                          ILO8x16(EX(i1),ROL(EX(i1),4)) ));
      assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
                          ILO8x16(EX(i2),ROL(EX(i2),4)) ));
      assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
                          ILO8x16(EX(i3),ROL(EX(i3),4)) ));
      // From here on is like the 16 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Wrappers that use the full-width (de)interleavers to do half-width
   (de)interleaving.  The scheme is to clone each input lane in the
   lower half of each incoming value, do a full width (de)interleave
   at the next lane size up, and remove every other lane of the
   result.  The returned values may have any old junk in the upper
   64 bits -- the caller must ignore that. */
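
/* For example, interleaving two 64-bit values with 16-bit lanes
   (laneSzBlg2 == 1): the "doubler" Iop_InterleaveLO16x8 turns
   lanes d c b a into d d c c b b a a; a full-width 32x4 interleave
   then operates on the doubled lanes; and the "halver"
   Iop_CatEvenLanes16x8 keeps every other 16-bit lane of each
   result, leaving the correct half-width answer in the low 64 bits
   (and junk above, per the comment above). */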

/* Helper function -- get doubling and narrowing operations. */
static
void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
                                   /*OUT*/IROp* halver,
                                   UInt laneSzBlg2 )
{
   switch (laneSzBlg2) {
      case 2:
         *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
         break;
      case 1:
         *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
         break;
      case 0:
         *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
         break;
      default:
         vassert(0);
   }
}

/* Do interleaving for 1 64 bit vector, for ST1 insns. */
static
void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
                          UInt laneSzBlg2, IRTemp u0 )
{
   assign(*i0, mkexpr(u0));
}


/* Do interleaving for 2 64 bit vectors, for ST2 insns. */
static
void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
                          UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
}


/* Do interleaving for 3 64 bit vectors, for ST3 insns. */
static
void math_INTERLEAVE3_64(
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
}


/* Do interleaving for 4 64 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_64(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      assign(*i3, EX(u3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   assign(du3, binop(doubler, EX(u3), EX(u3)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
                        laneSzBlg2 + 1, du0, du1, du2, du3);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
   assign(*i3, binop(halver, EX(di3), EX(di3)));
}


/* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
                            UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}


/* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                            UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
}


/* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_64(
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
}


/* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_64(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      assign(*u3, EX(i3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   assign(di3, binop(doubler, EX(i3), EX(i3)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
                          laneSzBlg2 + 1, di0, di1, di2, di3);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
   assign(*u3, binop(halver, EX(du3), EX(du3)));
}


#undef EX
#undef SL
#undef ROR
#undef ROL
#undef SHR
#undef SHL
#undef ILO64x2
#undef IHI64x2
#undef ILO32x4
#undef IHI32x4
#undef ILO16x8
#undef IHI16x8
#undef ILO8x16
#undef IHI8x16
#undef CEV32x4
#undef COD32x4
#undef COD16x8
#undef COD8x16
#undef CEV8x16
#undef AND
#undef OR2
#undef OR3
#undef OR4


/*------------------------------------------------------------*/
/*--- Load and Store instructions                          ---*/
/*------------------------------------------------------------*/

/* Generate the EA for a "reg + reg" style amode.  This is done from
   parts of the insn, but for sanity checking's sake it takes the whole
   insn.  This appears to depend on insn[15:12], with opt=insn[15:13]
   and S=insn[12]:

   The possible forms, along with their opt:S values, are:
      011:0   Xn|SP + Xm
      111:0   Xn|SP + Xm
      011:1   Xn|SP + Xm * transfer_szB
      111:1   Xn|SP + Xm * transfer_szB
      010:0   Xn|SP + 32Uto64(Wm)
      010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
      110:0   Xn|SP + 32Sto64(Wm)
      110:1   Xn|SP + 32Sto64(Wm) * transfer_szB

   Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
   the transfer size is insn[23,31,30].  For integer loads/stores,
   insn[23] is zero, hence szLg2 can be at most 3 in such cases.

   If the decoding fails, it returns IRTemp_INVALID.

   isInt is True iff this decoding is for transfers to/from integer
   registers.  If False it is for transfers to/from vector registers.
*/
static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
{
   UInt    optS  = SLICE_UInt(insn, 15, 12);
   UInt    mm    = SLICE_UInt(insn, 20, 16);
   UInt    nn    = SLICE_UInt(insn, 9, 5);
   UInt    szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
                   | SLICE_UInt(insn, 31, 30); // Log2 of the size

   buf[0] = 0;

   /* Sanity checks, that this really is a load/store insn. */
   if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
      goto fail;

   if (isInt
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
      goto fail;

   if (!isInt
       && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
      goto fail;

   /* Throw out non-verified but possibly valid cases. */
   switch (szLg2) {
      case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
      case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
      case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
      case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
      case BITS3(1,0,0): // can only ever be valid for the vector case
                         if (isInt) goto fail; else break;
      case BITS3(1,0,1): // these sizes are never valid
      case BITS3(1,1,0):
      case BITS3(1,1,1): goto fail;

      default: vassert(0);
   }

   IRExpr* rhs  = NULL;
   switch (optS) {
      case BITS4(1,1,1,0): goto fail; //ATC
      case BITS4(0,1,1,0):
         rhs = getIReg64orZR(mm);
         vex_sprintf(buf, "[%s, %s]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm));
         break;
      case BITS4(1,1,1,1): goto fail; //ATC
      case BITS4(0,1,1,1):
         rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s lsl %u]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
         break;
      case BITS4(0,1,0,0):
         rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s uxtw]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(0,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s uxtw, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      case BITS4(1,1,0,0):
         rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s sxtw]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(1,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s sxtw, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      default:
         /* The rest appear to be genuinely invalid */
         goto fail;
   }

   vassert(rhs);
   IRTemp res = newTemp(Ity_I64);
   assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
   return res;

  fail:
   vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
   return IRTemp_INVALID;
}
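

/* As a worked example (hand-assembled from the table above; worth
   cross-checking with a disassembler): ldr x0, [x1, x2, lsl #3]
   has opt:S == 011:1 with szLg2 == 3, so gen_indexed_EA returns a
   temp holding X1 + (X2 << 3), and buf gets "[x1, x2 lsl 3]". */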
4568
4569
4570/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
4571   bits of DATAE :: Ity_I64. */
4572static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
4573{
4574   IRExpr* addrE = mkexpr(addr);
4575   switch (szB) {
4576      case 8:
4577         storeLE(addrE, dataE);
4578         break;
4579      case 4:
4580         storeLE(addrE, unop(Iop_64to32, dataE));
4581         break;
4582      case 2:
4583         storeLE(addrE, unop(Iop_64to16, dataE));
4584         break;
4585      case 1:
4586         storeLE(addrE, unop(Iop_64to8, dataE));
4587         break;
4588      default:
4589         vassert(0);
4590   }
4591}
4592
4593
4594/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
4595   placing the result in an Ity_I64 temporary. */
4596static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
4597{
4598   IRTemp  res   = newTemp(Ity_I64);
4599   IRExpr* addrE = mkexpr(addr);
4600   switch (szB) {
4601      case 8:
4602         assign(res, loadLE(Ity_I64,addrE));
4603         break;
4604      case 4:
4605         assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
4606         break;
4607      case 2:
4608         assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
4609         break;
4610      case 1:
4611         assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
4612         break;
4613      default:
4614         vassert(0);
4615   }
4616   return res;
4617}
4618
4619
4620/* Generate a "standard 7" name, from bitQ and size.  But also
4621   allow ".1d" since that's occasionally useful. */
4622static
4623const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
4624{
4625   vassert(bitQ <= 1 && size <= 3);
4626   const HChar* nms[8]
4627      = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
4628   UInt ix = (bitQ << 2) | size;
4629   vassert(ix < 8);
4630   return nms[ix];
4631}
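
/* A worked example, for exposition only: with bitQ == 1 and size == 2,
   ix = (1 << 2) | 2 == 6, so nameArr_Q_SZ(1, 2) returns "4s" -- the
   arrangement name for four 32-bit lanes of a 128-bit Q register. */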
4632
4633
4634static
4635Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
4636{
4637#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
4638
4639   /* ------------ LDR,STR (immediate, uimm12) ----------- */
4640   /* uimm12 is scaled by the transfer size
4641
4642      31 29  26    21    9  4
4643      |  |   |     |     |  |
4644      11 111 00100 imm12 nn tt    STR  Xt, [Xn|SP, #imm12 * 8]
4645      11 111 00101 imm12 nn tt    LDR  Xt, [Xn|SP, #imm12 * 8]
4646
4647      10 111 00100 imm12 nn tt    STR  Wt, [Xn|SP, #imm12 * 4]
4648      10 111 00101 imm12 nn tt    LDR  Wt, [Xn|SP, #imm12 * 4]
4649
4650      01 111 00100 imm12 nn tt    STRH Wt, [Xn|SP, #imm12 * 2]
4651      01 111 00101 imm12 nn tt    LDRH Wt, [Xn|SP, #imm12 * 2]
4652
4653      00 111 00100 imm12 nn tt    STRB Wt, [Xn|SP, #imm12 * 1]
4654      00 111 00101 imm12 nn tt    LDRB Wt, [Xn|SP, #imm12 * 1]
4655   */
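   /* Worked decode, for exposition only: the word 0xF9400462 is
      "ldr x2, [x3, #8]".  szLg2 = bits [31:30] = 3, so szB = 8;
      bit 22 is 1, so it is a load; imm12 = 1, so offs = 1 * 8 = 8;
      nn = 3, tt = 2. */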
4656   if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
4657      UInt   szLg2 = INSN(31,30);
4658      UInt   szB   = 1 << szLg2;
4659      Bool   isLD  = INSN(22,22) == 1;
4660      UInt   offs  = INSN(21,10) * szB;
4661      UInt   nn    = INSN(9,5);
4662      UInt   tt    = INSN(4,0);
4663      IRTemp ta    = newTemp(Ity_I64);
4664      assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
4665      if (nn == 31) { /* FIXME generate stack alignment check */ }
4666      vassert(szLg2 < 4);
4667      if (isLD) {
4668         putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
4669      } else {
4670         gen_narrowing_store(szB, ta, getIReg64orZR(tt));
4671      }
4672      const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
4673      const HChar* st_name[4] = { "strb", "strh", "str", "str" };
4674      DIP("%s %s, [%s, #%u]\n",
4675          (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
4676          nameIReg64orSP(nn), offs);
4677      return True;
4678   }
4679
4680   /* ------------ LDUR,STUR (immediate, simm9) ----------- */
4681   /*
4682      31 29  26      20   11 9  4
4683      |  |   |       |    |  |  |
4684      (at-Rn-then-Rn=EA)  |  |  |
4685      sz 111 00000 0 imm9 01 Rn Rt   STR Rt, [Xn|SP], #simm9
4686      sz 111 00001 0 imm9 01 Rn Rt   LDR Rt, [Xn|SP], #simm9
4687
4688      (at-EA-then-Rn=EA)
4689      sz 111 00000 0 imm9 11 Rn Rt   STR Rt, [Xn|SP, #simm9]!
4690      sz 111 00001 0 imm9 11 Rn Rt   LDR Rt, [Xn|SP, #simm9]!
4691
4692      (at-EA)
4693      sz 111 00000 0 imm9 00 Rn Rt   STR Rt, [Xn|SP, #simm9]
4694      sz 111 00001 0 imm9 00 Rn Rt   LDR Rt, [Xn|SP, #simm9]
4695
4696      simm9 is unscaled.
4697
4698      The case 'wback && Rn == Rt && Rt != 31' is disallowed.  In the
4699      load case this is because it would create two competing values for
4700      Rt.  In the store case the reason is unclear, but the spec
4701      disallows it anyway.
4702
4703      Stores are narrowing, loads are unsigned widening.  sz encodes
4704      the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
4705   */
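   /* Worked decode, for exposition only: 0xF81F0FFE is
      "str x30, [sp, #-16]!".  szLg2 = 3, bit 22 is 0 (store),
      imm9 = 0x1F0 so simm9 = -16, how = bits [11:10] = 11
      (pre-index), nn = 31, tt = 30.  This is exactly the
      early-writeback case discussed below. */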
4706   if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
4707       == BITS9(1,1,1, 0,0,0,0,0, 0)) {
4708      UInt szLg2  = INSN(31,30);
4709      UInt szB    = 1 << szLg2;
4710      Bool isLoad = INSN(22,22) == 1;
4711      UInt imm9   = INSN(20,12);
4712      UInt nn     = INSN(9,5);
4713      UInt tt     = INSN(4,0);
4714      Bool wBack  = INSN(10,10) == 1;
4715      UInt how    = INSN(11,10);
4716      if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
4717         /* undecodable; fall through */
4718      } else {
4719         if (nn == 31) { /* FIXME generate stack alignment check */ }
4720
4721         // Compute the transfer address TA and the writeback address WA.
4722         IRTemp tRN = newTemp(Ity_I64);
4723         assign(tRN, getIReg64orSP(nn));
4724         IRTemp tEA = newTemp(Ity_I64);
4725         Long simm9 = (Long)sx_to_64(imm9, 9);
4726         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4727
4728         IRTemp tTA = newTemp(Ity_I64);
4729         IRTemp tWA = newTemp(Ity_I64);
4730         switch (how) {
4731            case BITS2(0,1):
4732               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4733            case BITS2(1,1):
4734               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4735            case BITS2(0,0):
4736               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4737            default:
4738               vassert(0); /* NOTREACHED */
4739         }
4740
4741         /* Normally rN would be updated after the transfer.  However, in
4742            the special case typified by
4743               str x30, [sp,#-16]!
4744            it is necessary to update SP before the transfer, (1)
4745            because Memcheck will otherwise complain about a write
4746            below the stack pointer, and (2) because the segfault
4747            stack extension mechanism will otherwise extend the stack
4748            only down to SP before the instruction, which might not be
4749            far enough, if the -16 offset takes the actual access
4750            address to the next page.
4751         */
4752         Bool earlyWBack
4753           = wBack && simm9 < 0 && szB == 8
4754             && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
4755
4756         if (wBack && earlyWBack)
4757            putIReg64orSP(nn, mkexpr(tEA));
4758
4759         if (isLoad) {
4760            putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
4761         } else {
4762            gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
4763         }
4764
4765         if (wBack && !earlyWBack)
4766            putIReg64orSP(nn, mkexpr(tEA));
4767
4768         const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
4769         const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
4770         const HChar* fmt_str = NULL;
4771         switch (how) {
4772            case BITS2(0,1):
4773               fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4774               break;
4775            case BITS2(1,1):
4776               fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4777               break;
4778            case BITS2(0,0):
4779               fmt_str = "%s %s, [%s, #%lld] (at-EA)\n";
4780               break;
4781            default:
4782               vassert(0);
4783         }
4784         DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
4785                      nameIRegOrZR(szB == 8, tt),
4786                      nameIReg64orSP(nn), simm9);
4787         return True;
4788      }
4789   }
4790
4791   /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
4792   /* L==1 => mm==LD
4793      L==0 => mm==ST
4794      x==0 => 32 bit transfers, and zero extended loads
4795      x==1 => 64 bit transfers
4796      simm7 is scaled by the (single-register) transfer size
4797
4798      (at-Rn-then-Rn=EA)
4799      x0 101 0001 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP], #imm
4800
4801      (at-EA-then-Rn=EA)
4802      x0 101 0011 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]!
4803
4804      (at-EA)
4805      x0 101 0010 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]
4806   */
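   /* Worked decode, for exposition only: 0xA9B97BFD is
      "stp x29, x30, [sp, #-112]!".  bX = 1 (64 bit), bL = 0 (store),
      bWBack = 1, imm7 = 0x72 so simm7 = -14, scaled by 8 to -112;
      rT1 = 29, rT2 = 30, rN = 31 (SP). */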
4807
4808   UInt insn_30_23 = INSN(30,23);
4809   if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
4810       || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
4811       || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
4812      UInt bL     = INSN(22,22);
4813      UInt bX     = INSN(31,31);
4814      UInt bWBack = INSN(23,23);
4815      UInt rT1    = INSN(4,0);
4816      UInt rN     = INSN(9,5);
4817      UInt rT2    = INSN(14,10);
4818      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
4819      if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
4820          || (bL && rT1 == rT2)) {
4821         /* undecodable; fall through */
4822      } else {
4823         if (rN == 31) { /* FIXME generate stack alignment check */ }
4824
4825         // Compute the transfer address TA and the writeback address WA.
4826         IRTemp tRN = newTemp(Ity_I64);
4827         assign(tRN, getIReg64orSP(rN));
4828         IRTemp tEA = newTemp(Ity_I64);
4829         simm7 = (bX ? 8 : 4) * simm7;
4830         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4831
4832         IRTemp tTA = newTemp(Ity_I64);
4833         IRTemp tWA = newTemp(Ity_I64);
4834         switch (INSN(24,23)) {
4835            case BITS2(0,1):
4836               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4837            case BITS2(1,1):
4838               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4839            case BITS2(1,0):
4840               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4841            default:
4842               vassert(0); /* NOTREACHED */
4843         }
4844
4845         /* Normally rN would be updated after the transfer.  However, in
4846            the special case typified by
4847               stp x29, x30, [sp,#-112]!
4848            it is necessary to update SP before the transfer, (1)
4849            because Memcheck will otherwise complain about a write
4850            below the stack pointer, and (2) because the segfault
4851            stack extension mechanism will otherwise extend the stack
4852            only down to SP before the instruction, which might not be
4853            far enough, if the -112 offset takes the actual access
4854            address to the next page.
4855         */
4856         Bool earlyWBack
4857           = bWBack && simm7 < 0
4858             && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
4859
4860         if (bWBack && earlyWBack)
4861            putIReg64orSP(rN, mkexpr(tEA));
4862
4863         /**/ if (bL == 1 && bX == 1) {
4864            // 64 bit load
4865            putIReg64orZR(rT1, loadLE(Ity_I64,
4866                                      binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4867            putIReg64orZR(rT2, loadLE(Ity_I64,
4868                                      binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
4869         } else if (bL == 1 && bX == 0) {
4870            // 32 bit load
4871            putIReg32orZR(rT1, loadLE(Ity_I32,
4872                                      binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4873            putIReg32orZR(rT2, loadLE(Ity_I32,
4874                                      binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
4875         } else if (bL == 0 && bX == 1) {
4876            // 64 bit store
4877            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4878                    getIReg64orZR(rT1));
4879            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
4880                    getIReg64orZR(rT2));
4881         } else {
4882            vassert(bL == 0 && bX == 0);
4883            // 32 bit store
4884            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4885                    getIReg32orZR(rT1));
4886            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
4887                    getIReg32orZR(rT2));
4888         }
4889
4890         if (bWBack && !earlyWBack)
4891            putIReg64orSP(rN, mkexpr(tEA));
4892
4893         const HChar* fmt_str = NULL;
4894         switch (INSN(24,23)) {
4895            case BITS2(0,1):
4896               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4897               break;
4898            case BITS2(1,1):
4899               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4900               break;
4901            case BITS2(1,0):
4902               fmt_str = "%sp %s, %s, [%s, #%lld] (at-EA)\n";
4903               break;
4904            default:
4905               vassert(0);
4906         }
4907         DIP(fmt_str, bL == 0 ? "st" : "ld",
4908                      nameIRegOrZR(bX == 1, rT1),
4909                      nameIRegOrZR(bX == 1, rT2),
4910                      nameIReg64orSP(rN), simm7);
4911         return True;
4912      }
4913   }
4914
4915   /* ---------------- LDR (literal, int reg) ---------------- */
4916   /* 31 29      23    4
4917      00 011 000 imm19 Rt   LDR   Wt, [PC + sxTo64(imm19 << 2)]
4918      01 011 000 imm19 Rt   LDR   Xt, [PC + sxTo64(imm19 << 2)]
4919      10 011 000 imm19 Rt   LDRSW Xt, [PC + sxTo64(imm19 << 2)]
4920      11 011 000 imm19 Rt   prefetch  [PC + sxTo64(imm19 << 2)]
4921      Just handles the first two cases for now.
4922   */
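   /* Worked decode, for exposition only: 0x58000040 is "ldr x0, #8",
      a literal load with bX = 1 and imm19 = 2, so the EA is
      guest_PC_curr_instr + (2 << 2) = PC + 8. */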
4923   if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
4924      UInt  imm19 = INSN(23,5);
4925      UInt  rT    = INSN(4,0);
4926      UInt  bX    = INSN(30,30);
4927      ULong ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4928      if (bX) {
4929         putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
4930      } else {
4931         putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
4932      }
4933      DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
4934      return True;
4935   }
4936
4937   /* -------------- {LD,ST}R (integer register) --------------- */
4938   /* 31 29        20 15     12 11 9  4
4939      |  |         |  |      |  |  |  |
4940      11 111000011 Rm option S  10 Rn Rt  LDR  Xt, [Xn|SP, R<m>{ext/sh}]
4941      10 111000011 Rm option S  10 Rn Rt  LDR  Wt, [Xn|SP, R<m>{ext/sh}]
4942      01 111000011 Rm option S  10 Rn Rt  LDRH Wt, [Xn|SP, R<m>{ext/sh}]
4943      00 111000011 Rm option S  10 Rn Rt  LDRB Wt, [Xn|SP, R<m>{ext/sh}]
4944
4945      11 111000001 Rm option S  10 Rn Rt  STR  Xt, [Xn|SP, R<m>{ext/sh}]
4946      10 111000001 Rm option S  10 Rn Rt  STR  Wt, [Xn|SP, R<m>{ext/sh}]
4947      01 111000001 Rm option S  10 Rn Rt  STRH Wt, [Xn|SP, R<m>{ext/sh}]
4948      00 111000001 Rm option S  10 Rn Rt  STRB Wt, [Xn|SP, R<m>{ext/sh}]
4949   */
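   /* Worked decode, for exposition only: 0xF8637841 is
      "ldr x1, [x2, x3, lsl #3]".  szLg2 = 3, bit 22 is 1 (load), and
      gen_indexed_EA sees optS = 0111 (option 011, S = 1), so the EA
      is X2 + (X3 << 3). */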
4950   if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
4951       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
4952      HChar  dis_buf[64];
4953      UInt   szLg2 = INSN(31,30);
4954      Bool   isLD  = INSN(22,22) == 1;
4955      UInt   tt    = INSN(4,0);
4956      IRTemp ea    = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
4957      if (ea != IRTemp_INVALID) {
4958         switch (szLg2) {
4959            case 3: /* 64 bit */
4960               if (isLD) {
4961                  putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
4962                  DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
4963               } else {
4964                  storeLE(mkexpr(ea), getIReg64orZR(tt));
4965                  DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
4966               }
4967               break;
4968            case 2: /* 32 bit */
4969               if (isLD) {
4970                  putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
4971                  DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
4972               } else {
4973                  storeLE(mkexpr(ea), getIReg32orZR(tt));
4974                  DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
4975               }
4976               break;
4977            case 1: /* 16 bit */
4978               if (isLD) {
4979                  putIReg64orZR(tt, unop(Iop_16Uto64,
4980                                         loadLE(Ity_I16, mkexpr(ea))));
4981                  DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
4982               } else {
4983                  storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
4984                  DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
4985               }
4986               break;
4987            case 0: /* 8 bit */
4988               if (isLD) {
4989                  putIReg64orZR(tt, unop(Iop_8Uto64,
4990                                         loadLE(Ity_I8, mkexpr(ea))));
4991                  DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
4992               } else {
4993                  storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
4994                  DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
4995               }
4996               break;
4997            default:
4998               vassert(0);
4999         }
5000         return True;
5001      }
5002   }
5003
5004   /* -------------- LDRS{B,H,W} (uimm12) -------------- */
5005   /* 31 29  26  23 21    9 4
5006      10 111 001 10 imm12 n t   LDRSW Xt, [Xn|SP, #pimm12 * 4]
5007      01 111 001 1x imm12 n t   LDRSH Rt, [Xn|SP, #pimm12 * 2]
5008      00 111 001 1x imm12 n t   LDRSB Rt, [Xn|SP, #pimm12 * 1]
5009      where
5010         Rt is Wt when x==1, Xt when x==0
5011   */
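   /* Worked decode, for exposition only: 0xB9800441 is
      "ldrsw x1, [x2, #4]".  szLg2 = 2, bitX = 0, imm12 = 1, so the
      EA is X2 + 1 * 4 and the loaded 32 bits are sign extended to
      64. */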
5012   if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
5013      /* Further checks on bits 31:30 and 22 */
5014      Bool valid = False;
5015      switch ((INSN(31,30) << 1) | INSN(22,22)) {
5016         case BITS3(1,0,0):
5017         case BITS3(0,1,0): case BITS3(0,1,1):
5018         case BITS3(0,0,0): case BITS3(0,0,1):
5019            valid = True;
5020            break;
5021      }
5022      if (valid) {
5023         UInt    szLg2 = INSN(31,30);
5024         UInt    bitX  = INSN(22,22);
5025         UInt    imm12 = INSN(21,10);
5026         UInt    nn    = INSN(9,5);
5027         UInt    tt    = INSN(4,0);
5028         UInt    szB   = 1 << szLg2;
5029         IRExpr* ea    = binop(Iop_Add64,
5030                               getIReg64orSP(nn), mkU64(imm12 * szB));
5031         switch (szB) {
5032            case 4:
5033               vassert(bitX == 0);
5034               putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
5035               DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
5036                   nameIReg64orSP(nn), imm12 * szB);
5037               break;
5038            case 2:
5039               if (bitX == 1) {
5040                  putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
5041               } else {
5042                  putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
5043               }
5044               DIP("ldrsh %s, [%s, #%u]\n",
5045                   nameIRegOrZR(bitX == 0, tt),
5046                   nameIReg64orSP(nn), imm12 * szB);
5047               break;
5048            case 1:
5049               if (bitX == 1) {
5050                  putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
5051               } else {
5052                  putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
5053               }
5054               DIP("ldrsb %s, [%s, #%u]\n",
5055                   nameIRegOrZR(bitX == 0, tt),
5056                   nameIReg64orSP(nn), imm12 * szB);
5057               break;
5058            default:
5059               vassert(0);
5060         }
5061         return True;
5062      }
5063      /* else fall through */
5064   }
5065
5066   /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
5067   /* (at-Rn-then-Rn=EA)
5068      31 29      23 21 20   11 9 4
5069      00 111 000 1x 0  imm9 01 n t  LDRSB Rt, [Xn|SP], #simm9
5070      01 111 000 1x 0  imm9 01 n t  LDRSH Rt, [Xn|SP], #simm9
5071      10 111 000 10 0  imm9 01 n t  LDRSW Xt, [Xn|SP], #simm9
5072
5073      (at-EA-then-Rn=EA)
5074      00 111 000 1x 0  imm9 11 n t  LDRSB Rt, [Xn|SP, #simm9]!
5075      01 111 000 1x 0  imm9 11 n t  LDRSH Rt, [Xn|SP, #simm9]!
5076      10 111 000 10 0  imm9 11 n t  LDRSW Xt, [Xn|SP, #simm9]!
5077      where
5078         Rt is Wt when x==1, Xt when x==0
5079         transfer-at-Rn when [11]==0, at EA when [11]==1
5080   */
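   /* Worked decode, for exposition only: 0x38801425 is
      "ldrsb x5, [x1], #1" (post-index).  szLg2 = 0, bit 22 is 0 so
      the destination is X5 (64 bit), imm9 = 1, and atRN holds since
      bits [11:10] = 01: load at X1, then X1 += 1. */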
5081   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5082       && INSN(21,21) == 0 && INSN(10,10) == 1) {
5083      /* Further checks on bits 31:30 and 22 */
5084      Bool valid = False;
5085      switch ((INSN(31,30) << 1) | INSN(22,22)) {
5086         case BITS3(1,0,0):                    // LDRSW Xt
5087         case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
5088         case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
5089            valid = True;
5090            break;
5091      }
5092      if (valid) {
5093         UInt   szLg2 = INSN(31,30);
5094         UInt   imm9  = INSN(20,12);
5095         Bool   atRN  = INSN(11,11) == 0;
5096         UInt   nn    = INSN(9,5);
5097         UInt   tt    = INSN(4,0);
5098         IRTemp tRN   = newTemp(Ity_I64);
5099         IRTemp tEA   = newTemp(Ity_I64);
5100         IRTemp tTA   = IRTemp_INVALID;
5101         ULong  simm9 = sx_to_64(imm9, 9);
5102         Bool   is64  = INSN(22,22) == 0;
5103         assign(tRN, getIReg64orSP(nn));
5104         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5105         tTA = atRN ? tRN : tEA;
5106         HChar ch = '?';
5107         /* There are 5 cases:
5108               byte     load,           SX to 64
5109               byte     load, SX to 32, ZX to 64
5110               halfword load,           SX to 64
5111               halfword load, SX to 32, ZX to 64
5112               word     load,           SX to 64
5113            The ifs below handle them in the listed order.
5114         */
5115         if (szLg2 == 0) {
5116            ch = 'b';
5117            if (is64) {
5118               putIReg64orZR(tt, unop(Iop_8Sto64,
5119                                      loadLE(Ity_I8, mkexpr(tTA))));
5120            } else {
5121               putIReg32orZR(tt, unop(Iop_8Sto32,
5122                                      loadLE(Ity_I8, mkexpr(tTA))));
5123            }
5124         }
5125         else if (szLg2 == 1) {
5126            ch = 'h';
5127            if (is64) {
5128               putIReg64orZR(tt, unop(Iop_16Sto64,
5129                                      loadLE(Ity_I16, mkexpr(tTA))));
5130            } else {
5131               putIReg32orZR(tt, unop(Iop_16Sto32,
5132                                      loadLE(Ity_I16, mkexpr(tTA))));
5133            }
5134         }
5135         else if (szLg2 == 2 && is64) {
5136            ch = 'w';
5137            putIReg64orZR(tt, unop(Iop_32Sto64,
5138                                   loadLE(Ity_I32, mkexpr(tTA))));
5139         }
5140         else {
5141            vassert(0);
5142         }
5143         putIReg64orSP(nn, mkexpr(tEA));
5144         DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!\n",
5145             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
5146         return True;
5147      }
5148      /* else fall through */
5149   }
5150
5151   /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
5152   /* 31 29      23 21 20   11 9 4
5153      00 111 000 1x 0  imm9 00 n t  LDURSB Rt, [Xn|SP, #simm9]
5154      01 111 000 1x 0  imm9 00 n t  LDURSH Rt, [Xn|SP, #simm9]
5155      10 111 000 10 0  imm9 00 n t  LDURSW Xt, [Xn|SP, #simm9]
5156      where
5157         Rt is Wt when x==1, Xt when x==0
5158   */
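   /* Worked decode, for exposition only: 0x78DFE123 is
      "ldursh w3, [x9, #-2]".  szLg2 = 1, bit 22 is 1 so the
      destination is W3 (32 bit), imm9 = 0x1FE giving simm9 = -2, and
      there is no writeback. */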
5159   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5160       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5161      /* Further checks on bits 31:30 and 22 */
5162      Bool valid = False;
5163      switch ((INSN(31,30) << 1) | INSN(22,22)) {
5164         case BITS3(1,0,0):                    // LDURSW Xt
5165         case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
5166         case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
5167            valid = True;
5168            break;
5169      }
5170      if (valid) {
5171         UInt   szLg2 = INSN(31,30);
5172         UInt   imm9  = INSN(20,12);
5173         UInt   nn    = INSN(9,5);
5174         UInt   tt    = INSN(4,0);
5175         IRTemp tRN   = newTemp(Ity_I64);
5176         IRTemp tEA   = newTemp(Ity_I64);
5177         ULong  simm9 = sx_to_64(imm9, 9);
5178         Bool   is64  = INSN(22,22) == 0;
5179         assign(tRN, getIReg64orSP(nn));
5180         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5181         HChar ch = '?';
5182         /* There are 5 cases:
5183               byte     load,           SX to 64
5184               byte     load, SX to 32, ZX to 64
5185               halfword load,           SX to 64
5186               halfword load, SX to 32, ZX to 64
5187               word     load,           SX to 64
5188            The ifs below handle them in the listed order.
5189         */
5190         if (szLg2 == 0) {
5191            ch = 'b';
5192            if (is64) {
5193               putIReg64orZR(tt, unop(Iop_8Sto64,
5194                                      loadLE(Ity_I8, mkexpr(tEA))));
5195            } else {
5196               putIReg32orZR(tt, unop(Iop_8Sto32,
5197                                      loadLE(Ity_I8, mkexpr(tEA))));
5198            }
5199         }
5200         else if (szLg2 == 1) {
5201            ch = 'h';
5202            if (is64) {
5203               putIReg64orZR(tt, unop(Iop_16Sto64,
5204                                      loadLE(Ity_I16, mkexpr(tEA))));
5205            } else {
5206               putIReg32orZR(tt, unop(Iop_16Sto32,
5207                                      loadLE(Ity_I16, mkexpr(tEA))));
5208            }
5209         }
5210         else if (szLg2 == 2 && is64) {
5211            ch = 'w';
5212            putIReg64orZR(tt, unop(Iop_32Sto64,
5213                                   loadLE(Ity_I32, mkexpr(tEA))));
5214         }
5215         else {
5216            vassert(0);
5217         }
5218         DIP("ldurs%c %s, [%s, #%lld]",
5219             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
5220         return True;
5221      }
5222      /* else fall through */
5223   }
5224
5225   /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
5226   /* L==1    => mm==LD
5227      L==0    => mm==ST
5228      sz==00  => 32 bit (S) transfers
5229      sz==01  => 64 bit (D) transfers
5230      sz==10  => 128 bit (Q) transfers
5231      sz==11  isn't allowed
5232      simm7 is scaled by the (single-register) transfer size
5233
5234      31 29  26   22 21   14 9 4
5235
5236      sz 101 1000 L  imm7 t2 n t1   mmNP SDQt1, SDQt2, [Xn|SP, #imm]
5237                                    (at-EA, with nontemporal hint)
5238
5239      sz 101 1001 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP], #imm
5240                                    (at-Rn-then-Rn=EA)
5241
5242      sz 101 1010 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]
5243                                    (at-EA)
5244
5245      sz 101 1011 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]!
5246                                    (at-EA-then-Rn=EA)
5247   */
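   /* Worked decode, for exposition only: 0xADB007E0 is
      "stp q0, q1, [sp, #-512]!".  szSlg2 = 2 so szB = 16 (Q),
      isLD = False, wBack = True, imm7 = 0x60 so simm7 = -32, scaled
      by 16 to -512; again an early-writeback case. */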
5248   if (INSN(29,25) == BITS5(1,0,1,1,0)) {
5249      UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
5250      Bool isLD   = INSN(22,22) == 1;
5251      Bool wBack  = INSN(23,23) == 1;
5252      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
5253      UInt tt2    = INSN(14,10);
5254      UInt nn     = INSN(9,5);
5255      UInt tt1    = INSN(4,0);
5256      if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
5257         /* undecodable; fall through */
5258      } else {
5259         if (nn == 31) { /* FIXME generate stack alignment check */ }
5260
5261         // Compute the transfer address TA and the writeback address WA.
5262         UInt   szB = 4 << szSlg2; /* szB is the per-register size */
5263         IRTemp tRN = newTemp(Ity_I64);
5264         assign(tRN, getIReg64orSP(nn));
5265         IRTemp tEA = newTemp(Ity_I64);
5266         simm7 = szB * simm7;
5267         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5268
5269         IRTemp tTA = newTemp(Ity_I64);
5270         IRTemp tWA = newTemp(Ity_I64);
5271         switch (INSN(24,23)) {
5272            case BITS2(0,1):
5273               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5274            case BITS2(1,1):
5275               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5276            case BITS2(1,0):
5277            case BITS2(0,0):
5278               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5279            default:
5280               vassert(0); /* NOTREACHED */
5281         }
5282
5283         IRType ty = Ity_INVALID;
5284         switch (szB) {
5285            case 4:  ty = Ity_F32;  break;
5286            case 8:  ty = Ity_F64;  break;
5287            case 16: ty = Ity_V128; break;
5288            default: vassert(0);
5289         }
5290
5291         /* Normally rN would be updated after the transfer.  However, in
5292            the special cases typified by
5293               stp q0, q1, [sp,#-512]!
5294               stp d0, d1, [sp,#-512]!
5295               stp s0, s1, [sp,#-512]!
5296            it is necessary to update SP before the transfer, (1)
5297            because Memcheck will otherwise complain about a write
5298            below the stack pointer, and (2) because the segfault
5299            stack extension mechanism will otherwise extend the stack
5300            only down to SP before the instruction, which might not be
5301            far enough, if the -512 offset takes the actual access
5302            address to the next page.
5303         */
5304         Bool earlyWBack
5305           = wBack && simm7 < 0
5306             && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
5307
5308         if (wBack && earlyWBack)
5309            putIReg64orSP(nn, mkexpr(tEA));
5310
5311         if (isLD) {
5312            if (szB < 16) {
5313               putQReg128(tt1, mkV128(0x0000));
5314            }
5315            putQRegLO(tt1,
5316                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
5317            if (szB < 16) {
5318               putQReg128(tt2, mkV128(0x0000));
5319            }
5320            putQRegLO(tt2,
5321                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
5322         } else {
5323            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
5324                    getQRegLO(tt1, ty));
5325            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
5326                    getQRegLO(tt2, ty));
5327         }
5328
5329         if (wBack && !earlyWBack)
5330            putIReg64orSP(nn, mkexpr(tEA));
5331
5332         const HChar* fmt_str = NULL;
5333         switch (INSN(24,23)) {
5334            case BITS2(0,1):
5335               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5336               break;
5337            case BITS2(1,1):
5338               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5339               break;
5340            case BITS2(1,0):
5341               fmt_str = "%sp %s, %s, [%s, #%lld] (at-EA)\n";
5342               break;
5343            case BITS2(0,0):
5344               fmt_str = "%snp %s, %s, [%s, #%lld] (at-EA, nontemporal)\n";
5345               break;
5346            default:
5347               vassert(0);
5348         }
5349         DIP(fmt_str, isLD ? "ld" : "st",
5350                      nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
5351                      nameIReg64orSP(nn), simm7);
5352         return True;
5353      }
5354   }
5355
5356   /* -------------- {LD,ST}R (vector register) --------------- */
5357   /* 31 29     23  20 15     12 11 9  4
5358      |  |      |   |  |      |  |  |  |
5359      00 111100 011 Rm option S  10 Rn Rt  LDR Bt, [Xn|SP, R<m>{ext/sh}]
5360      01 111100 011 Rm option S  10 Rn Rt  LDR Ht, [Xn|SP, R<m>{ext/sh}]
5361      10 111100 011 Rm option S  10 Rn Rt  LDR St, [Xn|SP, R<m>{ext/sh}]
5362      11 111100 011 Rm option S  10 Rn Rt  LDR Dt, [Xn|SP, R<m>{ext/sh}]
5363      00 111100 111 Rm option S  10 Rn Rt  LDR Qt, [Xn|SP, R<m>{ext/sh}]
5364
5365      00 111100 001 Rm option S  10 Rn Rt  STR Bt, [Xn|SP, R<m>{ext/sh}]
5366      01 111100 001 Rm option S  10 Rn Rt  STR Ht, [Xn|SP, R<m>{ext/sh}]
5367      10 111100 001 Rm option S  10 Rn Rt  STR St, [Xn|SP, R<m>{ext/sh}]
5368      11 111100 001 Rm option S  10 Rn Rt  STR Dt, [Xn|SP, R<m>{ext/sh}]
5369      00 111100 101 Rm option S  10 Rn Rt  STR Qt, [Xn|SP, R<m>{ext/sh}]
5370   */
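   /* Worked decode, for exposition only: 0x3CE26820 is
      "ldr q0, [x1, x2]".  szLg2 = (bit 23 << 2) | bits [31:30] = 4,
      so this is a Q transfer; gen_indexed_EA sees optS = 0110 and
      produces the EA X1 + X2. */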
5371   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5372       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5373      HChar  dis_buf[64];
5374      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5375      Bool   isLD  = INSN(22,22) == 1;
5376      UInt   tt    = INSN(4,0);
5377      if (szLg2 > 4) goto after_LDR_STR_vector_register;
5378      IRTemp ea    = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
5379      if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
5380      switch (szLg2) {
5381         case 0: /* 8 bit */
5382            if (isLD) {
5383               putQReg128(tt, mkV128(0x0000));
5384               putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
5385               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5386            } else {
5387               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
5388               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5389            }
5390            break;
5391         case 1: /* 16 bit */
5392            if (isLD) {
5393               putQReg128(tt, mkV128(0x0000));
5394               putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
5395               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5396            } else {
5397               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
5398               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5399            }
5400            break;
5401         case 2: /* 32 bit */
5402            if (isLD) {
5403               putQReg128(tt, mkV128(0x0000));
5404               putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
5405               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5406            } else {
5407               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
5408               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5409            }
5410            break;
5411         case 3: /* 64 bit */
5412            if (isLD) {
5413               putQReg128(tt, mkV128(0x0000));
5414               putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
5415               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5416            } else {
5417               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
5418               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5419            }
5420            break;
5421         case 4: /* 128 bit */
5422            if (isLD) {
5423               putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
5424               DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
5425            } else {
5426               storeLE(mkexpr(ea), getQReg128(tt));
5427               DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
5428            }
5429            break;
5430         default:
5431            vassert(0);
5432      }
5433      return True;
5434   }
5435  after_LDR_STR_vector_register:
5436
5437   /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
5438   /* 31 29      22 20 15  12 11 9  4
5439      |  |       |  |  |   |  |  |  |
5440      10 1110001 01 Rm opt S 10 Rn Rt    LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
5441
5442      01 1110001 01 Rm opt S 10 Rn Rt    LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
5443      01 1110001 11 Rm opt S 10 Rn Rt    LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
5444
5445      00 1110001 01 Rm opt S 10 Rn Rt    LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
5446      00 1110001 11 Rm opt S 10 Rn Rt    LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
5447   */
5448   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5449       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5450      HChar  dis_buf[64];
5451      UInt   szLg2  = INSN(31,30);
5452      Bool   sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
5453      UInt   tt     = INSN(4,0);
5454      if (szLg2 == 3) goto after_LDRS_integer_register;
5455      IRTemp ea     = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5456      if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
5457      /* Enumerate the 5 variants explicitly. */
5458      if (szLg2 == 2/*32 bit*/ && sxTo64) {
5459         putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
5460         DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
5461         return True;
5462      }
5463      else
5464      if (szLg2 == 1/*16 bit*/) {
5465         if (sxTo64) {
5466            putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
5467            DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
5468         } else {
5469            putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
5470            DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5471         }
5472         return True;
5473      }
5474      else
5475      if (szLg2 == 0/*8 bit*/) {
5476         if (sxTo64) {
5477            putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
5478            DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
5479         } else {
5480            putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
5481            DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5482         }
5483         return True;
5484      }
5485      /* else it's an invalid combination */
5486   }
5487  after_LDRS_integer_register:
5488
5489   /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
5490   /* This is the Unsigned offset variant only.  The Post-Index and
5491      Pre-Index variants are below.
5492
5493      31 29      23 21    9 4
5494      00 111 101 01 imm12 n t   LDR Bt, [Xn|SP + imm12 * 1]
5495      01 111 101 01 imm12 n t   LDR Ht, [Xn|SP + imm12 * 2]
5496      10 111 101 01 imm12 n t   LDR St, [Xn|SP + imm12 * 4]
5497      11 111 101 01 imm12 n t   LDR Dt, [Xn|SP + imm12 * 8]
5498      00 111 101 11 imm12 n t   LDR Qt, [Xn|SP + imm12 * 16]
5499
5500      00 111 101 00 imm12 n t   STR Bt, [Xn|SP + imm12 * 1]
5501      01 111 101 00 imm12 n t   STR Ht, [Xn|SP + imm12 * 2]
5502      10 111 101 00 imm12 n t   STR St, [Xn|SP + imm12 * 4]
5503      11 111 101 00 imm12 n t   STR Dt, [Xn|SP + imm12 * 8]
5504      00 111 101 10 imm12 n t   STR Qt, [Xn|SP + imm12 * 16]
5505   */
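   /* Worked decode, for exposition only: 0xFD400860 is
      "ldr d0, [x3, #16]".  szLg2 = 3 (D), bit 22 is 1 (load),
      imm12 = 2, so pimm12 = 2 << 3 = 16. */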
5506   if (INSN(29,24) == BITS6(1,1,1,1,0,1)
5507       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
5508      UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
5509      Bool   isLD   = INSN(22,22) == 1;
5510      UInt   pimm12 = INSN(21,10) << szLg2;
5511      UInt   nn     = INSN(9,5);
5512      UInt   tt     = INSN(4,0);
5513      IRTemp tEA    = newTemp(Ity_I64);
5514      IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
5515      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
5516      if (isLD) {
5517         if (szLg2 < 4) {
5518            putQReg128(tt, mkV128(0x0000));
5519         }
5520         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5521      } else {
5522         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5523      }
5524      DIP("%s %s, [%s, #%u]\n",
5525          isLD ? "ldr" : "str",
5526          nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
5527      return True;
5528   }
5529
5530   /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
5531   /* These are the Post-Index and Pre-Index variants.
5532
5533      31 29      23   20   11 9 4
5534      (at-Rn-then-Rn=EA)
5535      00 111 100 01 0 imm9 01 n t   LDR Bt, [Xn|SP], #simm
5536      01 111 100 01 0 imm9 01 n t   LDR Ht, [Xn|SP], #simm
5537      10 111 100 01 0 imm9 01 n t   LDR St, [Xn|SP], #simm
5538      11 111 100 01 0 imm9 01 n t   LDR Dt, [Xn|SP], #simm
5539      00 111 100 11 0 imm9 01 n t   LDR Qt, [Xn|SP], #simm
5540
5541      (at-EA-then-Rn=EA)
5542      00 111 100 01 0 imm9 11 n t   LDR Bt, [Xn|SP, #simm]!
5543      01 111 100 01 0 imm9 11 n t   LDR Ht, [Xn|SP, #simm]!
5544      10 111 100 01 0 imm9 11 n t   LDR St, [Xn|SP, #simm]!
5545      11 111 100 01 0 imm9 11 n t   LDR Dt, [Xn|SP, #simm]!
5546      00 111 100 11 0 imm9 11 n t   LDR Qt, [Xn|SP, #simm]!
5547
5548      Stores are the same except with bit 22 set to 0.
5549   */
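   /* Worked decode, for exposition only: 0x3C9F0FE0 is
      "str q0, [sp, #-16]!".  szLg2 = 4 (Q), bit 22 is 0 (store),
      imm9 = 0x1F0 so simm9 = -16, and bit 11 is 1, so the transfer
      is at the EA with writeback (pre-index). */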
5550   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5551       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5552       && INSN(21,21) == 0 && INSN(10,10) == 1) {
5553      UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
5554      Bool   isLD   = INSN(22,22) == 1;
5555      UInt   imm9   = INSN(20,12);
5556      Bool   atRN   = INSN(11,11) == 0;
5557      UInt   nn     = INSN(9,5);
5558      UInt   tt     = INSN(4,0);
5559      IRTemp tRN    = newTemp(Ity_I64);
5560      IRTemp tEA    = newTemp(Ity_I64);
5561      IRTemp tTA    = IRTemp_INVALID;
5562      IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
5563      ULong  simm9  = sx_to_64(imm9, 9);
5564      assign(tRN, getIReg64orSP(nn));
5565      assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5566      tTA = atRN ? tRN : tEA;
5567      if (isLD) {
5568         if (szLg2 < 4) {
5569            putQReg128(tt, mkV128(0x0000));
5570         }
5571         putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
5572      } else {
5573         storeLE(mkexpr(tTA), getQRegLO(tt, ty));
5574      }
5575      putIReg64orSP(nn, mkexpr(tEA));
5576      DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
5577          isLD ? "ldr" : "str",
5578          nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
5579      return True;
5580   }
5581
5582   /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
5583   /* 31 29      23   20   11 9 4
5584      00 111 100 01 0 imm9 00 n t   LDUR Bt, [Xn|SP, #simm]
5585      01 111 100 01 0 imm9 00 n t   LDUR Ht, [Xn|SP, #simm]
5586      10 111 100 01 0 imm9 00 n t   LDUR St, [Xn|SP, #simm]
5587      11 111 100 01 0 imm9 00 n t   LDUR Dt, [Xn|SP, #simm]
5588      00 111 100 11 0 imm9 00 n t   LDUR Qt, [Xn|SP, #simm]
5589
5590      00 111 100 00 0 imm9 00 n t   STUR Bt, [Xn|SP, #simm]
5591      01 111 100 00 0 imm9 00 n t   STUR Ht, [Xn|SP, #simm]
5592      10 111 100 00 0 imm9 00 n t   STUR St, [Xn|SP, #simm]
5593      11 111 100 00 0 imm9 00 n t   STUR Dt, [Xn|SP, #simm]
5594      00 111 100 10 0 imm9 00 n t   STUR Qt, [Xn|SP, #simm]
5595   */
5596   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5597       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5598       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5599      UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
5600      Bool   isLD   = INSN(22,22) == 1;
5601      UInt   imm9   = INSN(20,12);
5602      UInt   nn     = INSN(9,5);
5603      UInt   tt     = INSN(4,0);
5604      ULong  simm9  = sx_to_64(imm9, 9);
5605      IRTemp tEA    = newTemp(Ity_I64);
5606      IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
5607      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
5608      if (isLD) {
5609         if (szLg2 < 4) {
5610            putQReg128(tt, mkV128(0x0000));
5611         }
5612         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5613      } else {
5614         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5615      }
5616      DIP("%s %s, [%s, #%lld]\n",
5617          isLD ? "ldur" : "stur",
5618          nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5619      return True;
5620   }
5621
5622   /* ---------------- LDR (literal, SIMD&FP) ---------------- */
5623   /* 31 29      23    4
5624      00 011 100 imm19 t    LDR St, [PC + sxTo64(imm19 << 2)]
5625      01 011 100 imm19 t    LDR Dt, [PC + sxTo64(imm19 << 2)]
5626      10 011 100 imm19 t    LDR Qt, [PC + sxTo64(imm19 << 2)]
5627   */
5628   if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
5629      UInt   szB   = 4 << INSN(31,30);
5630      UInt   imm19 = INSN(23,5);
5631      UInt   tt    = INSN(4,0);
5632      ULong  ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5633      IRType ty    = preferredVectorSubTypeFromSize(szB);
5634      putQReg128(tt, mkV128(0x0000));
5635      putQRegLO(tt, loadLE(ty, mkU64(ea)));
5636      DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
5637      return True;
5638   }
5639
5640   /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg)  ------ */
5641   /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs) ------ */
5642   /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs) ------ */
5643   /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs) ------ */
5644   /* 31 29  26   22 21 20    15   11 9 4
5645
5646      0q 001 1000 L  0  00000 0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP]
5647      0q 001 1001 L  0  m     0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP], step
5648
5649      0q 001 1000 L  0  00000 0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP]
5650      0q 001 1001 L  0  m     0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP], step
5651
5652      0q 001 1000 L  0  00000 1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP]
5653      0q 001 1001 L  0  m     1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP], step
5654
5655      0q 001 1000 L  0  00000 0111 sz n t  xx1 {Vt.T},      [Xn|SP]
5656      0q 001 1001 L  0  m     0111 sz n t  xx1 {Vt.T},      [Xn|SP], step
5657
5658      T    = defined by Q and sz in the normal way
5659      step = if m == 11111 then transfer-size else Xm
5660      xx   = case L of 1 -> LD ; 0 -> ST
5661   */
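   /* Worked decode, for exposition only: 0x4C408804 is
      "ld2 {v4.4s, v5.4s}, [x0]".  q = 1, L = 1 (load), opc = 1000 so
      nRegs = 2, sz = 10, and nameArr_Q_SZ(1, 2) gives "4s";
      xferSzB = 16 * 2 = 32, with no writeback since bit 23 is 0. */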
5662   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
5663       && INSN(21,21) == 0) {
5664      Bool bitQ  = INSN(30,30);
5665      Bool isPX  = INSN(23,23) == 1;
5666      Bool isLD  = INSN(22,22) == 1;
5667      UInt mm    = INSN(20,16);
5668      UInt opc   = INSN(15,12);
5669      UInt sz    = INSN(11,10);
5670      UInt nn    = INSN(9,5);
5671      UInt tt    = INSN(4,0);
5672      Bool isQ   = bitQ == 1;
5673      Bool is1d  = sz == BITS2(1,1) && !isQ;
5674      UInt nRegs = 0;
5675      switch (opc) {
5676         case BITS4(0,0,0,0): nRegs = 4; break;
5677         case BITS4(0,1,0,0): nRegs = 3; break;
5678         case BITS4(1,0,0,0): nRegs = 2; break;
5679         case BITS4(0,1,1,1): nRegs = 1; break;
5680         default: break;
5681      }
5682
5683      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
5684         If we see it, set nRegs to 0 so as to cause the next conditional
5685         to fail. */
5686      if (!isPX && mm != 0)
5687         nRegs = 0;
5688
5689      if (nRegs == 1                             /* .1d is allowed */
5690          || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {
5691
5692         UInt xferSzB = (isQ ? 16 : 8) * nRegs;
5693
5694         /* Generate the transfer address (TA) and if necessary the
5695            writeback address (WB) */
5696         IRTemp tTA = newTemp(Ity_I64);
5697         assign(tTA, getIReg64orSP(nn));
5698         if (nn == 31) { /* FIXME generate stack alignment check */ }
5699         IRTemp tWB = IRTemp_INVALID;
5700         if (isPX) {
5701            tWB = newTemp(Ity_I64);
5702            assign(tWB, binop(Iop_Add64,
5703                              mkexpr(tTA),
5704                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
5705                                                     : getIReg64orZR(mm)));
5706         }
5707
5708         /* -- BEGIN generate the transfers -- */
5709
5710         IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
5711         u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
5712         switch (nRegs) {
5713            case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
5714            case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
5715            case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
5716            case 1: u0 = newTempV128(); i0 = newTempV128(); break;
5717            default: vassert(0);
5718         }
5719
5720         /* -- Multiple 128 or 64 bit stores -- */
5721         if (!isLD) {
5722            switch (nRegs) {
5723               case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
5724               case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
5725               case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
5726               case 1: assign(u0, getQReg128((tt+0) % 32)); break;
5727               default: vassert(0);
5728            }
5729            switch (nRegs) {
5730               case 4:  (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
5731                           (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
5732                        break;
5733               case 3:  (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
5734                           (&i0, &i1, &i2, sz, u0, u1, u2);
5735                        break;
5736               case 2:  (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
5737                           (&i0, &i1, sz, u0, u1);
5738                        break;
5739               case 1:  (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
5740                           (&i0, sz, u0);
5741                        break;
5742               default: vassert(0);
5743            }
5744#           define MAYBE_NARROW_TO_64(_expr) \
5745                      (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
5746            UInt step = isQ ? 16 : 8;
5747            switch (nRegs) {
5748               case 4:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
5749                                 MAYBE_NARROW_TO_64(mkexpr(i3)) );
5750                        /* fallthru */
5751               case 3:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
5752                                 MAYBE_NARROW_TO_64(mkexpr(i2)) );
5753                        /* fallthru */
5754               case 2:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
5755                                 MAYBE_NARROW_TO_64(mkexpr(i1)) );
5756                        /* fallthru */
5757               case 1:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
5758                                 MAYBE_NARROW_TO_64(mkexpr(i0)) );
5759                        break;
5760               default: vassert(0);
5761            }
5762#           undef MAYBE_NARROW_TO_64
5763         }
5764
5765         /* -- Multiple 128 or 64 bit loads -- */
5766         else /* isLD */ {
5767            UInt   step   = isQ ? 16 : 8;
5768            IRType loadTy = isQ ? Ity_V128 : Ity_I64;
5769#           define MAYBE_WIDEN_FROM_64(_expr) \
5770                      (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
5771            switch (nRegs) {
5772               case 4:
5773                  assign(i3, MAYBE_WIDEN_FROM_64(
5774                                loadLE(loadTy,
5775                                       binop(Iop_Add64, mkexpr(tTA),
5776                                                        mkU64(3 * step)))));
5777                  /* fallthru */
5778               case 3:
5779                  assign(i2, MAYBE_WIDEN_FROM_64(
5780                                loadLE(loadTy,
5781                                       binop(Iop_Add64, mkexpr(tTA),
5782                                                        mkU64(2 * step)))));
5783                  /* fallthru */
5784               case 2:
5785                  assign(i1, MAYBE_WIDEN_FROM_64(
5786                                loadLE(loadTy,
5787                                       binop(Iop_Add64, mkexpr(tTA),
5788                                                        mkU64(1 * step)))));
5789                  /* fallthru */
5790               case 1:
5791                  assign(i0, MAYBE_WIDEN_FROM_64(
5792                                loadLE(loadTy,
5793                                       binop(Iop_Add64, mkexpr(tTA),
5794                                                        mkU64(0 * step)))));
5795                  break;
5796               default:
5797                  vassert(0);
5798            }
5799#           undef MAYBE_WIDEN_FROM_64
5800            switch (nRegs) {
5801               case 4:  (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
5802                           (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3);
5803                        break;
5804               case 3:  (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
5805                           (&u0, &u1, &u2, sz, i0, i1, i2);
5806                        break;
5807               case 2:  (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
5808                           (&u0, &u1, sz, i0, i1);
5809                        break;
5810               case 1:  (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
5811                           (&u0, sz, i0);
5812                        break;
5813               default: vassert(0);
5814            }
5815            switch (nRegs) {
5816               case 4:  putQReg128( (tt+3) % 32,
5817                                    math_MAYBE_ZERO_HI64(bitQ, u3));
5818                        /* fallthru */
5819               case 3:  putQReg128( (tt+2) % 32,
5820                                    math_MAYBE_ZERO_HI64(bitQ, u2));
5821                        /* fallthru */
5822               case 2:  putQReg128( (tt+1) % 32,
5823                                    math_MAYBE_ZERO_HI64(bitQ, u1));
5824                        /* fallthru */
5825               case 1:  putQReg128( (tt+0) % 32,
5826                                    math_MAYBE_ZERO_HI64(bitQ, u0));
5827                        break;
5828               default: vassert(0);
5829            }
5830         }
5831
5832         /* -- END generate the transfers -- */
5833
5834         /* Do the writeback, if necessary */
5835         if (isPX) {
5836            putIReg64orSP(nn, mkexpr(tWB));
5837         }
5838
5839         HChar pxStr[20];
5840         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
5841         if (isPX) {
5842            if (mm == BITS5(1,1,1,1,1))
5843               vex_sprintf(pxStr, ", #%u", xferSzB);
5844            else
5845               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
5846         }
5847         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
5848         DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
5849             isLD ? "ld" : "st", nRegs,
5850             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
5851             pxStr);
5852
5853         return True;
5854      }
5855      /* else fall through */
5856   }
5857
5858   /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs) ------ */
5859   /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs) ------ */
5860   /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs) ------ */
5861   /* 31 29  26   22 21 20    15   11 9 4
5862
5863      0q 001 1000 L  0  00000 0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP]
5864      0q 001 1001 L  0  m     0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP], step
5865
5866      0q 001 1000 L  0  00000 0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP]
5867      0q 001 1001 L  0  m     0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP], step
5868
5869      0q 001 1000 L  0  00000 1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP]
5870      0q 001 1001 L  0  m     1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP], step
5871
5872      T    = defined by Q and sz in the normal way
5873      step = if m == 11111 then transfer-size else Xm
5874      xx   = case L of 1 -> LD ; 0 -> ST
5875   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
       && INSN(21,21) == 0) {
      Bool bitQ  = INSN(30,30);
      Bool isPX  = INSN(23,23) == 1;
      Bool isLD  = INSN(22,22) == 1;
      UInt mm    = INSN(20,16);
      UInt opc   = INSN(15,12);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);
      Bool isQ   = bitQ == 1;
      UInt nRegs = 0;
      switch (opc) {
         case BITS4(0,0,1,0): nRegs = 4; break;
         case BITS4(0,1,1,0): nRegs = 3; break;
         case BITS4(1,0,1,0): nRegs = 2; break;
         default: break;
      }

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
         If we see it, set nRegs to 0 so as to cause the next conditional
         to fail. */
      if (!isPX && mm != 0)
         nRegs = 0;

      if (nRegs >= 2 && nRegs <= 4) {

         UInt xferSzB = (isQ ? 16 : 8) * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* -- BEGIN generate the transfers -- */

         IRTemp u0, u1, u2, u3;
         u0 = u1 = u2 = u3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4: u3 = newTempV128(); /* fallthru */
            case 3: u2 = newTempV128(); /* fallthru */
            case 2: u1 = newTempV128();
                    u0 = newTempV128(); break;
            default: vassert(0);
         }

         /* -- Multiple 128 or 64 bit stores -- */
         if (!isLD) {
            switch (nRegs) {
               case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
               case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
               case 2: assign(u1, getQReg128((tt+1) % 32));
                       assign(u0, getQReg128((tt+0) % 32)); break;
               default: vassert(0);
            }
#           define MAYBE_NARROW_TO_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
            UInt step = isQ ? 16 : 8;
            switch (nRegs) {
               case 4:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(u3)) );
                        /* fallthru */
               case 3:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(u2)) );
                        /* fallthru */
               case 2:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(u1)) );
                        storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(u0)) );
                        break;
               default: vassert(0);
            }
#           undef MAYBE_NARROW_TO_64
         }

         /* -- Multiple 128 or 64 bit loads -- */
         else /* isLD */ {
            UInt   step   = isQ ? 16 : 8;
            IRType loadTy = isQ ? Ity_V128 : Ity_I64;
#           define MAYBE_WIDEN_FROM_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
            switch (nRegs) {
               case 4:
                  assign(u3, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(3 * step)))));
                  /* fallthru */
               case 3:
                  assign(u2, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(2 * step)))));
                  /* fallthru */
               case 2:
                  assign(u1, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(1 * step)))));
                  assign(u0, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(0 * step)))));
                  break;
               default:
                  vassert(0);
            }
#           undef MAYBE_WIDEN_FROM_64
            switch (nRegs) {
               case 4:  putQReg128( (tt+3) % 32,
                                    math_MAYBE_ZERO_HI64(bitQ, u3));
                        /* fallthru */
               case 3:  putQReg128( (tt+2) % 32,
                                    math_MAYBE_ZERO_HI64(bitQ, u2));
                        /* fallthru */
               case 2:  putQReg128( (tt+1) % 32,
                                    math_MAYBE_ZERO_HI64(bitQ, u1));
                        putQReg128( (tt+0) % 32,
                                    math_MAYBE_ZERO_HI64(bitQ, u0));
                        break;
               default: vassert(0);
            }
         }

         /* -- END generate the transfers -- */

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
             isLD ? "ld" : "st",
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }

   /* ---------- LD1R (single structure, replicate) ---------- */
   /* ---------- LD2R (single structure, replicate) ---------- */
   /* ---------- LD3R (single structure, replicate) ---------- */
   /* ---------- LD4R (single structure, replicate) ---------- */
   /* 31 29       22 20    15    11 9 4
      0q 001 1010 10 00000 110 0 sz n t  LD1R {Vt.T}, [Xn|SP]
      0q 001 1011 10 m     110 0 sz n t  LD1R {Vt.T}, [Xn|SP], step

      0q 001 1010 11 00000 110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP]
      0q 001 1011 11 m     110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP], step

      0q 001 1010 10 00000 111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP]
      0q 001 1011 10 m     111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP], step

      0q 001 1010 11 00000 111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP]
      0q 001 1011 11 m     111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP], step

      step = if m == 11111 then transfer-size else Xm
   */
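   /* Example: LD2R with sz == 01 loads two consecutive 16-bit
      elements and replicates each across its own register, so the
      immediate post-index step is 2 * 2 == 4 bytes. */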
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
       && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
       && INSN(12,12) == 0) {
      UInt   bitQ  = INSN(30,30);
      Bool   isPX  = INSN(23,23) == 1;
      UInt   nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
      UInt   mm    = INSN(20,16);
      UInt   sz    = INSN(11,10);
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
      if (isPX || mm == 0) {

         IRType ty    = integerIRTypeOfSize(1 << sz);

         UInt laneSzB = 1 << sz;
         UInt xferSzB = laneSzB * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
         e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4:
               e3 = newTemp(ty);
               assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(3 * laneSzB))));
               v3 = math_DUP_TO_V128(e3, ty);
               putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
               /* fallthrough */
            case 3:
               e2 = newTemp(ty);
               assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(2 * laneSzB))));
               v2 = math_DUP_TO_V128(e2, ty);
               putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
               /* fallthrough */
            case 2:
               e1 = newTemp(ty);
               assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(1 * laneSzB))));
               v1 = math_DUP_TO_V128(e1, ty);
               putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
               /* fallthrough */
            case 1:
               e0 = newTemp(ty);
               assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(0 * laneSzB))));
               v0 = math_DUP_TO_V128(e0, ty);
               putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
               break;
            default:
               vassert(0);
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
             nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }

   /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
   /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
   /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
   /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
   /* 31 29       22 21 20    15    11 9 4
      0q 001 1010 L  0  00000 xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP]
      0q 001 1011 L  0  m     xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP], step

      0q 001 1010 L  1  00000 xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP]
      0q 001 1011 L  1  m     xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP], step

      0q 001 1010 L  0  00000 xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP]
      0q 001 1011 L  0  m     xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP], step

      0q 001 1010 L  1  00000 xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP]
      0q 001 1011 L  1  m     xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP], step

      step = if m == 11111 then transfer-size else Xm
      op   = case L of 1 -> LD ; 0 -> ST

      laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
                                     01:b:b:b0 -> 2, bbb
                                     10:b:b:00 -> 4, bb
                                     10:b:0:01 -> 8, b
   */
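   /* Worked example of the lane decode: for xx:q:S:sz == 01:1:0:10 the
      second row applies, giving laneszB == 2 and ix == q:S:sz<1>
      == 0b101 == 5, i.e. a halfword access to lane 5. */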
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
      UInt   bitQ  = INSN(30,30);
      Bool   isPX  = INSN(23,23) == 1;
      Bool   isLD  = INSN(22,22) == 1;
      UInt   nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
      UInt   mm    = INSN(20,16);
      UInt   xx    = INSN(15,14);
      UInt   bitS  = INSN(12,12);
      UInt   sz    = INSN(11,10);
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);

      Bool valid = True;

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
      if (!isPX && mm != 0)
         valid = False;

      UInt laneSzB = 0;  /* invalid */
      UInt ix      = 16; /* invalid */

      UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
      switch (xx_q_S_sz) {
         case 0x00: case 0x01: case 0x02: case 0x03:
         case 0x04: case 0x05: case 0x06: case 0x07:
         case 0x08: case 0x09: case 0x0A: case 0x0B:
         case 0x0C: case 0x0D: case 0x0E: case 0x0F:
            laneSzB = 1; ix = xx_q_S_sz & 0xF;
            break;
         case 0x10: case 0x12: case 0x14: case 0x16:
         case 0x18: case 0x1A: case 0x1C: case 0x1E:
            laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
            break;
         case 0x20: case 0x24: case 0x28: case 0x2C:
            laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
            break;
         case 0x21: case 0x29:
            laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
            break;
         default:
            break;
      }

      if (valid && laneSzB != 0) {

         IRType ty      = integerIRTypeOfSize(laneSzB);
         UInt   xferSzB = laneSzB * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         switch (nRegs) {
            case 4: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
               }
               /* fallthrough */
            }
            case 3: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
               }
               /* fallthrough */
            }
            case 2: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
               }
               /* fallthrough */
            }
            case 1: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
               }
               break;
            }
            default:
               vassert(0);
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
             isLD ? "ld" : "st", nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
             ix, nameIReg64orSP(nn), pxStr);

         return True;
      }
      /* else fall through */
   }

   /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
   /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
   /* 31 29     23  20      14    9 4
      sz 001000 010 11111 0 11111 n t   LDX{R,RH,RB}  Rt, [Xn|SP]
      sz 001000 010 11111 1 11111 n t   LDAX{R,RH,RB} Rt, [Xn|SP]
      sz 001000 000 s     0 11111 n t   STX{R,RH,RB}  Ws, Rt, [Xn|SP]
      sz 001000 000 s     1 11111 n t   STLX{R,RH,RB} Ws, Rt, [Xn|SP]
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
       && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
       && INSN(14,10) == BITS5(1,1,1,1,1)) {
      UInt szBlg2     = INSN(31,30);
      Bool isLD       = INSN(22,22) == 1;
      Bool isAcqOrRel = INSN(15,15) == 1;
      UInt ss         = INSN(20,16);
      UInt nn         = INSN(9,5);
      UInt tt         = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD && ss == BITS5(1,1,1,1,1)) {
         IRTemp res = newTemp(ty);
         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
         return True;
      }
      if (!isLD) {
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         IRTemp  res  = newTemp(Ity_I1);
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
         /* IR semantics: res is 1 if store succeeds, 0 if it fails.
            Need to set rS to 1 on failure, 0 on success. */
         putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
                                            mkU64(1)));
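         /* Hence, for example, after "stxr w1, x2, [x0]" the guest sees
            w1 == 0 if the store-exclusive succeeded and w1 == 1 if it
            failed, matching the architectural definition. */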
         DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
             nameIRegOrZR(False, ss),
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
         return True;
      }
      /* else fall through */
   }

   /* ------------------ LDA{R,RH,RB} ------------------ */
   /* ------------------ STL{R,RH,RB} ------------------ */
   /* 31 29     23  20      14    9 4
      sz 001000 110 11111 1 11111 n t   LDAR<sz> Rt, [Xn|SP]
      sz 001000 100 11111 1 11111 n t   STLR<sz> Rt, [Xn|SP]
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
       && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
      UInt szBlg2 = INSN(31,30);
      Bool isLD   = INSN(22,22) == 1;
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD) {
         IRTemp res = newTemp(ty);
         assign(res, loadLE(ty, mkexpr(ea)));
         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         stmt(IRStmt_MBE(Imbe_Fence));
         DIP("lda%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      } else {
         stmt(IRStmt_MBE(Imbe_Fence));
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         storeLE(mkexpr(ea), data);
         DIP("stl%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      }
      return True;
   }

   /* ------------------ PRFM (immediate) ------------------ */
   /* 31           21    9 4
      11 111 00110 imm12 n t   PRFM prfop=Rt, [Xn|SP, #pimm]
   */
   if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
      UInt imm12 = INSN(21,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);
      /* Generating any IR here is pointless, except for documentation
         purposes, as it will get optimised away later. */
      IRTemp ea = newTemp(Ity_I64);
      assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
      DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
      return True;
   }

   vex_printf("ARM64 front end: load_store\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Control flow and misc instructions                   ---*/
/*------------------------------------------------------------*/

static
Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
                          const VexArchInfo* archinfo)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ---------------------- B cond ----------------------- */
   /* 31        24    4 3
      0101010 0 imm19 0 cond */
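   /* The 19-bit immediate is shifted left 2 and sign extended, so the
      reachable range is approximately +/- 1MiB from this instruction. */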
   if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
      UInt  cond   = INSN(3,0);
      ULong uimm64 = INSN(23,5) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 21);
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
      stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- B{L} uncond -------------------- */
   if (INSN(30,26) == BITS5(0,0,1,0,1)) {
      /* 000101 imm26  B   (PC + sxTo64(imm26 << 2))
         100101 imm26  BL  (PC + sxTo64(imm26 << 2))
      */
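      /* The 26-bit immediate, shifted left 2 and sign extended, gives
         a reach of approximately +/- 128MiB. */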
      UInt  bLink  = INSN(31,31);
      ULong uimm64 = INSN(25,0) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 28);
      if (bLink) {
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
      }
      putPC(mkU64(guest_PC_curr_instr + simm64));
      dres->whatNext = Dis_StopHere;
      dres->jk_StopHere = Ijk_Call;
      DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
                          guest_PC_curr_instr + simm64);
      return True;
   }

   /* --------------------- B{L} reg --------------------- */
   /* 31      24 22 20    15     9  4
      1101011 00 10 11111 000000 nn 00000  RET  Rn
      1101011 00 01 11111 000000 nn 00000  CALL Rn
      1101011 00 00 11111 000000 nn 00000  JMP  Rn
   */
   if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
       && INSN(20,16) == BITS5(1,1,1,1,1)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)
       && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt branch_type = INSN(22,21);
      UInt nn          = INSN(9,5);
      if (branch_type == BITS2(1,0) /* RET */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext = Dis_StopHere;
         dres->jk_StopHere = Ijk_Ret;
         DIP("ret %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,1) /* CALL */) {
         IRTemp dst = newTemp(Ity_I64);
         assign(dst, getIReg64orZR(nn));
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
         putPC(mkexpr(dst));
         dres->whatNext = Dis_StopHere;
         dres->jk_StopHere = Ijk_Call;
         DIP("blr %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,0) /* JMP */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext = Dis_StopHere;
         dres->jk_StopHere = Ijk_Boring;
         DIP("jmp %s\n", nameIReg64orZR(nn));
         return True;
      }
   }

   /* -------------------- CB{N}Z -------------------- */
   /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
      sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
      Bool    is64   = INSN(31,31) == 1;
      Bool    bIfZ   = INSN(24,24) == 0;
      ULong   uimm64 = INSN(23,5) << 2;
      UInt    rT     = INSN(4,0);
      Long    simm64 = (Long)sx_to_64(uimm64, 21);
      IRExpr* cond   = NULL;
      if (is64) {
         cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                      getIReg64orZR(rT), mkU64(0));
      } else {
         cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
                      getIReg32orZR(rT), mkU32(0));
      }
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("cb%sz %s, 0x%llx\n",
          bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- TB{N}Z -------------------- */
   /* 31 30      24 23  18  5 4
      b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
      b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
   */
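   /* Example: "tbnz x5, #33, <target>" has b5 == 1 and b40 == 00001,
      since the tested bit number is b5:b40 == 100001 == 33. */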
   if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
      UInt    b5     = INSN(31,31);
      Bool    bIfZ   = INSN(24,24) == 0;
      UInt    b40    = INSN(23,19);
      UInt    imm14  = INSN(18,5);
      UInt    tt     = INSN(4,0);
      UInt    bitNo  = (b5 << 5) | b40;
      ULong   uimm64 = imm14 << 2;
      Long    simm64 = sx_to_64(uimm64, 16);
      IRExpr* cond
         = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                 binop(Iop_And64,
                       binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
                       mkU64(1)),
                 mkU64(0));
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("tb%sz %s, #%u, 0x%llx\n",
          bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- SVC -------------------- */
   /* 11010100 000 imm16 000 01
      Don't bother with anything except the imm16==0 case.
   */
   if (INSN(31,0) == 0xD4000001) {
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Sys_syscall;
      DIP("svc #0\n");
      return True;
   }

   /* ------------------ M{SR,RS} ------------------ */
   /* ---- Cases for TPIDR_EL0 ----
      0xD51BD0 010 Rt   MSR tpidr_el0, rT
      0xD53BD0 010 Rt   MRS rT, tpidr_el0
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
         DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
      } else {
         putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
         DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for FPCR ----
      0xD51B44 000 Rt  MSR fpcr, rT
      0xD53B44 000 Rt  MRS rT, fpcr
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
         DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
      } else {
         putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
         DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for FPSR ----
      0xD51B44 001 Rt  MSR fpsr, rT
      0xD53B44 001 Rt  MRS rT, fpsr
      The only part of this we model is FPSR.QC.  All other bits
      are ignored when writing to it and RAZ when reading from it.
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         /* Just deal with FPSR.QC.  Make up a V128 value which is
            zero if Xt[27] is zero and any other value if Xt[27] is
            nonzero. */
         IRTemp qc64 = newTemp(Ity_I64);
         assign(qc64, binop(Iop_And64,
                            binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
                            mkU64(1)));
         IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
         stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
         DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
      } else {
         /* Generate a value which is all zeroes except for bit 27,
            which must be zero if QCFLAG is all zeroes and one otherwise. */
         IRTemp qcV128 = newTempV128();
         assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
         IRTemp qc64 = newTemp(Ity_I64);
         assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
                                      unop(Iop_V128to64,   mkexpr(qcV128))));
         IRExpr* res = binop(Iop_Shl64,
                             unop(Iop_1Uto64,
                                  binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
                             mkU8(27));
         putIReg64orZR(tt, res);
         DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for NZCV ----
      D51B42 000 Rt  MSR nzcv, rT
      D53B42 000 Rt  MRS rT, nzcv
      The only parts of NZCV that actually exist are bits 31:28, which
      are the N Z C and V bits themselves.  Hence the flags thunk provides
      all the state we need.
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
      Bool  toSys = INSN(21,21) == 0;
      UInt  tt    = INSN(4,0);
      if (toSys) {
         IRTemp t = newTemp(Ity_I64);
         assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
         setFlags_COPY(t);
         DIP("msr nzcv, %s\n", nameIReg32orZR(tt));
      } else {
         IRTemp res = newTemp(Ity_I64);
         assign(res, mk_arm64g_calculate_flags_nzcv());
         putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
         DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for DCZID_EL0 ----
      Don't support arbitrary reads and writes to this register.  Just
      return the value 16, which indicates that the DC ZVA instruction
      is not permitted, so we don't have to emulate it.
      D5 3B 00 111 Rt  MRS rT, dczid_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
      UInt tt = INSN(4,0);
      putIReg64orZR(tt, mkU64(1<<4));
      DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
      return True;
   }
   /* ---- Cases for CTR_EL0 ----
      We just handle reads, and make up a value from the D and I line
      sizes in the VexArchInfo we are given, and patch in the following
      fields that the Foundation model gives ("natively"):
      CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
      D5 3B 00 001 Rt  MRS rT, ctr_el0
   */
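   /* For example, with 64-byte D and I lines (lg2 szB == 6), both
      4-bit line-size fields hold 6 - 2 == 4 and the returned value is
      0x8444C004. */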
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
      UInt tt = INSN(4,0);
      /* Need to generate a value from dMinLine_lg2_szB and
         iMinLine_lg2_szB.  The value in the register is in 32-bit
         units, so need to subtract 2 from the values in the
         VexArchInfo.  We can assume that the values here are valid --
         disInstr_ARM64 checks them -- so there's no need to deal with
         out-of-range cases. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17
              && archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      UInt val
         = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
                      | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
      putIReg64orZR(tt, mkU64(val));
      DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
      return True;
   }
   /* ---- Cases for CNTVCT_EL0 ----
      This is a timestamp counter of some sort.  Support reads of it only
      by passing through to the host.
      D5 3B E0 010 Rt  MRS Xt, cntvct_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
      UInt     tt   = INSN(4,0);
      IRTemp   val  = newTemp(Ity_I64);
      IRExpr** args = mkIRExprVec_0();
      IRDirty* d    = unsafeIRDirty_1_N (
                         val,
                         0/*regparms*/,
                         "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
                         &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
                         args
                      );
      /* execute the dirty call, dumping the result in val. */
      stmt( IRStmt_Dirty(d) );
      putIReg64orZR(tt, mkexpr(val));
      DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ IC_IVAU ------------------ */
   /* D5 0B 75 001 Rt  ic ivau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
      /* We will always be provided with a valid iMinLine value. */
      vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the invalidation range, request exit-and-invalidate, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_InvalICache;
      DIP("ic ivau, %s\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ DC_CVAU ------------------ */
   /* D5 0B 7B 001 Rt  dc cvau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
      /* Exactly the same scheme as for IC IVAU, except we observe the
         dMinLine size, and request an Ijk_FlushDCache instead of
         Ijk_InvalICache. */
      /* We will always be provided with a valid dMinLine value. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the flush range, request exit-and-flush, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_FlushDCache;
      DIP("dc cvau, %s\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ ISB, DMB, DSB ------------------ */
   /* 31          21            11  7 6  4
      11010 10100 0 00 011 0011 CRm 1 01 11111  DMB opt
      11010 10100 0 00 011 0011 CRm 1 00 11111  DSB opt
      11010 10100 0 00 011 0011 CRm 1 10 11111  ISB opt
   */
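   /* All three barriers, whatever the CRm option, are conservatively
      implemented as a single full fence (Imbe_Fence).  This
      over-synchronises but is safe. */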
   if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
       && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
       && INSN(7,7) == 1
       && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
      UInt opc = INSN(6,5);
      UInt CRm = INSN(11,8);
      vassert(opc <= 2 && CRm <= 15);
      stmt(IRStmt_MBE(Imbe_Fence));
      const HChar* opNames[3]
         = { "dsb", "dmb", "isb" };
      const HChar* howNames[16]
         = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
             "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
      DIP("%s %s\n", opNames[opc], howNames[CRm]);
      return True;
   }

   /* -------------------- NOP -------------------- */
   if (INSN(31,0) == 0xD503201F) {
      DIP("nop\n");
      return True;
   }

   /* -------------------- BRK -------------------- */
   /* 31        23  20    4
      1101 0100 001 imm16 00000  BRK #imm16
   */
   if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
       && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt imm16 = INSN(20,5);
      /* Request SIGTRAP and then restart of this insn. */
      putPC(mkU64(guest_PC_curr_instr + 0));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_SigTRAP;
      DIP("brk #%u\n", imm16);
      return True;
   }

  //fail:
   vex_printf("ARM64 front end: branch_etc\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- SIMD and FP instructions: helper functions           ---*/
/*------------------------------------------------------------*/

/* Some constructors for interleave/deinterleave expressions. */

static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a0 b0
   return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a1 b1
   return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a2 a0 b2 b0
   return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 a1 b3 b1
   return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a1 b1 a0 b0
   return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 b3 a2 b2
   return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a6 a4 a2 a0 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 a5 a3 a1 b7 b5 b3 b1
   return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4
   return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
                                    IRTemp bFEDCBA9876543210 ) {
   // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
   return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
                                     mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
   return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

/* Generate N copies of |bit| in the bottom of a ULong. */
static ULong Replicate ( ULong bit, Int N )
{
   vassert(bit <= 1 && N >= 1 && N < 64);
   if (bit == 0) {
      return 0;
   } else {
      /* Careful.  This won't work for N == 64. */
      return (1ULL << N) - 1;
   }
}
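
/* For example, Replicate(1, 3) == 0b111 == 7, while Replicate(0, N)
   == 0 for any N. */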

static ULong Replicate32x2 ( ULong bits32 )
{
   vassert(0 == (bits32 & ~0xFFFFFFFFULL));
   return (bits32 << 32) | bits32;
}

static ULong Replicate16x4 ( ULong bits16 )
{
   vassert(0 == (bits16 & ~0xFFFFULL));
   return Replicate32x2((bits16 << 16) | bits16);
}

static ULong Replicate8x8 ( ULong bits8 )
{
   vassert(0 == (bits8 & ~0xFFULL));
   return Replicate16x4((bits8 << 8) | bits8);
}

/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
   |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
   is 64.  In the former case, the upper 32 bits of the returned value
   are guaranteed to be zero. */
static ULong VFPExpandImm ( ULong imm8, Int N )
{
   vassert(imm8 <= 0xFF);
   vassert(N == 32 || N == 64);
   Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
   Int F = N - E - 1;
   ULong imm8_6 = (imm8 >> 6) & 1;
   /* sign: 1 bit */
   /* exp:  E bits */
   /* frac: F bits */
   ULong sign = (imm8 >> 7) & 1;
   ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
   ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
   vassert(sign < (1ULL << 1));
   vassert(exp  < (1ULL << E));
   vassert(frac < (1ULL << F));
   vassert(1 + E + F == N);
   ULong res = (sign << (E+F)) | (exp << F) | frac;
   return res;
}
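
/* For example, VFPExpandImm(0x70, 64) == 0x3FF0000000000000, the
   IEEE754 encoding of +1.0 as a double. */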

/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64    = 0;
   Bool  testimm8 = False;

   switch (cmode >> 1) {
      case 0:
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4:
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5:
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6:
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         if ((cmode & 1) == 0 && op == 1) {
            imm64 = 0;   imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         if ((cmode & 1) == 1 && op == 0) {
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
         vassert(0);
   }

   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}
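
/* For example, op == 0, cmode == 0b1110, imm8 == 0xAB yields
   0xABABABABABABABAB, whereas op == 1 with the same cmode expands
   each bit of imm8 to a whole byte, so imm8 == 0x81 yields
   0xFF000000000000FF. */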

/* Help a bit for decoding laneage for vector operations that can be
   of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
   and SZ bits, typically for vector floating point. */
static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI,  /*OUT*/IRType* tyF,
                               /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
                               /*OUT*/const HChar** arrSpec,
                               Bool bitQ, Bool bitSZ )
{
   vassert(bitQ == True || bitQ == False);
   vassert(bitSZ == True || bitSZ == False);
   if (bitQ && bitSZ) { // 2x64
      if (tyI)       *tyI       = Ity_I64;
      if (tyF)       *tyF       = Ity_F64;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "2d";
      return True;
   }
   if (bitQ && !bitSZ) { // 4x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 4;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "4s";
      return True;
   }
   if (!bitQ && !bitSZ) { // 2x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = True;
      if (arrSpec)   *arrSpec   = "2s";
      return True;
   }
   // Else impliedly 1x64, which isn't allowed.
   return False;
}

/* Helper for decoding laneage for shift-style vector operations
   that involve an immediate shift amount. */
static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
                                    UInt immh, UInt immb )
{
   vassert(immh < (1<<4));
   vassert(immb < (1<<3));
   UInt immhb = (immh << 3) | immb;
   if (immh & 8) {
      if (shift)  *shift  = 128 - immhb;
      if (szBlg2) *szBlg2 = 3;
      return True;
   }
   if (immh & 4) {
      if (shift)  *shift  = 64 - immhb;
      if (szBlg2) *szBlg2 = 2;
      return True;
   }
   if (immh & 2) {
      if (shift)  *shift  = 32 - immhb;
      if (szBlg2) *szBlg2 = 1;
      return True;
   }
   if (immh & 1) {
      if (shift)  *shift  = 16 - immhb;
      if (szBlg2) *szBlg2 = 0;
      return True;
   }
   return False;
}
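
/* For example, immh == 0001, immb == 010 gives immhb == 10, so the
   lanes are 8 bits wide (szBlg2 == 0) and the shift amount is
   16 - 10 == 6. */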

/* Generate IR to fold all lanes of the V128 value in 'src' as
   characterised by the operator 'op', and return the result in the
   bottom bits of a V128, with all other bits set to zero. */
static IRTemp math_FOLDV ( IRTemp src, IROp op )
{
   /* The basic idea is to use repeated applications of Iop_CatEven*
      and Iop_CatOdd* operators to 'src' so as to clone each lane into
      a complete vector.  Then fold all those vectors with 'op' and
      zero out all but the least significant lane. */
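   /* As a small example of the cloning step: for two 64-bit lanes
      a1 a0, CatOdd64x2(src,src) gives a1 a1 and CatEven64x2(src,src)
      gives a0 a0; one application of 'op' to those then leaves
      op(a1,a0) in every lane. */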
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here. */
         IRTemp xAllF = newTempV128();
         IRTemp xAllE = newTempV128();
         IRTemp xAllD = newTempV128();
         IRTemp xAllC = newTempV128();
         IRTemp xAllB = newTempV128();
         IRTemp xAllA = newTempV128();
         IRTemp xAll9 = newTempV128();
         IRTemp xAll8 = newTempV128();
         IRTemp xAll7 = newTempV128();
         IRTemp xAll6 = newTempV128();
         IRTemp xAll5 = newTempV128();
         IRTemp xAll4 = newTempV128();
         IRTemp xAll3 = newTempV128();
         IRTemp xAll2 = newTempV128();
         IRTemp xAll1 = newTempV128();
         IRTemp xAll0 = newTempV128();
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         IRTemp maxFE = newTempV128();
         IRTemp maxDC = newTempV128();
         IRTemp maxBA = newTempV128();
         IRTemp max98 = newTempV128();
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTempV128();
         IRTemp maxBA98 = newTempV128();
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTempV128();
         IRTemp max76543210 = newTempV128();
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTempV128();
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
7291         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
7292         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
7293         IRTemp max7654 = newTempV128();
7294         IRTemp max3210 = newTempV128();
7295         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7296         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7297         IRTemp max76543210 = newTempV128();
7298         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
7299         IRTemp res = newTempV128();
7300         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
7301         return res;
7302      }
7303      case Iop_Max32Fx4: case Iop_Min32Fx4:
7304      case Iop_Min32Sx4: case Iop_Min32Ux4:
7305      case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
7306         IRTemp x3210 = src;
7307         IRTemp x3232 = newTempV128();
7308         IRTemp x1010 = newTempV128();
7309         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
7310         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
7311         IRTemp x3333 = newTempV128();
7312         IRTemp x2222 = newTempV128();
7313         IRTemp x1111 = newTempV128();
7314         IRTemp x0000 = newTempV128();
7315         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
7316         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
7317         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
7318         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
7319         IRTemp max32 = newTempV128();
7320         IRTemp max10 = newTempV128();
7321         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
7322         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
7323         IRTemp max3210 = newTempV128();
7324         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7325         IRTemp res = newTempV128();
7326         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
7327         return res;
7328      }
7329      case Iop_Add64x2: {
7330         IRTemp x10 = src;
7331         IRTemp x00 = newTempV128();
7332         IRTemp x11 = newTempV128();
7333         assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
7334         assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
7335         IRTemp max10 = newTempV128();
7336         assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
7337         IRTemp res = newTempV128();
7338         assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
7339         return res;
7340      }
7341      default:
7342         vassert(0);
7343   }
7344}
7345
7346
7347/* Generate IR for TBL and TBX.  This deals with the 128 bit case
7348   only. */
7349static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
7350                             IRTemp oor_values )
7351{
   vassert(len <= 3); /* len is unsigned, so >= 0 is implied */

   /* Generate some useful constants as concisely as possible. */
   IRTemp half15 = newTemp(Ity_I64);
   assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
   IRTemp half16 = newTemp(Ity_I64);
   assign(half16, mkU64(0x1010101010101010ULL));

   /* A zero vector */
   IRTemp allZero = newTempV128();
   assign(allZero, mkV128(0x0000));
   /* A vector containing 15 in each 8-bit lane */
   IRTemp all15 = newTempV128();
   assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
   /* A vector containing 16 in each 8-bit lane */
   IRTemp all16 = newTempV128();
   assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
   /* A vector containing 32 in each 8-bit lane */
   IRTemp all32 = newTempV128();
   assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
   /* A vector containing 48 in each 8-bit lane */
   IRTemp all48 = newTempV128();
   assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
   /* A vector containing 64 in each 8-bit lane */
   IRTemp all64 = newTempV128();
   assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));

   /* Group the 16/32/48/64 vectors so as to be indexable. */
   IRTemp allXX[4] = { all16, all32, all48, all64 };

   /* Compute the result for each table vector, with zeroes in places
      where the index values are out of range, and OR them into the
      running vector. */
   IRTemp running_result = newTempV128();
   assign(running_result, mkV128(0));
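
   /* Worked example (illustrative): suppose len == 1 (two table
      registers) and some lane of |src| holds 19.  In iteration 0 the
      bias is 0, so the biased index is 19; since 19 is not less than
      16, valid_mask is zero for that lane and nothing is contributed.
      In iteration 1 the bias is 16, the biased index is 3, valid_mask
      is all-ones, and byte 3 of tab[1] is OR'd into the running
      result. */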

   UInt tabent;
   for (tabent = 0; tabent <= len; tabent++) {
      vassert(tabent < 4); /* tabent is unsigned, so >= 0 is implied */
      IRTemp bias = newTempV128();
      assign(bias,
             mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
      IRTemp biased_indices = newTempV128();
      assign(biased_indices,
             binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
      IRTemp valid_mask = newTempV128();
      assign(valid_mask,
             binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
      IRTemp safe_biased_indices = newTempV128();
      assign(safe_biased_indices,
             binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
      IRTemp results_or_junk = newTempV128();
      assign(results_or_junk,
             binop(Iop_Perm8x16, mkexpr(tab[tabent]),
                                 mkexpr(safe_biased_indices)));
      IRTemp results_or_zero = newTempV128();
      assign(results_or_zero,
             binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
      /* And OR that into the running result. */
      IRTemp tmp = newTempV128();
      assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
                        mkexpr(running_result)));
      running_result = tmp;
   }

   /* So now running_result holds the overall result where the indices
      are in range, and zero in out-of-range lanes.  Now we need to
      compute an overall validity mask and use this to copy in the
      lanes in the oor_values for out of range indices.  This is
      unnecessary for TBL but will get folded out by iropt, so we lean
      on that and generate the same code for TBL and TBX here. */
   IRTemp overall_valid_mask = newTempV128();
   assign(overall_valid_mask,
          binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
   IRTemp result = newTempV128();
   assign(result,
          binop(Iop_OrV128,
                mkexpr(running_result),
                binop(Iop_AndV128,
                      mkexpr(oor_values),
                      unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
   return result;
}


/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
   an op which takes two I64s and produces a V128.  That is, a widening
   operator.  Generate IR which applies |opI64x2toV128| to either the
   lower (if |is2| is False) or upper (if |is2| is True) halves of
   |argL| and |argR|, and return the value in a new IRTemp.
*/
static
IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
                                   IRExpr* argL, IRExpr* argR )
{
   IRTemp res   = newTempV128();
   IROp   slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
   assign(res, binop(opI64x2toV128, unop(slice, argL),
                                    unop(slice, argR)));
   return res;
}
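
/* For example, math_BINARY_WIDENING_V128(False, Iop_Mull32Sx2, a, b)
   multiplies the two signed 32-bit lanes in the lower 64 bits of |a|
   and |b|, yielding two 64-bit products in a V128 -- the core of the
   2s -> 2d widening multiplies. */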


/* Generate signed/unsigned absolute difference vector IR. */
static
IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
{
   vassert(size <= 3);
   IRTemp argL = newTempV128();
   IRTemp argR = newTempV128();
   IRTemp msk  = newTempV128();
   IRTemp res  = newTempV128();
   assign(argL, argLE);
   assign(argR, argRE);
   assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
                     mkexpr(argL), mkexpr(argR)));
   assign(res,
          binop(Iop_OrV128,
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
                      mkexpr(msk)),
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
                      unop(Iop_NotV128, mkexpr(msk)))));
   return res;
}
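
/* E.g. for signed 32-bit lanes holding L = 5 and R = 9: the mask lane
   is zero (5 > 9 is false), so the (L - R) term is masked out and the
   (R - L) term, 4, is selected -- giving |5 - 9| without a native
   absolute-difference op. */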


/* Generate IR that takes a V128 and sign- or zero-widens
   either the lower or upper set of lanes to twice-as-wide,
   resulting in a new V128 value. */
static
IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
                                   UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src = newTempV128();
   IRTemp res = newTempV128();
   assign(src, srcE);
   switch (sizeNarrow) {
      case X10:
         assign(res,
                binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
                      binop(fromUpperHalf ? Iop_InterleaveHI32x4
                                          : Iop_InterleaveLO32x4,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(32)));
         break;
      case X01:
         assign(res,
                binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
                      binop(fromUpperHalf ? Iop_InterleaveHI16x8
                                          : Iop_InterleaveLO16x8,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(16)));
         break;
      case X00:
         assign(res,
                binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
                      binop(fromUpperHalf ? Iop_InterleaveHI8x16
                                          : Iop_InterleaveLO8x16,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(8)));
         break;
      default:
         vassert(0);
   }
   return res;
}
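
/* E.g. for sizeNarrow == X10 with fromUpperHalf == False:
   interleaving |src| with itself as 32x4 gives lanes [1 1 0 0], and
   the 64x2 right shift by 32 (logical or arithmetic as requested)
   then yields the zero- or sign-extension of original lanes 1 and 0. */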


/* Generate IR that takes a V128 and sign- or zero-widens
   either the even or odd lanes to twice-as-wide,
   resulting in a new V128 value. */
static
IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
                                      UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src   = newTempV128();
   IRTemp res   = newTempV128();
   IROp   opSAR = mkVecSARN(sizeNarrow+1);
   IROp   opSHR = mkVecSHRN(sizeNarrow+1);
   IROp   opSHL = mkVecSHLN(sizeNarrow+1);
   IROp   opSxR = zWiden ? opSHR : opSAR;
   UInt   amt   = 0;
   switch (sizeNarrow) {
      case X10: amt = 32; break;
      case X01: amt = 16; break;
      case X00: amt = 8;  break;
      default: vassert(0);
   }
   assign(src, srcE);
   if (fromOdd) {
      assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
   } else {
      assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
                               mkU8(amt)));
   }
   return res;
}
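
/* E.g. widening the even 16-bit lanes (fromOdd == False) at
   sizeNarrow == X01: each 32-bit lane is first shifted left 16 bits,
   so the even 16-bit value sits in the top half, and the following
   right shift by 16 sign- or zero-extends it to the full 32 bits. */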


/* Generate IR that takes two V128s and narrows (takes lower half)
   of each lane, producing a single V128 value. */
static
IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
{
   IRTemp res = newTempV128();
   assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
                     mkexpr(argHi), mkexpr(argLo)));
   return res;
}
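
/* E.g. at sizeNarrow == X10, the even 32-bit lanes of argHi:argLo are
   exactly the low halves of the four 64-bit lanes, giving the usual
   2d+2d -> 4s narrowing. */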


/* Return a temp which holds the vector dup of the lane of width
   (1 << size) obtained from src[laneNo]. */
static
IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
{
   vassert(size <= 3);
   /* Normalise |laneNo| so it is of the form
      x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
      This puts the bits we want to inspect at constant offsets
      regardless of the value of |size|.
   */
   UInt ix = laneNo << size;
   vassert(ix <= 15);
   IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
   switch (size) {
      case 0: /* B */
         ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
         /* fallthrough */
      case 1: /* H */
         ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
         /* fallthrough */
      case 2: /* S */
         ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
         /* fallthrough */
      case 3: /* D */
         ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
         break;
      default:
         vassert(0);
   }
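   /* Illustrative trace: size == 1 (H), laneNo == 5 gives ix == 10, so
      ops = { INVALID, CatOdd16x8, CatEven32x4, InterleaveHI64x2 }.
      Applying those from index 3 downwards maps lane h[5] to every
      lane: [76543210] -> [76547654] -> [54545454] -> [55555555]. */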
   IRTemp res = newTempV128();
   assign(res, src);
   Int i;
   for (i = 3; i >= 0; i--) {
      if (ops[i] == Iop_INVALID)
         break;
      IRTemp tmp = newTempV128();
      assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
      res = tmp;
   }
   return res;
}


/* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
   selector encoded as shown below.  Return a new V128 holding the
   selected lane from |srcV| dup'd out to V128, and also return the
   lane number, log2 of the lane size in bytes, and width-character via
   *laneNo, *laneSzLg2 and *laneCh respectively.  It may be that imm5
   is an invalid selector, in which case return
   IRTemp_INVALID, 0, 0 and '?' respectively.

   imm5 = xxxx1   signifies .b[xxxx]
        = xxx10   .h[xxx]
        = xx100   .s[xx]
        = x1000   .d[x]
        otherwise invalid
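
   For example, imm5 = 01010 encodes .h[2].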
*/
static
IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
                             /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
                             IRExpr* srcV, UInt imm5 )
{
   *laneNo    = 0;
   *laneSzLg2 = 0;
   *laneCh    = '?';

   if (imm5 & 1) {
      *laneNo    = (imm5 >> 1) & 15;
      *laneSzLg2 = 0;
      *laneCh    = 'b';
   }
   else if (imm5 & 2) {
      *laneNo    = (imm5 >> 2) & 7;
      *laneSzLg2 = 1;
      *laneCh    = 'h';
   }
   else if (imm5 & 4) {
      *laneNo    = (imm5 >> 3) & 3;
      *laneSzLg2 = 2;
      *laneCh    = 's';
   }
   else if (imm5 & 8) {
      *laneNo    = (imm5 >> 4) & 1;
      *laneSzLg2 = 3;
      *laneCh    = 'd';
   }
   else {
      /* invalid */
      return IRTemp_INVALID;
   }

   return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
}

/* Clone |imm| to every lane of a V128, where |size| is log2 of the
   lane size in bytes. */
static
IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
{
   IRType ty  = Ity_INVALID;
   IRTemp rcS = IRTemp_INVALID;
   switch (size) {
      case X01:
         vassert(imm <= 0xFFFFULL);
         ty  = Ity_I16;
         rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
         break;
      case X10:
         vassert(imm <= 0xFFFFFFFFULL);
         ty  = Ity_I32;
         rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
         break;
      case X11:
         ty  = Ity_I64;
         rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
      default:
         vassert(0);
   }
   IRTemp rcV = math_DUP_TO_V128(rcS, ty);
   return rcV;
}
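
/* E.g. math_VEC_DUP_IMM(X10, 1ULL << 15) returns a V128 holding
   0x00008000 in each 32-bit lane -- the per-lane rounding constant
   that math_SQDMULH below uses for the 16-bit 'R' variants. */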


/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
   and the upper can contain any value -- it is ignored.  If |is2| is False,
   generate IR to put |new64| in the lower half of vector reg |dd| and zero
   the upper half.  If |is2| is True, generate IR to put |new64| in the upper
   half of vector reg |dd| and leave the lower half unchanged.  This
   simulates the behaviour of the "foo/foo2" instructions in which the
   destination is half the width of sources, for example addhn/addhn2.
*/
static
void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
{
   if (is2) {
      /* Keep the lower half of the old contents of Vdd, zero its
         upper half, and OR in |new64| moved up into the upper half. */
      IRTemp t_zero_oldLO = newTempV128();
      assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
      IRTemp t_newHI_zero = newTempV128();
      assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
                                                       mkV128(0x0000)));
      IRTemp res = newTempV128();
      assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
                                    mkexpr(t_newHI_zero)));
      putQReg128(dd, mkexpr(res));
   } else {
      /* This is simple. */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
   }
}


/* Compute vector SQABS at lane size |size| for |srcE|, returning
   the q result in |*qabs| and the normal result in |*nabs|. */
static
void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
                  IRExpr* srcE, UInt size )
{
   IRTemp src, mask, maskn, nsub, qsub;
   src = mask = maskn = nsub = qsub = IRTemp_INVALID;
   newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
   assign(src,   srcE);
   assign(mask,  binop(mkVecCMPGTS(size),  mkV128(0x0000), mkexpr(src)));
   assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
   assign(nsub,  binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(qsub,  binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
   assign(*nabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
   assign(*qabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
}


/* Compute vector SQNEG at lane size |size| for |srcE|, returning
   the q result in |*qneg| and the normal result in |*nneg|. */
static
void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
                  IRExpr* srcE, UInt size )
{
   IRTemp src = IRTemp_INVALID;
   newTempsV128_3(&src, nneg, qneg);
   assign(src,   srcE);
   assign(*nneg, binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
}


/* Zero all except the least significant lane of |srcE|, where |size|
   indicates the lane size in the usual way. */
static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
{
   vassert(size < 4);
   IRTemp t = newTempV128();
   assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
   return t;
}


/* Generate IR to compute vector widening MULL from either the lower
   (is2==False) or upper (is2==True) halves of vecN and vecM.  The
   widening multiplies are unsigned when isU==True and signed when
   isU==False.  |size| is the narrow lane size indication.  Optionally,
   the product may be added to or subtracted from vecD, at the wide lane
   size.  This happens when |mas| is 'a' (add) or 's' (sub).  When |mas|
   is 'm' (only multiply) then the accumulate part does not happen, and
   |vecD| is expected to == IRTemp_INVALID.

   Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
   are allowed.  The result is placed in a new IRTemp, which is
   returned in *res. */
static
void math_MULL_ACC ( /*OUT*/IRTemp* res,
                     Bool is2, Bool isU, UInt size, HChar mas,
                     IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(res && *res == IRTemp_INVALID);
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   if (mas == 'm') vassert(vecD == IRTemp_INVALID);
   IROp   mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
   IROp   accOp = (mas == 'a') ? mkVecADD(size+1)
                  : (mas == 's' ? mkVecSUB(size+1)
                  : Iop_INVALID);
   IRTemp mul   = math_BINARY_WIDENING_V128(is2, mulOp,
                                            mkexpr(vecN), mkexpr(vecM));
   *res = newTempV128();
   assign(*res, mas == 'm' ? mkexpr(mul)
                           : binop(accOp, mkexpr(vecD), mkexpr(mul)));
}
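
/* E.g. SMLAL Vd.4s, Vn.4h, Vm.4h corresponds to is2 == False,
   isU == False, size == X01, mas == 'a': the low 16-bit halves are
   signedly multiplied to 32 bits and added to vecD at that width. */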


/* Same as math_MULL_ACC, except the multiply is signed widening,
   the multiplied value is then doubled, before being added to or
   subtracted from the accumulated value.  And everything is
   saturated.  In all cases, saturation residuals are returned
   via (sat1q, sat1n), and in the accumulate cases,
   via (sat2q, sat2n) too.  All results are returned in new temporaries.
   In the no-accumulate case, *sat2q and *sat2n are never instantiated,
   so the caller can tell this has happened. */
static
void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
                        /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                        /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
                        Bool is2, UInt size, HChar mas,
                        IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   /* Compute
         sat1q = vecN.D[is2] *sq vecM.D[is2] *q 2
         sat1n = vecN.D[is2] *s  vecM.D[is2] *  2
      IOW take either the low or high halves of vecN and vecM, signed widen,
      multiply, double that, and signedly saturate.  Also compute the same
      but without saturation.
   */
   vassert(sat2q && *sat2q == IRTemp_INVALID);
   vassert(sat2n && *sat2n == IRTemp_INVALID);
   newTempsV128_3(sat1q, sat1n, res);
   IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   assign(*sat1q, mkexpr(tq));
   assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));

   /* If there is no accumulation, the final result is sat1q,
      and there's no assignment to sat2q or sat2n. */
   if (mas == 'm') {
      assign(*res, mkexpr(*sat1q));
      return;
   }

   /* Compute
         sat2q  = vecD +sq/-sq sat1q
         sat2n  = vecD +/-     sat1n
         result = sat2q
   */
   newTempsV128_2(sat2q, sat2n);
   assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
                        mkexpr(vecD), mkexpr(*sat1q)));
   assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(vecD), mkexpr(*sat1n)));
   assign(*res, mkexpr(*sat2q));
}


/* Generate IR for widening signed vector multiplies.  The operands
   have their lane width signedly widened, and they are then multiplied
   at the wider width, returning results in two new IRTemps. */
static
void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
                  UInt sizeNarrow, IRTemp argL, IRTemp argR )
{
   vassert(sizeNarrow <= 2);
   newTempsV128_2(resHI, resLO);
   IRTemp argLhi = newTemp(Ity_I64);
   IRTemp argLlo = newTemp(Ity_I64);
   IRTemp argRhi = newTemp(Ity_I64);
   IRTemp argRlo = newTemp(Ity_I64);
   assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
   assign(argLlo, unop(Iop_V128to64,   mkexpr(argL)));
   assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
   assign(argRlo, unop(Iop_V128to64,   mkexpr(argR)));
   IROp opMulls = mkVecMULLS(sizeNarrow);
   assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
   assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
}


/* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
   double that, possibly add a rounding constant (R variants), and take
   the high half. */
static
void math_SQDMULH ( /*OUT*/IRTemp* res,
                    /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                    Bool isR, UInt size, IRTemp vN, IRTemp vM )
{
   vassert(size == X01 || size == X10); /* s or h only */

   newTempsV128_3(res, sat1q, sat1n);

   IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
   math_MULLS(&mullsHI, &mullsLO, size, vN, vM);

   IROp addWide = mkVecADD(size+1);

   if (isR) {
      assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      Int    rcShift    = size == X01 ? 15 : 31;
      IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
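      /* For h lanes, roundConst is 0x00008000 in each 32-bit lane; for
         s lanes, 0x0000000080000000 in each 64-bit lane.  Adding it to
         the doubled product implements the 'R' (rounding) behaviour
         before the high half is taken. */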
      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                         mkexpr(roundConst)),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
                         mkexpr(roundConst))));
   } else {
      assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                   binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
   }

   assign(*res, mkexpr(*sat1q));
}


/* Generate IR for SQSHL, UQSHL, SQSHLU by imm.  Put the result in
   a new temp in *res, and the Q difference pair in new temps in
   *qDiff1 and *qDiff2 respectively.  |nm| denotes which of the
   three operations it is. */
static
void math_QSHL_IMM ( /*OUT*/IRTemp* res,
                     /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
                     IRTemp src, UInt size, UInt shift, const HChar* nm )
{
   vassert(size <= 3);
   UInt laneBits = 8 << size;
   vassert(shift < laneBits);
   newTempsV128_3(res, qDiff1, qDiff2);
   IRTemp z128 = newTempV128();
   assign(z128, mkV128(0x0000));

   /* UQSHL */
   if (vex_streq(nm, "uqshl")) {
      IROp qop = mkVecQSHLNSATUU(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }
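
   /* Worked example (8-bit lanes, shift == 3): a src lane of 0x2A
      shifts to 0x150, which saturates to 0xFF; the check computes
      0x2A >> 5 == 0x01, which is nonzero, so the lane is flagged as
      saturated. */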

   /* SQSHL */
   if (vex_streq(nm, "sqshl")) {
      IROp qop = mkVecQSHLNSATSS(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            different from the top bit of the original value. */
         UInt rshift = laneBits - 1 - shift;
         vassert(rshift < laneBits-1); /* rshift is unsigned */
         /* qDiff1 is the shifted out bits, and the top bit of the original
            value, preceded by zeroes. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         /* qDiff2 is the top bit of the original value, cloned the
            correct number of times. */
         assign(*qDiff2, binop(mkVecSHRN(size),
                               binop(mkVecSARN(size), mkexpr(src),
                                                      mkU8(laneBits-1)),
                               mkU8(rshift)));
         /* This also succeeds in comparing the top bit of the original
            value to itself, which is a bit stupid, but not wrong. */
      }
      return;
   }
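
   /* Worked example (8-bit lanes, shift == 2): a src lane of 0x30
      shifts to 0xC0, which as a signed value saturates to 0x7F.  Here
      rshift is 5, qDiff1 is 0x30 >> 5 == 1, and qDiff2 replicates the
      (zero) top bit, so qDiff1 != qDiff2 flags the saturation. */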

   /* SQSHLU */
   if (vex_streq(nm, "sqshlu")) {
      IROp qop = mkVecQSHLNSATSU(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* If there's no shift, saturation depends on the top bit
            of the source. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   vassert(0);
}


/* Generate IR to do SRHADD and URHADD. */
static
IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
{
   /* Generate this:
      (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
   */
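   /* E.g. for unsigned 8-bit lanes A == 0xFF, B == 0x01: 0x7F + 0x00 +
      ((1 + 1 + 1) >> 1) == 0x80, matching (A + B + 1) >> 1 computed at
      9-bit precision, but without needing any wider lanes. */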
   vassert(size <= 3);
   IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
   IROp opADD = mkVecADD(size);
   /* The only tricky bit is to generate the correct vector 1 constant. */
   const ULong ones64[4]
      = { 0x0101010101010101ULL, 0x0001000100010001ULL,
          0x0000000100000001ULL, 0x0000000000000001ULL };
   IRTemp imm64 = newTemp(Ity_I64);
   assign(imm64, mkU64(ones64[size]));
   IRTemp vecOne = newTempV128();
   assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
   IRTemp scaOne = newTemp(Ity_I8);
   assign(scaOne, mkU8(1));
   IRTemp res = newTempV128();
   assign(res,
          binop(opADD,
                binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
                binop(opADD,
                      binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
                      binop(opSHR,
                            binop(opADD,
                                  binop(opADD,
                                        binop(Iop_AndV128, mkexpr(aa),
                                                           mkexpr(vecOne)),
                                        binop(Iop_AndV128, mkexpr(bb),
                                                           mkexpr(vecOne))
                                  ),
                                  mkexpr(vecOne)
                            ),
                            mkexpr(scaOne)
                      )
                )
          )
   );
   return res;
}

/* QCFLAG tracks the SIMD sticky saturation status.  Update the status
   as follows: if, after application of |opZHI| to both |qres| and
   |nres|, they have the same value, leave QCFLAG unchanged.
   Otherwise, set it (implicitly) to 1.  |opZHI| may only be one of
   the Iop_ZeroHIxxofV128 operators, or Iop_INVALID, in which case
   |qres| and |nres| are used unmodified.  The presence of |opZHI|
   means this function can be used to generate QCFLAG update code for
   both scalar and vector SIMD operations.
*/
static
void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
{
   IRTemp diff      = newTempV128();
   IRTemp oldQCFLAG = newTempV128();
   IRTemp newQCFLAG = newTempV128();
   if (opZHI == Iop_INVALID) {
      assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
   } else {
      vassert(opZHI == Iop_ZeroHI64ofV128
              || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
      assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
   }
   assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
   assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
   stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
}


/* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
   are used unmodified, hence suitable for QCFLAG updates for whole-vector
   operations. */
static
void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
{
   updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
}
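
/* Typical use: once math_SQABS has produced (qabs, nabs), calling
   updateQCFLAGwithDifference(qabs, nabs) makes QCFLAG sticky-set
   whenever the saturating and non-saturating results differ. */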


/* Generate IR to rearrange two vector values in a way which is useful
   for doing S/D add-pair etc operations.  There are 3 cases:

   2d:  [m1 m0] [n1 n0]  -->  [m1 n1] [m0 n0]

   4s:  [m3 m2 m1 m0] [n3 n2 n1 n0]  -->  [m3 m1 n3 n1] [m2 m0 n2 n0]

   2s:  [m3 m2 m1 m0] [n3 n2 n1 n0]  -->  [0 0 m1 n1] [0 0 m0 n0]

   The cases are distinguished as follows:
   isD == True,  bitQ == 1  =>  2d
   isD == False, bitQ == 1  =>  4s
   isD == False, bitQ == 0  =>  2s
*/
static
void math_REARRANGE_FOR_FLOATING_PAIRWISE (
        /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
        IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
     )
{
   vassert(rearrL && *rearrL == IRTemp_INVALID);
   vassert(rearrR && *rearrR == IRTemp_INVALID);
   *rearrL = newTempV128();
   *rearrR = newTempV128();
   if (isD) {
      // 2d case
      vassert(bitQ == 1);
      assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
   }
   else if (!isD && bitQ == 1) {
      // 4s case
      assign(*rearrL, binop(Iop_CatOddLanes32x4,  mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
   } else {
      // 2s case
      vassert(!isD && bitQ == 0);
      IRTemp m1n1m0n0 = newTempV128();
      IRTemp m0n0m1n1 = newTempV128();
      assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
                             mkexpr(vecM), mkexpr(vecN)));
      assign(m0n0m1n1, triop(Iop_SliceV128,
                             mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
      assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
      assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
   }
}


/* Returns 2.0 ^ (-n) for n in 1 .. 64 */
static Double two_to_the_minus ( Int n )
{
   if (n == 1) return 0.5;
   vassert(n >= 2 && n <= 64);
   Int half = n / 2;
   return two_to_the_minus(half) * two_to_the_minus(n - half);
}


/* Returns 2.0 ^ n for n in 1 .. 64 */
static Double two_to_the_plus ( Int n )
{
   if (n == 1) return 2.0;
   vassert(n >= 2 && n <= 64);
   Int half = n / 2;
   return two_to_the_plus(half) * two_to_the_plus(n - half);
}
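
/* E.g. two_to_the_plus(5) computes two_to_the_plus(2) *
   two_to_the_plus(3) == 4.0 * 8.0 == 32.0; every intermediate value is
   a power of two and hence exactly representable in a Double. */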


/*------------------------------------------------------------*/
/*--- SIMD and FP instructions                             ---*/
/*------------------------------------------------------------*/

static
Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14   10 9 4
      0 q 101110 op2 0  m  0  imm4 0  n d
      Decode fields: op2
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(1,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ = INSN(30,30);
   UInt op2  = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt imm4 = INSN(14,11);
   UInt nn   = INSN(9,5);
   UInt dd   = INSN(4,0);

   if (op2 == BITS2(0,0)) {
      /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp sHi = newTempV128();
      IRTemp sLo = newTempV128();
      IRTemp res = newTempV128();
      assign(sHi, getQReg128(mm));
      assign(sLo, getQReg128(nn));
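      /* Conceptually the result is bytes imm4 .. imm4+15 of the
         concatenation sHi:sLo, which is what Iop_SliceV128 extracts
         directly in the 16b case below. */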
      if (bitQ == 1) {
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 >= 1 && imm4 <= 15);
            assign(res, triop(Iop_SliceV128,
                              mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
         }
         putQReg128(dd, mkexpr(res));
         DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
      } else {
         if (imm4 >= 8) return False;
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 >= 1 && imm4 <= 7);
            IRTemp hi64lo64 = newTempV128();
            assign(hi64lo64, binop(Iop_InterleaveLO64x2,
                                   mkexpr(sHi), mkexpr(sLo)));
            assign(res, triop(Iop_SliceV128,
                              mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
         }
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
         DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
      }
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14  12 11 9 4
      0 q 001110 op2 0  m  0  len op 00 n d
      Decode fields: op2,len,op
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0
       || INSN(15,15) != 0
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt op2   = INSN(23,22);
   UInt mm    = INSN(20,16);
   UInt len   = INSN(14,13);
   UInt bitOP = INSN(12,12);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (op2 == X00) {
      /* -------- 00,xx,0 TBL, xx register table -------- */
      /* -------- 00,xx,1 TBX, xx register table -------- */
      /* 31  28        20 15 14  12  9 4
         0q0 01110 000 m  0  len 000 n d  TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         0q0 01110 000 m  0  len 100 n d  TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         where Ta = 16b(q=1) or 8b(q=0)
      */
      Bool isTBX = bitOP == 1;
      /* The out-of-range values to use. */
      IRTemp oor_values = newTempV128();
      assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
      /* src value */
      IRTemp src = newTempV128();
      assign(src, getQReg128(mm));
      /* The table values */
      IRTemp tab[4];
      UInt   i;
      for (i = 0; i <= len; i++) {
         vassert(i < 4);
         tab[i] = newTempV128();
         assign(tab[i], getQReg128((nn + i) % 32));
      }
      IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* Ta = bitQ == 1 ? "16b" : "8b";
      const HChar* nm = isTBX ? "tbx" : "tbl";
      DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
          nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23   21 20 15 14     11 9 4
      0 q 001110 size 0  m  0  opcode 10 n d
      Decode fields: opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(14,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
      /* -------- 001 UZP1 std7_std7_std7 -------- */
      /* -------- 101 UZP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isUZP1 = opcode == BITS3(0,0,1);
      IROp   op     = isUZP1 ? mkVecCATEVENLANES(size)
                             : mkVecCATODDLANES(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0) {
         assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
                                                  getQReg128(nn)));
         assign(preR, mkexpr(preL));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isUZP1 ? "uzp1" : "uzp2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
      /* -------- 010 TRN1 std7_std7_std7 -------- */
      /* -------- 110 TRN2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isTRN1 = opcode == BITS3(0,1,0);
      IROp   op1    = isTRN1 ? mkVecCATEVENLANES(size)
                             : mkVecCATODDLANES(size);
      IROp op2 = mkVecINTERLEAVEHI(size);
      IRTemp srcM = newTempV128();
      IRTemp srcN = newTempV128();
      IRTemp res  = newTempV128();
      assign(srcM, getQReg128(mm));
      assign(srcN, getQReg128(nn));
      assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
                             binop(op1, mkexpr(srcN), mkexpr(srcN))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isTRN1 ? "trn1" : "trn2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
      /* -------- 011 ZIP1 std7_std7_std7 -------- */
      /* -------- 111 ZIP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isZIP1 = opcode == BITS3(0,1,1);
      IROp   op     = isZIP1 ? mkVecINTERLEAVELO(size)
                             : mkVecINTERLEAVEHI(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0 && !isZIP1) {
         IRTemp z128 = newTempV128();
         assign(z128, mkV128(0x0000));
         // preL = Vm shifted left 32 bits
         // preR = Vn shifted left 32 bits
         assign(preL, triop(Iop_SliceV128,
                            getQReg128(mm), mkexpr(z128), mkU8(12)));
         assign(preR, triop(Iop_SliceV128,
                            getQReg128(nn), mkexpr(z128), mkU8(12)));

      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isZIP1 ? "zip1" : "zip2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28    23   21    16     11 9 4
      0 q u 01110 size 11000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011 SADDLV -------- */
      /* -------- 1,xx,00011 UADDLV -------- */
      /* size is the narrow size */
      if (size == X11 || (size == X10 && bitQ == 0)) return False;
      Bool   isU = bitU == 1;
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      /* The basic plan is to widen the lower half, and if Q = 1,
         the upper half too.  Add them together (if Q = 1), and in
         either case fold with add at twice the lane width.
      */
      IRExpr* widened
         = mkexpr(math_WIDEN_LO_OR_HI_LANES(
                     isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
      if (bitQ == 1) {
         widened
            = binop(mkVecADD(size+1),
                    widened,
                    mkexpr(math_WIDEN_LO_OR_HI_LANES(
                              isU, True/*fromUpperHalf*/, size, mkexpr(src)))
              );
      }
      /* Now fold. */
      IRTemp tWi = newTempV128();
      assign(tWi, widened);
      IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
      putQReg128(dd, mkexpr(res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar  ch  = "hsdq"[size]; /* dest lane is twice the width */
      DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
          nameQReg128(dd), ch, nameQReg128(nn), arr);
      return True;
   }

   UInt ix = 0;
   /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
   else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
   else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
   /**/
   if (ix != 0) {
      /* -------- 0,xx,01010: SMAXV -------- (1) */
      /* -------- 1,xx,01010: UMAXV -------- (2) */
      /* -------- 0,xx,11010: SMINV -------- (3) */
      /* -------- 1,xx,11010: UMINV -------- (4) */
      /* -------- 0,xx,11011: ADDV  -------- (5) */
      vassert(ix >= 1 && ix <= 5);
      if (size == X11) return False; // 1d,2d cases not allowed
      if (size == X10 && bitQ == 0) return False; // 2s case not allowed
      const IROp opMAXS[3]
         = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
      const IROp opMAXU[3]
         = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
      const IROp opMINS[3]
         = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
      const IROp opMINU[3]
         = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
      const IROp opADD[3]
         = { Iop_Add8x16,  Iop_Add16x8,  Iop_Add32x4 };
      vassert(size < 3);
      IROp op = Iop_INVALID;
      const HChar* nm = NULL;
      switch (ix) {
         case 1: op = opMAXS[size]; nm = "smaxv"; break;
         case 2: op = opMAXU[size]; nm = "umaxv"; break;
         case 3: op = opMINS[size]; nm = "sminv"; break;
         case 4: op = opMINU[size]; nm = "uminv"; break;
         case 5: op = opADD[size];  nm = "addv";  break;
         default: vassert(0);
      }
      vassert(op != Iop_INVALID && nm != NULL);
      IRTemp tN1 = newTempV128();
      assign(tN1, getQReg128(nn));
      /* If Q == 0, we're just folding lanes in the lower half of
         the value.  In which case, copy the lower half of the
         source into the upper half, so we can then treat it the
         same as the full width case.  Except for the addition case,
         in which we have to zero out the upper half. */
      IRTemp tN2 = newTempV128();
      assign(tN2, bitQ == 0
                     ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
                                : mk_CatEvenLanes64x2(tN1,tN1))
                     : mkexpr(tN1));
      IRTemp res = math_FOLDV(tN2, op);
      if (res == IRTemp_INVALID)
         return False; /* means math_FOLDV
                          doesn't handle this case yet */
      putQReg128(dd, mkexpr(res));
      const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
      IRType laneTy = tys[size];
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s, %s.%s\n", nm,
          nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
      return True;
   }

   if ((size == X00 || size == X10)
       && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
      /* -------- 0,00,01100: FMAXNMV s_4s -------- */
      /* -------- 0,10,01100: FMINNMV s_4s -------- */
      /* -------- 1,00,01111: FMAXV   s_4s -------- */
      /* -------- 1,10,01111: FMINV   s_4s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      if (bitQ == 0) return False; // Only 4s is allowed
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(0,1,1,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp res = math_FOLDV(src, opMXX);
      putQReg128(dd, mkexpr(res));
      DIP("%s%sv s%u, v%u.4s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31     28       20   15 14   10 9 4
      0 q op 01110000 imm5 0  imm4 1  n d
      Decode fields: q,op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   /* -------- x,0,0000: DUP (element, vector) -------- */
   /* 31  28       20   15     9 4
      0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      UInt   laneNo    = 0;
      UInt   laneSzLg2 = 0;
      HChar  laneCh    = '?';
      IRTemp res       = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
                                             getQReg128(nn), imm5);
      if (res == IRTemp_INVALID)
         return False;
      if (bitQ == 0 && laneSzLg2 == X11)
         return False; /* .1d case */
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
      DIP("dup %s.%s, %s.%c[%u]\n",
           nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
      return True;
   }

   /* -------- x,0,0001: DUP (general, vector) -------- */
   /* 31  28       20   15       9 4
      0q0 01110000 imm5 0 0001 1 n d  DUP Vd.T, Rn
      Q=0 writes 64 bits, Q=1 writes 128 bits
      imm5: xxxx1  8b(q=0)      or 16b(q=1),     R=W
            xxx10  4h(q=0)      or 8h(q=1),      R=W
            xx100  2s(q=0)      or 4s(q=1),      R=W
            x1000  invalid(q=0) or 2d(q=1),      R=X
            x0000  invalid(q=0) or invalid(q=1)
      Require op=0, imm4=0001
8590   */
8591   if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
8592      Bool   isQ = bitQ == 1;
8593      IRTemp w0  = newTemp(Ity_I64);
8594      const HChar* arT = "??";
8595      IRType laneTy = Ity_INVALID;
8596      if (imm5 & 1) {
8597         arT    = isQ ? "16b" : "8b";
8598         laneTy = Ity_I8;
8599         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
8600      }
8601      else if (imm5 & 2) {
8602         arT    = isQ ? "8h" : "4h";
8603         laneTy = Ity_I16;
8604         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
8605      }
8606      else if (imm5 & 4) {
8607         arT    = isQ ? "4s" : "2s";
8608         laneTy = Ity_I32;
8609         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
8610      }
8611      else if ((imm5 & 8) && isQ) {
8612         arT    = "2d";
8613         laneTy = Ity_I64;
8614         assign(w0, getIReg64orZR(nn));
8615      }
8616      else {
8617         /* invalid; leave laneTy unchanged. */
8618      }
8619      /* */
8620      if (laneTy != Ity_INVALID) {
8621         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
8622         putQReg128(dd, binop(Iop_64HLtoV128,
8623                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
8624         DIP("dup %s.%s, %s\n",
8625             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
8626         return True;
8627      }
8628      /* invalid */
8629      return False;
8630   }
8631
8632   /* -------- 1,0,0011: INS (general) -------- */
8633   /* 31  28       20   15     9 4
8634      010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
8635      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
8636                                 xxx10 -> H, xxx
8637                                 xx100 -> S, xx
8638                                 x1000 -> D, x
8639   */
8640   if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
8641      HChar   ts     = '?';
8642      UInt    laneNo = 16;
8643      IRExpr* src    = NULL;
8644      if (imm5 & 1) {
8645         src    = unop(Iop_64to8, getIReg64orZR(nn));
8646         laneNo = (imm5 >> 1) & 15;
8647         ts     = 'b';
8648      }
8649      else if (imm5 & 2) {
8650         src    = unop(Iop_64to16, getIReg64orZR(nn));
8651         laneNo = (imm5 >> 2) & 7;
8652         ts     = 'h';
8653      }
8654      else if (imm5 & 4) {
8655         src    = unop(Iop_64to32, getIReg64orZR(nn));
8656         laneNo = (imm5 >> 3) & 3;
8657         ts     = 's';
8658      }
8659      else if (imm5 & 8) {
8660         src    = getIReg64orZR(nn);
8661         laneNo = (imm5 >> 4) & 1;
8662         ts     = 'd';
8663      }
8664      /* */
8665      if (src) {
8666         vassert(laneNo < 16);
8667         putQRegLane(dd, laneNo, src);
8668         DIP("ins %s.%c[%u], %s\n",
8669             nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
8670         return True;
8671      }
8672      /* invalid */
8673      return False;
8674   }
8675
8676   /* -------- x,0,0101: SMOV -------- */
8677   /* -------- x,0,0111: UMOV -------- */
8678   /* 31  28        20   15     9 4
8679      0q0 01110 000 imm5 001111 n d  UMOV Xd/Wd, Vn.Ts[index]
8680      0q0 01110 000 imm5 001011 n d  SMOV Xd/Wd, Vn.Ts[index]
8681      dest is Xd when q==1, Wd when q==0
8682      UMOV:
8683         Ts,index,ops = case q:imm5 of
8684                          0:xxxx1 -> B, xxxx, 8Uto64
8685                          1:xxxx1 -> invalid
8686                          0:xxx10 -> H, xxx,  16Uto64
8687                          1:xxx10 -> invalid
8688                          0:xx100 -> S, xx,   32Uto64
8689                          1:xx100 -> invalid
8690                          1:x1000 -> D, x,    copy64
8691                          other   -> invalid
8692      SMOV:
8693         Ts,index,ops = case q:imm5 of
8694                          0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
8695                          1:xxxx1 -> B, xxxx, 8Sto64
8696                          0:xxx10 -> H, xxx,  (32Uto64 . 16Sto32)
8697                          1:xxx10 -> H, xxx,  16Sto64
8698                          0:xx100 -> invalid
8699                          1:xx100 -> S, xx,   32Sto64
8700                          1:x1000 -> invalid
8701                          other   -> invalid
8702   */
8703   if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
8704      Bool isU  = (imm4 & 2) == 2;
8705      const HChar* arTs = "??";
8706      UInt    laneNo = 16; /* invalid */
8707      // Setting 'res' to non-NULL determines valid/invalid
8708      IRExpr* res    = NULL;
8709      if (!bitQ && (imm5 & 1)) { // 0:xxxx1
8710         laneNo = (imm5 >> 1) & 15;
8711         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
8712         res = isU ? unop(Iop_8Uto64, lane)
8713                   : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
8714         arTs = "b";
8715      }
8716      else if (bitQ && (imm5 & 1)) { // 1:xxxx1
8717         laneNo = (imm5 >> 1) & 15;
8718         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
8719         res = isU ? NULL
8720                   : unop(Iop_8Sto64, lane);
8721         arTs = "b";
8722      }
8723      else if (!bitQ && (imm5 & 2)) { // 0:xxx10
8724         laneNo = (imm5 >> 2) & 7;
8725         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
8726         res = isU ? unop(Iop_16Uto64, lane)
8727                   : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
8728         arTs = "h";
8729      }
8730      else if (bitQ && (imm5 & 2)) { // 1:xxx10
8731         laneNo = (imm5 >> 2) & 7;
8732         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
8733         res = isU ? NULL
8734                   : unop(Iop_16Sto64, lane);
8735         arTs = "h";
8736      }
8737      else if (!bitQ && (imm5 & 4)) { // 0:xx100
8738         laneNo = (imm5 >> 3) & 3;
8739         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
8740         res = isU ? unop(Iop_32Uto64, lane)
8741                   : NULL;
8742         arTs = "s";
8743      }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
8745         laneNo = (imm5 >> 3) & 3;
8746         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
8747         res = isU ? NULL
8748                   : unop(Iop_32Sto64, lane);
8749         arTs = "s";
8750      }
8751      else if (bitQ && (imm5 & 8)) { // 1:x1000
8752         laneNo = (imm5 >> 4) & 1;
8753         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
8754         res = isU ? lane
8755                   : NULL;
8756         arTs = "d";
8757      }
8758      /* */
8759      if (res) {
8760         vassert(laneNo < 16);
8761         putIReg64orZR(dd, res);
8762         DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
8763             nameIRegOrZR(bitQ == 1, dd),
8764             nameQReg128(nn), arTs, laneNo);
8765         return True;
8766      }
8767      /* invalid */
8768      return False;
8769   }
8770
8771   /* -------- 1,1,xxxx: INS (element) -------- */
8772   /* 31  28       20     14   9 4
8773      011 01110000 imm5 0 imm4 n d  INS Vd.Ts[ix1], Vn.Ts[ix2]
8774      where Ts,ix1,ix2
8775               = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
8776                              xxx10 -> H, xxx,  imm4[3:1]
8777                              xx100 -> S, xx,   imm4[3:2]
8778                              x1000 -> D, x,    imm4[3:3]
8779   */
8780   if (bitQ == 1 && bitOP == 1) {
8781      HChar   ts  = '?';
8782      IRType  ity = Ity_INVALID;
8783      UInt    ix1 = 16;
8784      UInt    ix2 = 16;
8785      if (imm5 & 1) {
8786         ts  = 'b';
8787         ity = Ity_I8;
8788         ix1 = (imm5 >> 1) & 15;
8789         ix2 = (imm4 >> 0) & 15;
8790      }
8791      else if (imm5 & 2) {
8792         ts  = 'h';
8793         ity = Ity_I16;
8794         ix1 = (imm5 >> 2) & 7;
8795         ix2 = (imm4 >> 1) & 7;
8796      }
8797      else if (imm5 & 4) {
8798         ts  = 's';
8799         ity = Ity_I32;
8800         ix1 = (imm5 >> 3) & 3;
8801         ix2 = (imm4 >> 2) & 3;
8802      }
8803      else if (imm5 & 8) {
8804         ts  = 'd';
8805         ity = Ity_I64;
8806         ix1 = (imm5 >> 4) & 1;
8807         ix2 = (imm4 >> 3) & 1;
8808      }
8809      /* */
8810      if (ity != Ity_INVALID) {
8811         vassert(ix1 < 16);
8812         vassert(ix2 < 16);
8813         putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
8814         DIP("ins %s.%c[%u], %s.%c[%u]\n",
8815             nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
8816         return True;
8817      }
8818      /* invalid */
8819      return False;
8820   }
8821
8822   return False;
8823#  undef INSN
8824}
8825
8826
8827static
8828Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
8829{
8830   /* 31    28          18  15    11 9     4
8831      0q op 01111 00000 abc cmode 01 defgh d
8832      Decode fields: q,op,cmode
8833      Bit 11 is really "o2", but it is always zero.
8834   */
8835#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
8836   if (INSN(31,31) != 0
8837       || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
8838       || INSN(11,10) != BITS2(0,1)) {
8839      return False;
8840   }
8841   UInt bitQ     = INSN(30,30);
8842   UInt bitOP    = INSN(29,29);
8843   UInt cmode    = INSN(15,12);
8844   UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
8845   UInt dd       = INSN(4,0);
8846
8847   ULong imm64lo  = 0;
8848   UInt  op_cmode = (bitOP << 4) | cmode;
8849   Bool  ok       = False;
8850   Bool  isORR    = False;
8851   Bool  isBIC    = False;
8852   Bool  isMOV    = False;
8853   Bool  isMVN    = False;
8854   Bool  isFMOV   = False;
8855   switch (op_cmode) {
8856      /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
8857      /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
8858      /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
8859      /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
8860      case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
8861      case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
8862         ok = True; isMOV = True; break;
8863
8864      /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
8865      /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
8866      /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
8867      /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
8868      case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
8869      case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
8870         ok = True; isORR = True; break;
8871
8872      /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
8873      /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
8874      case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
8875         ok = True; isMOV = True; break;
8876
8877      /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
8878      /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
8879      case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
8880         ok = True; isORR = True; break;
8881
8882      /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
8883      /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
8884      case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
8885         ok = True; isMOV = True; break;
8886
8887      /* -------- x,0,1110 MOVI 8-bit -------- */
8888      case BITS5(0,1,1,1,0):
8889         ok = True; isMOV = True; break;
8890
8891      /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
8892      case BITS5(0,1,1,1,1): // 0:1111
8893         ok = True; isFMOV = True; break;
8894
8895      /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
8896      /* -------- x,1,0010 MVNI 32-bit shifted imm  -------- */
8897      /* -------- x,1,0100 MVNI 32-bit shifted imm  -------- */
8898      /* -------- x,1,0110 MVNI 32-bit shifted imm  -------- */
8899      case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
8900      case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
8901         ok = True; isMVN = True; break;
8902
8903      /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
8904      /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
8905      /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
8906      /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
8907      case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
8908      case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
8909         ok = True; isBIC = True; break;
8910
8911      /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
8912      /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
8913      case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
8914         ok = True; isMVN = True; break;
8915
8916      /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
8917      /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
8918      case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
8919         ok = True; isBIC = True; break;
8920
8921      /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
8922      /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
8923      case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
8924         ok = True; isMVN = True; break;
8925
8926      /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
8927      /* -------- 1,1,1110 MOVI 64-bit vector -------- */
8928      case BITS5(1,1,1,1,0):
8929         ok = True; isMOV = True; break;
8930
8931      /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
8932      case BITS5(1,1,1,1,1): // 1:1111
8933         ok = bitQ == 1; isFMOV = True; break;
8934
8935      default:
8936        break;
8937   }
8938   if (ok) {
8939      vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
8940                   + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
8941      ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
8942   }
8943   if (ok) {
8944      if (isORR || isBIC) {
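         /* BIC with an immediate is just AND with its complement, so
            XOR the expanded constant with 'inv' (all ones for BIC,
            zero for ORR) and the OR/AND selection below handles both
            cases. */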
8945         ULong inv
8946            = isORR ? 0ULL : ~0ULL;
8947         IRExpr* immV128
8948            = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
8949         IRExpr* res
8950            = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
8951         const HChar* nm = isORR ? "orr" : "bic";
8952         if (bitQ == 0) {
8953            putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
8954            DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
8955         } else {
8956            putQReg128(dd, res);
8957            DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
8958                nameQReg128(dd), imm64lo, imm64lo);
8959         }
8960      }
8961      else if (isMOV || isMVN || isFMOV) {
8962         if (isMVN) imm64lo = ~imm64lo;
8963         ULong   imm64hi = bitQ == 0  ? 0  :  imm64lo;
8964         IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
8965                                                 mkU64(imm64lo));
8966         putQReg128(dd, immV128);
8967         DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
8968      }
8969      return True;
8970   }
8971   /* else fall through */
8972
8973   return False;
8974#  undef INSN
8975}
8976
8977
8978static
8979Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
8980{
8981   /* 31    28       20   15 14   10 9 4
8982      01 op 11110000 imm5 0  imm4 1  n d
8983      Decode fields: op,imm4
8984   */
8985#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
8986   if (INSN(31,30) != BITS2(0,1)
8987       || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
8988       || INSN(15,15) != 0 || INSN(10,10) != 1) {
8989      return False;
8990   }
8991   UInt bitOP = INSN(29,29);
8992   UInt imm5  = INSN(20,16);
8993   UInt imm4  = INSN(14,11);
8994   UInt nn    = INSN(9,5);
8995   UInt dd    = INSN(4,0);
8996
8997   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
8998      /* -------- 0,0000 DUP (element, scalar) -------- */
8999      IRTemp w0     = newTemp(Ity_I64);
9000      const HChar* arTs = "??";
9001      IRType laneTy = Ity_INVALID;
9002      UInt   laneNo = 16; /* invalid */
9003      if (imm5 & 1) {
9004         arTs   = "b";
9005         laneNo = (imm5 >> 1) & 15;
9006         laneTy = Ity_I8;
9007         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
9008      }
9009      else if (imm5 & 2) {
9010         arTs   = "h";
9011         laneNo = (imm5 >> 2) & 7;
9012         laneTy = Ity_I16;
9013         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
9014      }
9015      else if (imm5 & 4) {
9016         arTs   = "s";
9017         laneNo = (imm5 >> 3) & 3;
9018         laneTy = Ity_I32;
9019         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
9020      }
9021      else if (imm5 & 8) {
9022         arTs   = "d";
9023         laneNo = (imm5 >> 4) & 1;
9024         laneTy = Ity_I64;
9025         assign(w0, getQRegLane(nn, laneNo, laneTy));
9026      }
9027      else {
9028         /* invalid; leave laneTy unchanged. */
9029      }
9030      /* */
9031      if (laneTy != Ity_INVALID) {
9032         vassert(laneNo < 16);
9033         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
9034         DIP("dup %s, %s.%s[%u]\n",
9035             nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
9036         return True;
9037      }
9038      /* else fall through */
9039   }
9040
9041   return False;
9042#  undef INSN
9043}
9044
9045
9046static
9047Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
9048{
9049   /* 31   28    23 21    16     11 9 4
9050      01 u 11110 sz 11000 opcode 10 n d
9051      Decode fields: u,sz,opcode
9052   */
9053#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
9054   if (INSN(31,30) != BITS2(0,1)
9055       || INSN(28,24) != BITS5(1,1,1,1,0)
9056       || INSN(21,17) != BITS5(1,1,0,0,0)
9057       || INSN(11,10) != BITS2(1,0)) {
9058      return False;
9059   }
9060   UInt bitU   = INSN(29,29);
9061   UInt sz     = INSN(23,22);
9062   UInt opcode = INSN(16,12);
9063   UInt nn     = INSN(9,5);
9064   UInt dd     = INSN(4,0);
9065
9066   if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
9067      /* -------- 0,11,11011 ADDP d_2d -------- */
9068      IRTemp xy = newTempV128();
9069      IRTemp xx = newTempV128();
9070      assign(xy, getQReg128(nn));
9071      assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
9072      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9073                          binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
9074      DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
9075      return True;
9076   }
9077
9078   if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
      /* -------- 1,00,01101 FADDP s_2s -------- */
      /* -------- 1,01,01101 FADDP d_2d -------- */
9081      Bool   isD   = sz == X01;
9082      IROp   opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9083      IROp   opADD = mkVecADDF(isD ? 3 : 2);
9084      IRTemp src   = newTempV128();
9085      IRTemp argL  = newTempV128();
9086      IRTemp argR  = newTempV128();
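      /* argL is lane 0 of the source, zero-extended.  For argR,
         Iop_SliceV128 on src:src with a byte offset of 4 (S) or
         8 (D) rotates the source down, bringing lane 1 to lane 0;
         zero-extending that gives the other addend. */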
9087      assign(src, getQReg128(nn));
9088      assign(argL, unop(opZHI, mkexpr(src)));
9089      assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9090                                                    mkU8(isD ? 8 : 4))));
9091      putQReg128(dd, unop(opZHI,
9092                          triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
9093                                              mkexpr(argL), mkexpr(argR))));
9094      DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
9095      return True;
9096   }
9097
9098   if (bitU == 1
9099       && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
9100      /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
9101      /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
9102      /* -------- 1,0x,01111 FMAXP   d_2d, s_2s -------- */
9103      /* -------- 1,1x,01111 FMINP   d_2d, s_2s -------- */
9104      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
9105      Bool   isD   = (sz & 1) == 1;
9106      Bool   isMIN = (sz & 2) == 2;
9107      Bool   isNM  = opcode == BITS5(0,1,1,0,0);
9108      IROp   opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9109      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
9110      IRTemp src   = newTempV128();
9111      IRTemp argL  = newTempV128();
9112      IRTemp argR  = newTempV128();
9113      assign(src, getQReg128(nn));
9114      assign(argL, unop(opZHI, mkexpr(src)));
9115      assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9116                                                    mkU8(isD ? 8 : 4))));
9117      putQReg128(dd, unop(opZHI,
9118                          binop(opMXX, mkexpr(argL), mkexpr(argR))));
9119      HChar c = isD ? 'd' : 's';
9120      DIP("%s%sp %c%u, v%u.2%c\n",
9121           isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
9122      return True;
9123   }
9124
9125   return False;
9126#  undef INSN
9127}
9128
9129
9130static
9131Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
9132{
9133   /* 31   28     22   18   15     10 9 4
9134      01 u 111110 immh immb opcode 1  n d
9135      Decode fields: u,immh,opcode
9136   */
9137#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
9138   if (INSN(31,30) != BITS2(0,1)
9139       || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
9140      return False;
9141   }
9142   UInt bitU   = INSN(29,29);
9143   UInt immh   = INSN(22,19);
9144   UInt immb   = INSN(18,16);
9145   UInt opcode = INSN(15,11);
9146   UInt nn     = INSN(9,5);
9147   UInt dd     = INSN(4,0);
9148   UInt immhb  = (immh << 3) | immb;
9149
9150   if ((immh & 8) == 8
9151       && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
9152      /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
9153      /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
9154      /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
9155      /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
9156      Bool isU   = bitU == 1;
9157      Bool isAcc = opcode == BITS5(0,0,0,1,0);
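      /* The shift amount is encoded as 128 - immh:immb; since immh[3]
         is set, immhb is in 64..127 and sh is in 1..64.  Example:
         immh:immb == 1111:111 (immhb == 127) is a shift of #1.
         sh == 64 needs care: the unsigned shift gives zero, and the
         signed one is done as a shift by 63 (the 'nudge' below),
         which yields the same all-sign-bits result. */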
9158      UInt sh    = 128 - immhb;
9159      vassert(sh >= 1 && sh <= 64);
9160      IROp    op  = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
9161      IRExpr* src = getQReg128(nn);
9162      IRTemp  shf = newTempV128();
9163      IRTemp  res = newTempV128();
9164      if (sh == 64 && isU) {
9165         assign(shf, mkV128(0x0000));
9166      } else {
9167         UInt nudge = 0;
9168         if (sh == 64) {
9169            vassert(!isU);
9170            nudge = 1;
9171         }
9172         assign(shf, binop(op, src, mkU8(sh - nudge)));
9173      }
9174      assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9175                        : mkexpr(shf));
9176      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9177      const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
9178                              : (isU ? "ushr" : "sshr");
9179      DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9180      return True;
9181   }
9182
9183   if ((immh & 8) == 8
9184       && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
9185      /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
9186      /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
9187      /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
9188      /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
9189      Bool isU   = bitU == 1;
9190      Bool isAcc = opcode == BITS5(0,0,1,1,0);
9191      UInt sh    = 128 - immhb;
9192      vassert(sh >= 1 && sh <= 64);
      IROp    op  = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
9195      IRExpr* src  = getQReg128(nn);
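      /* The rounding-shift ops take a signed per-lane amount from the
         second operand, with negative meaning shift right (with
         rounding); hence -sh, duplicated to every byte lane.
         Example: sh == 3 puts (UChar)(-3) == 0xFD in each byte. */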
9196      IRTemp  imm8 = newTemp(Ity_I8);
9197      assign(imm8, mkU8((UChar)(-sh)));
9198      IRExpr* amt  = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
9199      IRTemp  shf  = newTempV128();
9200      IRTemp  res  = newTempV128();
9201      assign(shf, binop(op, src, amt));
9202      assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9203                        : mkexpr(shf));
9204      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9205      const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
9206                              : (isU ? "urshr" : "srshr");
9207      DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9208      return True;
9209   }
9210
9211   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
9212      /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
9213      UInt sh = 128 - immhb;
9214      vassert(sh >= 1 && sh <= 64);
9215      if (sh == 64) {
9216         putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
9217      } else {
9218         /* sh is in range 1 .. 63 */
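         /* nmask has the top 'sh' bits set: the arithmetic right
            shift replicates the sign bit.  Example: sh == 8 gives
            nmask == 0xFF00000000000000.  Those bits of dd are
            retained; the rest comes from nn >>u sh. */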
9219         ULong   nmask  = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
9220         IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9221         IRTemp  res    = newTempV128();
9222         assign(res, binop(Iop_OrV128,
9223                           binop(Iop_AndV128, getQReg128(dd), nmaskV),
9224                           binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
9225         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9226      }
9227      DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
9228      return True;
9229   }
9230
9231   if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9232      /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
9233      UInt sh = immhb - 64;
9234      vassert(sh >= 0 && sh < 64);
9235      putQReg128(dd,
9236                 unop(Iop_ZeroHI64ofV128,
9237                      sh == 0 ? getQReg128(nn)
9238                              : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9239      DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
9240      return True;
9241   }
9242
9243   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9244      /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
9245      UInt sh = immhb - 64;
9246      vassert(sh >= 0 && sh < 64);
9247      if (sh == 0) {
9248         putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
9249      } else {
9250         /* sh is in range 1 .. 63 */
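         /* nmask has the low 'sh' bits set.  Example: sh == 8 gives
            nmask == 0xFF.  Those bits of dd are retained; the rest
            comes from nn << sh. */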
9251         ULong   nmask  = (1ULL << sh) - 1;
9252         IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9253         IRTemp  res    = newTempV128();
9254         assign(res, binop(Iop_OrV128,
9255                           binop(Iop_AndV128, getQReg128(dd), nmaskV),
9256                           binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9257         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9258      }
9259      DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
9260      return True;
9261   }
9262
9263   if (opcode == BITS5(0,1,1,1,0)
9264       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
9265      /* -------- 0,01110  SQSHL  #imm -------- */
9266      /* -------- 1,01110  UQSHL  #imm -------- */
9267      /* -------- 1,01100  SQSHLU #imm -------- */
9268      UInt size  = 0;
9269      UInt shift = 0;
9270      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9271      if (!ok) return False;
9272      vassert(size >= 0 && size <= 3);
9273      /* The shift encoding has opposite sign for the leftwards case.
9274         Adjust shift to compensate. */
9275      UInt lanebits = 8 << size;
9276      shift = lanebits - shift;
9277      vassert(shift >= 0 && shift < lanebits);
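      /* Example: for B lanes (lanebits == 8), immh:immb == 0001:010
         (immhb == 10) makes getLaneInfo_IMMH_IMMB return shift == 6
         (its right-shift reading, 2*8 - immhb); the adjustment above
         recovers the left shift, 8 - 6 == 2. */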
9278      const HChar* nm = NULL;
9279      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
9280      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
9281      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
9282      else vassert(0);
9283      IRTemp qDiff1 = IRTemp_INVALID;
9284      IRTemp qDiff2 = IRTemp_INVALID;
9285      IRTemp res = IRTemp_INVALID;
9286      IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
9287      /* This relies on the fact that the zeroed out lanes generate zeroed
9288         result lanes and don't saturate, so there's no point in trimming
9289         the resulting res, qDiff1 or qDiff2 values. */
9290      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
9291      putQReg128(dd, mkexpr(res));
9292      updateQCFLAGwithDifference(qDiff1, qDiff2);
9293      const HChar arr = "bhsd"[size];
9294      DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
9295      return True;
9296   }
9297
9298   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
9299       || (bitU == 1
9300           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
9301      /* -------- 0,10010   SQSHRN #imm -------- */
9302      /* -------- 1,10010   UQSHRN #imm -------- */
9303      /* -------- 0,10011  SQRSHRN #imm -------- */
9304      /* -------- 1,10011  UQRSHRN #imm -------- */
9305      /* -------- 1,10000  SQSHRUN #imm -------- */
9306      /* -------- 1,10001 SQRSHRUN #imm -------- */
9307      UInt size  = 0;
9308      UInt shift = 0;
9309      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9310      if (!ok || size == X11) return False;
9311      vassert(size >= X00 && size <= X10);
9312      vassert(shift >= 1 && shift <= (8 << size));
9313      const HChar* nm = "??";
9314      IROp op = Iop_INVALID;
9315      /* Decide on the name and the operation. */
9316      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
9317         nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
9318      }
9319      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
9320         nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
9321      }
9322      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
9323         nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
9324      }
9325      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
9326         nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
9327      }
9328      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
9329         nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
9330      }
9331      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
9332         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
9333      }
9334      else vassert(0);
9335      /* Compute the result (Q, shifted value) pair. */
9336      IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
9337      IRTemp pair   = newTempV128();
9338      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
9339      /* Update the result reg */
9340      IRTemp res64in128 = newTempV128();
9341      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
9342      putQReg128(dd, mkexpr(res64in128));
9343      /* Update the Q flag. */
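      /* 'pair' holds the narrowed result in its lower 64 bits, and
         its upper 64 bits are nonzero iff saturation occurred.
         Duplicate the upper half into both halves and compare with
         zero: QC gets set exactly when some lane saturated. */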
9344      IRTemp q64q64 = newTempV128();
9345      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
9346      IRTemp z128 = newTempV128();
9347      assign(z128, mkV128(0x0000));
9348      updateQCFLAGwithDifference(q64q64, z128);
9349      /* */
9350      const HChar arrNarrow = "bhsd"[size];
9351      const HChar arrWide   = "bhsd"[size+1];
9352      DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
9353      return True;
9354   }
9355
9356   if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
9357      /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
9358      /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
9359      UInt size  = 0;
9360      UInt fbits = 0;
9361      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9362      /* The following holds because immh is never zero. */
9363      vassert(ok);
9364      /* The following holds because immh >= 0100. */
9365      vassert(size == X10 || size == X11);
9366      Bool isD = size == X11;
9367      Bool isU = bitU == 1;
9368      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
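      /* Fixed-point conversion: the integer has 'fbits' fraction
         bits, so the result is (FP)src * 2^-fbits.  Example:
         fbits == 8, src == 384 gives 384/256 == 1.5. */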
9369      Double  scale  = two_to_the_minus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
9372      IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
9373      IROp    opCVT  = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
9374                           : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
9375      IRType tyF = isD ? Ity_F64 : Ity_F32;
9376      IRType tyI = isD ? Ity_I64 : Ity_I32;
9377      IRTemp src = newTemp(tyI);
9378      IRTemp res = newTemp(tyF);
9379      IRTemp rm  = mk_get_IR_rounding_mode();
9380      assign(src, getQRegLane(nn, 0, tyI));
9381      assign(res, triop(opMUL, mkexpr(rm),
9382                               binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
9383      putQRegLane(dd, 0, mkexpr(res));
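      /* Zero the rest of the register: in the S case the I32 write
         clears bits 32..63 and the I64 write clears bits 64..127;
         in the D case only the I64 write is needed. */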
9384      if (!isD) {
9385         putQRegLane(dd, 1, mkU32(0));
9386      }
9387      putQRegLane(dd, 1, mkU64(0));
9388      const HChar ch = isD ? 'd' : 's';
9389      DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
9390          ch, dd, ch, nn, fbits);
9391      return True;
9392   }
9393
9394   if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
9395      /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
9396      /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
9397      UInt size  = 0;
9398      UInt fbits = 0;
9399      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9400      /* The following holds because immh is never zero. */
9401      vassert(ok);
9402      /* The following holds because immh >= 0100. */
9403      vassert(size == X10 || size == X11);
9404      Bool isD = size == X11;
9405      Bool isU = bitU == 1;
9406      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
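      /* The inverse of the fixed-point SCVTF/UCVTF above: scale by
         2^fbits, then truncate towards zero.  Example: fbits == 8,
         src == 1.5 gives trunc(1.5 * 256) == 384. */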
9407      Double  scale  = two_to_the_plus(fbits);
9408      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
9409                           : IRExpr_Const(IRConst_F32( (Float)scale ));
9410      IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
9411      IROp    opCVT  = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
9412                           : (isD ? Iop_F64toI64S : Iop_F32toI32S);
9413      IRType tyF = isD ? Ity_F64 : Ity_F32;
9414      IRType tyI = isD ? Ity_I64 : Ity_I32;
9415      IRTemp src = newTemp(tyF);
9416      IRTemp res = newTemp(tyI);
9417      IRTemp rm  = newTemp(Ity_I32);
9418      assign(src, getQRegLane(nn, 0, tyF));
9419      assign(rm,  mkU32(Irrm_ZERO));
9420      assign(res, binop(opCVT, mkexpr(rm),
9421                               triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
9422      putQRegLane(dd, 0, mkexpr(res));
9423      if (!isD) {
9424         putQRegLane(dd, 1, mkU32(0));
9425      }
9426      putQRegLane(dd, 1, mkU64(0));
9427      const HChar ch = isD ? 'd' : 's';
9428      DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
9429          ch, dd, ch, nn, fbits);
9430      return True;
9431   }
9432
9434   return False;
9435#  undef INSN
9436}
9437
9438
9439static
9440Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
9441{
9442   /* 31 29 28    23   21 20 15     11 9 4
9443      01 U  11110 size 1  m  opcode 00 n d
9444      Decode fields: u,opcode
9445   */
9446#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
9447   if (INSN(31,30) != BITS2(0,1)
9448       || INSN(28,24) != BITS5(1,1,1,1,0)
9449       || INSN(21,21) != 1
9450       || INSN(11,10) != BITS2(0,0)) {
9451      return False;
9452   }
9453   UInt bitU   = INSN(29,29);
9454   UInt size   = INSN(23,22);
9455   UInt mm     = INSN(20,16);
9456   UInt opcode = INSN(15,12);
9457   UInt nn     = INSN(9,5);
9458   UInt dd     = INSN(4,0);
9459   vassert(size < 4);
9460
9461   if (bitU == 0
9462       && (opcode == BITS4(1,1,0,1)
9463           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
9464      /* -------- 0,1101  SQDMULL -------- */ // 0 (ks)
9465      /* -------- 0,1001  SQDMLAL -------- */ // 1
9466      /* -------- 0,1011  SQDMLSL -------- */ // 2
9467      /* Widens, and size refers to the narrowed lanes. */
9468      UInt ks = 3;
9469      switch (opcode) {
9470         case BITS4(1,1,0,1): ks = 0; break;
9471         case BITS4(1,0,0,1): ks = 1; break;
9472         case BITS4(1,0,1,1): ks = 2; break;
9473         default: vassert(0);
9474      }
9475      vassert(ks >= 0 && ks <= 2);
9476      if (size == X00 || size == X11) return False;
9477      vassert(size <= 2);
9478      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
9479      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
9480      newTempsV128_3(&vecN, &vecM, &vecD);
9481      assign(vecN, getQReg128(nn));
9482      assign(vecM, getQReg128(mm));
9483      assign(vecD, getQReg128(dd));
9484      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
9485                       False/*!is2*/, size, "mas"[ks],
9486                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
9487      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
9488      putQReg128(dd, unop(opZHI, mkexpr(res)));
9489      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
9490      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
9491      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
9492         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
9493      }
9494      const HChar* nm        = ks == 0 ? "sqdmull"
9495                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
9496      const HChar  arrNarrow = "bhsd"[size];
9497      const HChar  arrWide   = "bhsd"[size+1];
9498      DIP("%s %c%d, %c%d, %c%d\n",
9499          nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
9500      return True;
9501   }
9502
9503   return False;
9504#  undef INSN
9505}
9506
9507
9508static
9509Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
9510{
9511   /* 31 29 28    23   21 20 15     10 9 4
9512      01 U  11110 size 1  m  opcode 1  n d
9513      Decode fields: u,size,opcode
9514   */
9515#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
9516   if (INSN(31,30) != BITS2(0,1)
9517       || INSN(28,24) != BITS5(1,1,1,1,0)
9518       || INSN(21,21) != 1
9519       || INSN(10,10) != 1) {
9520      return False;
9521   }
9522   UInt bitU   = INSN(29,29);
9523   UInt size   = INSN(23,22);
9524   UInt mm     = INSN(20,16);
9525   UInt opcode = INSN(15,11);
9526   UInt nn     = INSN(9,5);
9527   UInt dd     = INSN(4,0);
9528   vassert(size < 4);
9529
9530   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
9531      /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
9532      /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
9533      /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
9534      /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
9535      Bool isADD = opcode == BITS5(0,0,0,0,1);
9536      Bool isU   = bitU == 1;
9537      IROp qop   = Iop_INVALID;
9538      IROp nop   = Iop_INVALID;
9539      if (isADD) {
9540         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
9541         nop = mkVecADD(size);
9542      } else {
9543         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
9544         nop = mkVecSUB(size);
9545      }
9546      IRTemp argL = newTempV128();
9547      IRTemp argR = newTempV128();
9548      IRTemp qres = newTempV128();
9549      IRTemp nres = newTempV128();
9550      assign(argL, getQReg128(nn));
9551      assign(argR, getQReg128(mm));
9552      assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9553                             size, binop(qop, mkexpr(argL), mkexpr(argR)))));
9554      assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9555                             size, binop(nop, mkexpr(argL), mkexpr(argR)))));
9556      putQReg128(dd, mkexpr(qres));
9557      updateQCFLAGwithDifference(qres, nres);
9558      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
9559                               : (isU ? "uqsub" : "sqsub");
9560      const HChar  arr = "bhsd"[size];
9561      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
9562      return True;
9563   }
9564
9565   if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
9566      /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
9567      /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
9568      Bool    isGT = bitU == 0;
9569      IRExpr* argL = getQReg128(nn);
9570      IRExpr* argR = getQReg128(mm);
9571      IRTemp  res  = newTempV128();
9572      assign(res,
9573             isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
9574                  : binop(Iop_CmpGT64Ux2, argL, argR));
9575      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9576      DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi",
9577          nameQRegLO(dd, Ity_I64),
9578          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9579      return True;
9580   }
9581
9582   if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
9583      /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
9584      /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
9585      Bool    isGE = bitU == 0;
9586      IRExpr* argL = getQReg128(nn);
9587      IRExpr* argR = getQReg128(mm);
9588      IRTemp  res  = newTempV128();
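      /* a >= b is computed as !(b > a), valid for both the signed
         and the unsigned ordering. */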
9589      assign(res,
9590             isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
9591                  : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
9592      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9593      DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
9594          nameQRegLO(dd, Ity_I64),
9595          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9596      return True;
9597   }
9598
9599   if (size == X11 && (opcode == BITS5(0,1,0,0,0)
9600                       || opcode == BITS5(0,1,0,1,0))) {
      /* -------- 0,11,01000 SSHL  d_d_d -------- */
      /* -------- 0,11,01010 SRSHL d_d_d -------- */
      /* -------- 1,11,01000 USHL  d_d_d -------- */
      /* -------- 1,11,01010 URSHL d_d_d -------- */
9605      Bool isU = bitU == 1;
9606      Bool isR = opcode == BITS5(0,1,0,1,0);
9607      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
9608                     : (isU ? mkVecSHU(size)  : mkVecSHS(size));
9609      IRTemp res = newTempV128();
9610      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
9611      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9612      const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
9613                             : (isU ? "ushl"  : "sshl");
9614      DIP("%s %s, %s, %s\n", nm,
9615          nameQRegLO(dd, Ity_I64),
9616          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9617      return True;
9618   }
9619
9620   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
9621      /* -------- 0,xx,01001 SQSHL  std4_std4_std4 -------- */
9622      /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
9623      /* -------- 1,xx,01001 UQSHL  std4_std4_std4 -------- */
9624      /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
9625      Bool isU = bitU == 1;
9626      Bool isR = opcode == BITS5(0,1,0,1,1);
9627      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
9628                     : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
9629      /* This is a bit tricky.  Since we're only interested in the lowest
9630         lane of the result, we zero out all the rest in the operands, so
9631         as to ensure that other lanes don't pollute the returned Q value.
9632         This works because it means, for the lanes we don't care about, we
9633         are shifting zero by zero, which can never saturate. */
9634      IRTemp res256 = newTemp(Ity_V256);
9635      IRTemp resSH  = newTempV128();
9636      IRTemp resQ   = newTempV128();
9637      IRTemp zero   = newTempV128();
9638      assign(
9639         res256,
9640         binop(op,
9641               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
9642               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
9643      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
9644      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
9645      assign(zero,  mkV128(0x0000));
9646      putQReg128(dd, mkexpr(resSH));
9647      updateQCFLAGwithDifference(resQ, zero);
9648      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
9649                             : (isU ? "uqshl"  : "sqshl");
9650      const HChar  arr = "bhsd"[size];
9651      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
9652      return True;
9653   }
9654
9655   if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
9656      /* -------- 0,11,10000 ADD d_d_d -------- */
9657      /* -------- 1,11,10000 SUB d_d_d -------- */
9658      Bool   isSUB = bitU == 1;
9659      IRTemp res   = newTemp(Ity_I64);
9660      assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
9661                        getQRegLane(nn, 0, Ity_I64),
9662                        getQRegLane(mm, 0, Ity_I64)));
9663      putQRegLane(dd, 0, mkexpr(res));
9664      putQRegLane(dd, 1, mkU64(0));
9665      DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
9666          nameQRegLO(dd, Ity_I64),
9667          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9668      return True;
9669   }
9670
9671   if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
9672      /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
9673      /* -------- 1,11,10001 CMEQ  d_d_d -------- */ // ==
9674      Bool    isEQ = bitU == 1;
9675      IRExpr* argL = getQReg128(nn);
9676      IRExpr* argR = getQReg128(mm);
9677      IRTemp  res  = newTempV128();
9678      assign(res,
9679             isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
9680                  : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
9681                                            binop(Iop_AndV128, argL, argR),
9682                                            mkV128(0x0000))));
9683      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9684      DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
9685          nameQRegLO(dd, Ity_I64),
9686          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9687      return True;
9688   }
9689
9690   if (opcode == BITS5(1,0,1,1,0)) {
9691      /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
9692      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
9693      if (size == X00 || size == X11) return False;
9694      Bool isR = bitU == 1;
9695      IRTemp res, sat1q, sat1n, vN, vM;
9696      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
9697      newTempsV128_2(&vN, &vM);
9698      assign(vN, getQReg128(nn));
9699      assign(vM, getQReg128(mm));
9700      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
9701      putQReg128(dd,
9702                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
9703      updateQCFLAGwithDifference(
9704         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
9705         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
9706      const HChar  arr = "bhsd"[size];
9707      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
9708      DIP("%s %c%d, %c%d, %c%d\n", nm, arr, dd, arr, nn, arr, mm);
9709      return True;
9710   }
9711
9712   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
9713      /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
9714      IRType ity = size == X11 ? Ity_F64 : Ity_F32;
9715      IRTemp res = newTemp(ity);
9716      assign(res, unop(mkABSF(ity),
9717                       triop(mkSUBF(ity),
9718                             mkexpr(mk_get_IR_rounding_mode()),
9719                             getQRegLO(nn,ity), getQRegLO(mm,ity))));
9720      putQReg128(dd, mkV128(0x0000));
9721      putQRegLO(dd, mkexpr(res));
9722      DIP("fabd %s, %s, %s\n",
9723          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9724      return True;
9725   }
9726
9727   if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
9728      /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
9729      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
9730      IRType ity = size == X01 ? Ity_F64 : Ity_F32;
9731      IRTemp res = newTemp(ity);
9732      assign(res, triop(mkMULF(ity),
9733                        mkexpr(mk_get_IR_rounding_mode()),
9734                        getQRegLO(nn,ity), getQRegLO(mm,ity)));
9735      putQReg128(dd, mkV128(0x0000));
9736      putQRegLO(dd, mkexpr(res));
9737      DIP("fmulx %s, %s, %s\n",
9738          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9739      return True;
9740   }
9741
9742   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
9743      /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
9744      /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
9745      Bool   isD   = size == X01;
9746      IRType ity   = isD ? Ity_F64 : Ity_F32;
9747      Bool   isGE  = bitU == 1;
9748      IROp   opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
9749                          : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
9750      IRTemp res   = newTempV128();
9751      assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
9752                       : binop(opCMP, getQReg128(nn), getQReg128(mm)));
9753      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9754                                                             mkexpr(res))));
9755      DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
9756          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9757      return True;
9758   }
9759
9760   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
9761      /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
9762      Bool   isD   = size == X11;
9763      IRType ity   = isD ? Ity_F64 : Ity_F32;
9764      IROp   opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
9765      IRTemp res   = newTempV128();
9766      assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
9767      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9768                                                             mkexpr(res))));
9769      DIP("%s %s, %s, %s\n", "fcmgt",
9770          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9771      return True;
9772   }
9773
9774   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
9775      /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
9776      /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
9777      Bool   isD   = (size & 1) == 1;
9778      IRType ity   = isD ? Ity_F64 : Ity_F32;
9779      Bool   isGT  = (size & 2) == 2;
9780      IROp   opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
9781                          : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
9782      IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
9783      IRTemp res   = newTempV128();
9784      assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
9785                               unop(opABS, getQReg128(nn)))); // swapd
9786      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9787                                                             mkexpr(res))));
9788      DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
9789          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9790      return True;
9791   }
9792
9793   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
9794      /* -------- 0,0x,11111: FRECPS  d_d_d, s_s_s -------- */
9795      /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
9796      Bool isSQRT = (size & 2) == 2;
9797      Bool isD    = (size & 1) == 1;
9798      IROp op     = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
9799                           : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
9800      IRTemp res = newTempV128();
9801      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
9802      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9803                                                             mkexpr(res))));
9804      HChar c = isD ? 'd' : 's';
9805      DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
9806          c, dd, c, nn, c, mm);
9807      return True;
9808   }
9809
9810   return False;
9811#  undef INSN
9812}
9813
9814
9815static
9816Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
9817{
9818   /* 31 29 28    23   21    16     11 9 4
9819      01 U  11110 size 10000 opcode 10 n d
9820      Decode fields: u,size,opcode
9821   */
9822#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
9823   if (INSN(31,30) != BITS2(0,1)
9824       || INSN(28,24) != BITS5(1,1,1,1,0)
9825       || INSN(21,17) != BITS5(1,0,0,0,0)
9826       || INSN(11,10) != BITS2(1,0)) {
9827      return False;
9828   }
9829   UInt bitU   = INSN(29,29);
9830   UInt size   = INSN(23,22);
9831   UInt opcode = INSN(16,12);
9832   UInt nn     = INSN(9,5);
9833   UInt dd     = INSN(4,0);
9834   vassert(size < 4);
9835
9836   if (opcode == BITS5(0,0,0,1,1)) {
9837      /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
9838      /* -------- 1,xx,00011: USQADD std4_std4 -------- */
9839      /* These are a bit tricky (to say the least).  See comments on
9840         the vector variants (in dis_AdvSIMD_two_reg_misc) below for
9841         details. */
9842      Bool   isUSQADD = bitU == 1;
9843      IROp   qop  = isUSQADD ? mkVecQADDEXTSUSATUU(size)
9844                             : mkVecQADDEXTUSSATSS(size);
9845      IROp   nop  = mkVecADD(size);
9846      IRTemp argL = newTempV128();
9847      IRTemp argR = newTempV128();
9848      assign(argL, getQReg128(nn));
9849      assign(argR, getQReg128(dd));
9850      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9851                       size, binop(qop, mkexpr(argL), mkexpr(argR)));
9852      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9853                       size, binop(nop, mkexpr(argL), mkexpr(argR)));
9854      putQReg128(dd, mkexpr(qres));
9855      updateQCFLAGwithDifference(qres, nres);
9856      const HChar arr = "bhsd"[size];
9857      DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
9858      return True;
9859   }
9860
9861   if (opcode == BITS5(0,0,1,1,1)) {
9862      /* -------- 0,xx,00111 SQABS std4_std4 -------- */
9863      /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
9864      Bool isNEG = bitU == 1;
9865      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
9866      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
9867                                         getQReg128(nn), size );
9868      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
9869      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
9870      putQReg128(dd, mkexpr(qres));
9871      updateQCFLAGwithDifference(qres, nres);
9872      const HChar arr = "bhsd"[size];
9873      DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
9874      return True;
9875   }
9876
9877   if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
9878      /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
9879      /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
9880      Bool    isGT = bitU == 0;
9881      IRExpr* argL = getQReg128(nn);
9882      IRExpr* argR = mkV128(0x0000);
9883      IRTemp  res  = newTempV128();
9884      assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
9885                       : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
9886      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9887      DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
9888      return True;
9889   }
9890
9891   if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
9892      /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
9893      /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
9894      Bool    isEQ = bitU == 0;
9895      IRExpr* argL = getQReg128(nn);
9896      IRExpr* argR = mkV128(0x0000);
9897      IRTemp  res  = newTempV128();
9898      assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
9899                       : unop(Iop_NotV128,
9900                              binop(Iop_CmpGT64Sx2, argL, argR)));
9901      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9902      DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
9903      return True;
9904   }
9905
9906   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
9907      /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
9908      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9909                          binop(Iop_CmpGT64Sx2, mkV128(0x0000),
9910                                                getQReg128(nn))));
9911      DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
9912      return True;
9913   }
9914
9915   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
9916      /* -------- 0,11,01011 ABS d_d -------- */
9917      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9918                          unop(Iop_Abs64x2, getQReg128(nn))));
9919      DIP("abs d%u, d%u\n", dd, nn);
9920      return True;
9921   }
9922
9923   if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
9924      /* -------- 1,11,01011 NEG d_d -------- */
9925      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9926                          binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
9927      DIP("neg d%u, d%u\n", dd, nn);
9928      return True;
9929   }
9930
9931   UInt ix = 0; /*INVALID*/
9932   if (size >= X10) {
9933      switch (opcode) {
9934         case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
9935         case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
9936         case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
9937         default: break;
9938      }
9939   }
9940   if (ix > 0) {
9941      /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
9942      /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
9943      /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
9944      /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
9945      /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
9946      Bool   isD     = size == X11;
9947      IRType ity     = isD ? Ity_F64 : Ity_F32;
9948      IROp   opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
9949      IROp   opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
9950      IROp   opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
9951      IROp   opCmp   = Iop_INVALID;
9952      Bool   swap    = False;
9953      const HChar* nm = "??";
9954      switch (ix) {
9955         case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
9956         case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
9957         case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
9958         case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
9959         case 5: nm = "fcmle"; opCmp = opCmpLE; break;
9960         default: vassert(0);
9961      }
9962      IRExpr* zero = mkV128(0x0000);
9963      IRTemp res = newTempV128();
9964      assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
9965                       : binop(opCmp, getQReg128(nn), zero));
9966      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9967                                                             mkexpr(res))));
9968
9969      DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
9970      return True;
9971   }
9972
9973   if (opcode == BITS5(1,0,1,0,0)
9974       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
9975      /* -------- 0,xx,10100: SQXTN -------- */
9976      /* -------- 1,xx,10100: UQXTN -------- */
9977      /* -------- 1,xx,10010: SQXTUN -------- */
9978      if (size == X11) return False;
9979      vassert(size < 3);
9980      IROp  opN    = Iop_INVALID;
9981      Bool  zWiden = True;
9982      const HChar* nm = "??";
9983      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
9984         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
9985      }
9986      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
9987         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
9988      }
9989      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
9990         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
9991      }
9992      else vassert(0);
9993      IRTemp src  = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9994                       size+1, getQReg128(nn));
9995      IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9996                       size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
9997      putQReg128(dd, mkexpr(resN));
9998      /* This widens zero lanes to zero, and compares it against zero, so all
9999         of the non-participating lanes make no contribution to the
10000         Q flag state. */
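      /* A worked example of that trick, for SQXTN h_s (size == X01):
         narrowing the S value 0x00012345 saturates to 0x7FFF, and
         re-widening that gives 0x00007FFF, which differs from the
         original, so the QC bit gets set.  An in-range input would
         re-widen to exactly itself and leave QC untouched. */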
10001      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
10002                                              size, mkexpr(resN));
10003      updateQCFLAGwithDifference(src, resW);
10004      const HChar arrNarrow = "bhsd"[size];
10005      const HChar arrWide   = "bhsd"[size+1];
10006      DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
10007      return True;
10008   }
10009
10010   if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
10011      /* -------- 1,01,10110 FCVTXN s_d -------- */
      /* Using Irrm_NEAREST here isn't right.  "Round to odd" (von
         Neumann rounding) means: truncate, then set the result's
         least significant bit if any discarded bits were nonzero.
         That makes a later re-rounding of the F32 result immune to
         double rounding.  There is no IR rounding mode for it, so
         this kludges it as round-to-nearest. */
10014      putQRegLO(dd,
10015                binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
10016                                    getQRegLO(nn, Ity_F64)));
      putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
      putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10019      DIP("fcvtxn s%u, d%u\n", dd, nn);
10020      return True;
10021   }
10022
10023   ix = 0; /*INVALID*/
10024   switch (opcode) {
10025      case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
10026      case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
10027      case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
10028      default: break;
10029   }
10030   if (ix > 0) {
10031      /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
10032      /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
10033      /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
10034      /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
10035      /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
      /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
      /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
      /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
      /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
      /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
10041      Bool           isD  = (size & 1) == 1;
10042      IRType         tyF  = isD ? Ity_F64 : Ity_F32;
10043      IRType         tyI  = isD ? Ity_I64 : Ity_I32;
10044      IRRoundingMode irrm = 8; /*impossible*/
10045      HChar          ch   = '?';
10046      switch (ix) {
10047         case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
10048         case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
         /* kludge: case 3 (FCVTA*) should round ties away from zero,
            but Irrm_NEAREST rounds ties to even. */
         case 3: ch = 'a'; irrm = Irrm_NEAREST; break;
10050         case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
10051         case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
10052         default: vassert(0);
10053      }
10054      IROp cvt = Iop_INVALID;
10055      if (bitU == 1) {
10056         cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
10057      } else {
10058         cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
10059      }
10060      IRTemp src = newTemp(tyF);
10061      IRTemp res = newTemp(tyI);
10062      assign(src, getQRegLane(nn, 0, tyF));
10063      assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
10064      putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
10065      if (!isD) {
10066         putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10067      }
10068      putQRegLane(dd, 1, mkU64(0));    /* bits 127-64 */
10069      HChar sOrD = isD ? 'd' : 's';
10070      DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
10071          sOrD, dd, sOrD, nn);
10072      return True;
10073   }
10074
10075   if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
10076      /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
10077      /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
10078      Bool   isU = bitU == 1;
10079      Bool   isD = (size & 1) == 1;
10080      IRType tyI = isD ? Ity_I64 : Ity_I32;
10081      IROp   iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10082                       : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10083      IRTemp rm  = mk_get_IR_rounding_mode();
10084      putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
10085      if (!isD) {
10086         putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10087      }
10088      putQRegLane(dd, 1, mkU64(0));    /* bits 127-64 */
10089      HChar c = isD ? 'd' : 's';
10090      DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
10091      return True;
10092   }
10093
10094   if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
10095      /* -------- 0,1x,11101: FRECPE  d_d, s_s -------- */
10096      /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
10097      Bool isSQRT = bitU == 1;
10098      Bool isD    = (size & 1) == 1;
10099      IROp op     = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
10100                           : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
10101      IRTemp resV = newTempV128();
10102      assign(resV, unop(op, getQReg128(nn)));
10103      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10104                                                             mkexpr(resV))));
10105      HChar c = isD ? 'd' : 's';
10106      DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
10107      return True;
10108   }
10109
10110   if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
10111      /* -------- 0,1x,11111: FRECPX  d_d, s_s -------- */
10112      Bool   isD = (size & 1) == 1;
10113      IRType ty  = isD ? Ity_F64 : Ity_F32;
10114      IROp   op  = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
10115      IRTemp res = newTemp(ty);
10116      IRTemp rm  = mk_get_IR_rounding_mode();
10117      assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
10118      putQReg128(dd, mkV128(0x0000));
10119      putQRegLane(dd, 0, mkexpr(res));
10120      HChar c = isD ? 'd' : 's';
10121      DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
10122      return True;
10123   }
10124
10125   return False;
10126#  undef INSN
10127}
10128
10129
10130static
10131Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
10132{
10133   /* 31   28    23   21 20 19 15     11   9 4
10134      01 U 11111 size L  M  m  opcode H  0 n d
10135      Decode fields are: u,size,opcode
10136      M is really part of the mm register number.  Individual
10137      cases need to inspect L and H though.
10138   */
10139#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
10140   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
10142      return False;
10143   }
10144   UInt bitU   = INSN(29,29);
10145   UInt size   = INSN(23,22);
10146   UInt bitL   = INSN(21,21);
10147   UInt bitM   = INSN(20,20);
10148   UInt mmLO4  = INSN(19,16);
10149   UInt opcode = INSN(15,12);
10150   UInt bitH   = INSN(11,11);
10151   UInt nn     = INSN(9,5);
10152   UInt dd     = INSN(4,0);
10153   vassert(size < 4);
10154   vassert(bitH < 2 && bitM < 2 && bitL < 2);
10155
10156   if (bitU == 0 && size >= X10
10157       && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
10158      /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
10159      /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
10160      Bool isD   = (size & 1) == 1;
10161      Bool isSUB = opcode == BITS4(0,1,0,1);
10162      UInt index;
10163      if      (!isD)             index = (bitH << 1) | bitL;
10164      else if (isD && bitL == 0) index = bitH;
10165      else return False; // sz:L == x11 => unallocated encoding
10166      vassert(index < (isD ? 2 : 4));
10167      IRType ity   = isD ? Ity_F64 : Ity_F32;
10168      IRTemp elem  = newTemp(ity);
10169      UInt   mm    = (bitM << 4) | mmLO4;
10170      assign(elem, getQRegLane(mm, index, ity));
10171      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
10172      IROp   opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
10173      IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
10174      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10175      IRTemp rm    = mk_get_IR_rounding_mode();
10176      IRTemp t1    = newTempV128();
10177      IRTemp t2    = newTempV128();
10178      // FIXME: double rounding; use FMA primops instead
10179      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10180      assign(t2, triop(isSUB ? opSUB : opADD,
10181                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
10182      putQReg128(dd,
10183                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10184                                                         mkexpr(t2))));
10185      const HChar c = isD ? 'd' : 's';
10186      DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
10187          c, dd, c, nn, nameQReg128(mm), c, index);
10188      return True;
10189   }
10190
10191   if (size >= X10 && opcode == BITS4(1,0,0,1)) {
10192      /* -------- 0,1x,1001 FMUL  d_d_d[], s_s_s[] -------- */
10193      /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
10194      Bool isD    = (size & 1) == 1;
10195      Bool isMULX = bitU == 1;
10196      UInt index;
10197      if      (!isD)             index = (bitH << 1) | bitL;
10198      else if (isD && bitL == 0) index = bitH;
10199      else return False; // sz:L == x11 => unallocated encoding
10200      vassert(index < (isD ? 2 : 4));
10201      IRType ity   = isD ? Ity_F64 : Ity_F32;
10202      IRTemp elem  = newTemp(ity);
10203      UInt   mm    = (bitM << 4) | mmLO4;
10204      assign(elem, getQRegLane(mm, index, ity));
10205      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
10206      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10207      IRTemp rm    = mk_get_IR_rounding_mode();
10208      IRTemp t1    = newTempV128();
10209      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
10210      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10211      putQReg128(dd,
10212                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10213                                                         mkexpr(t1))));
10214      const HChar c = isD ? 'd' : 's';
10215      DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
10216          c, dd, c, nn, nameQReg128(mm), c, index);
10217      return True;
10218   }
10219
10220   if (bitU == 0
10221       && (opcode == BITS4(1,0,1,1)
10222           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
10223      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
10224      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
10225      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
10226      /* Widens, and size refers to the narrowed lanes. */
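      /* SQDMULL doubles the wide product and saturates; e.g. for h
         lanes, 0x8000 * 0x8000 doubled is 0x80000000, which saturates
         to 0x7FFFFFFF and sets QC -- the only case in which the
         doubling itself overflows. */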
10227      UInt ks = 3;
10228      switch (opcode) {
10229         case BITS4(1,0,1,1): ks = 0; break;
10230         case BITS4(0,0,1,1): ks = 1; break;
10231         case BITS4(0,1,1,1): ks = 2; break;
10232         default: vassert(0);
10233      }
10234      vassert(ks >= 0 && ks <= 2);
10235      UInt mm  = 32; // invalid
10236      UInt ix  = 16; // invalid
10237      switch (size) {
10238         case X00:
10239            return False; // h_b_b[] case is not allowed
10240         case X01:
10241            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10242         case X10:
10243            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10244         case X11:
10245            return False; // q_d_d[] case is not allowed
10246         default:
10247            vassert(0);
10248      }
10249      vassert(mm < 32 && ix < 16);
10250      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
10251      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10252      newTempsV128_2(&vecN, &vecD);
10253      assign(vecN, getQReg128(nn));
10254      IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10255      assign(vecD, getQReg128(dd));
10256      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10257                       False/*!is2*/, size, "mas"[ks],
10258                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10259      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
10260      putQReg128(dd, unop(opZHI, mkexpr(res)));
10261      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10262      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10263      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10264         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
10265      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10268      const HChar  arrNarrow = "bhsd"[size];
10269      const HChar  arrWide   = "bhsd"[size+1];
10270      DIP("%s %c%d, %c%d, v%d.%c[%u]\n",
10271          nm, arrWide, dd, arrNarrow, nn, dd, arrNarrow, ix);
10272      return True;
10273   }
10274
10275   if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
10276      /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
10277      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
10278      UInt mm  = 32; // invalid
10279      UInt ix  = 16; // invalid
10280      switch (size) {
10281         case X00:
10282            return False; // b case is not allowed
10283         case X01:
10284            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10285         case X10:
10286            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10287         case X11:
10288            return False; // q case is not allowed
10289         default:
10290            vassert(0);
10291      }
10292      vassert(mm < 32 && ix < 16);
10293      Bool isR = opcode == BITS4(1,1,0,1);
10294      IRTemp res, sat1q, sat1n, vN, vM;
10295      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10296      vN = newTempV128();
10297      assign(vN, getQReg128(nn));
10298      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10299      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10300      IROp opZHI = mkVecZEROHIxxOFV128(size);
10301      putQReg128(dd, unop(opZHI, mkexpr(res)));
10302      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10303      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
10304      HChar ch         = size == X01 ? 'h' : 's';
10305      DIP("%s %c%d, %c%d, v%d.%c[%u]\n", nm, ch, dd, ch, nn, ch, dd, ix);
10306      return True;
10307   }
10308
10309   return False;
10310#  undef INSN
10311}
10312
10313
10314static
10315Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
10316{
10317   /* 31    28     22   18   15     10 9 4
10318      0 q u 011110 immh immb opcode 1  n d
10319      Decode fields: u,opcode
10320   */
10321#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
10322   if (INSN(31,31) != 0
10323       || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
10324      return False;
10325   }
10326   UInt bitQ   = INSN(30,30);
10327   UInt bitU   = INSN(29,29);
10328   UInt immh   = INSN(22,19);
10329   UInt immb   = INSN(18,16);
10330   UInt opcode = INSN(15,11);
10331   UInt nn     = INSN(9,5);
10332   UInt dd     = INSN(4,0);
10333
10334   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
10335      /* -------- 0,00000 SSHR std7_std7_#imm -------- */
10336      /* -------- 1,00000 USHR std7_std7_#imm -------- */
10337      /* -------- 0,00010 SSRA std7_std7_#imm -------- */
10338      /* -------- 1,00010 USRA std7_std7_#imm -------- */
10339      /* laneTy, shift = case immh:immb of
10340                         0001:xxx -> B, SHR:8-xxx
10341                         001x:xxx -> H, SHR:16-xxxx
10342                         01xx:xxx -> S, SHR:32-xxxxx
10343                         1xxx:xxx -> D, SHR:64-xxxxxx
10344                         other    -> invalid
10345      */
10346      UInt size  = 0;
10347      UInt shift = 0;
10348      Bool isQ   = bitQ == 1;
10349      Bool isU   = bitU == 1;
10350      Bool isAcc = opcode == BITS5(0,0,0,1,0);
10351      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10352      if (!ok || (bitQ == 0 && size == X11)) return False;
10353      vassert(size >= 0 && size <= 3);
10354      UInt lanebits = 8 << size;
10355      vassert(shift >= 1 && shift <= lanebits);
10356      IROp    op  = isU ? mkVecSHRN(size) : mkVecSARN(size);
10357      IRExpr* src = getQReg128(nn);
10358      IRTemp  shf = newTempV128();
10359      IRTemp  res = newTempV128();
10360      if (shift == lanebits && isU) {
10361         assign(shf, mkV128(0x0000));
10362      } else {
10363         UInt nudge = 0;
10364         if (shift == lanebits) {
10365            vassert(!isU);
10366            nudge = 1;
10367         }
10368         assign(shf, binop(op, src, mkU8(shift - nudge)));
10369      }
10370      assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10371                        : mkexpr(shf));
10372      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10373      HChar laneCh = "bhsd"[size];
10374      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
10375      const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
10376                              : (isU ? "ushr" : "sshr");
10377      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10378          nameQReg128(dd), nLanes, laneCh,
10379          nameQReg128(nn), nLanes, laneCh, shift);
10380      return True;
10381   }
10382
10383   if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
10384      /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
10385      /* -------- 1,00100 URSHR std7_std7_#imm -------- */
10386      /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
10387      /* -------- 1,00110 URSRA std7_std7_#imm -------- */
10388      /* laneTy, shift = case immh:immb of
10389                         0001:xxx -> B, SHR:8-xxx
10390                         001x:xxx -> H, SHR:16-xxxx
10391                         01xx:xxx -> S, SHR:32-xxxxx
10392                         1xxx:xxx -> D, SHR:64-xxxxxx
10393                         other    -> invalid
10394      */
10395      UInt size  = 0;
10396      UInt shift = 0;
10397      Bool isQ   = bitQ == 1;
10398      Bool isU   = bitU == 1;
10399      Bool isAcc = opcode == BITS5(0,0,1,1,0);
10400      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10401      if (!ok || (bitQ == 0 && size == X11)) return False;
10402      vassert(size >= 0 && size <= 3);
10403      UInt lanebits = 8 << size;
10404      vassert(shift >= 1 && shift <= lanebits);
      IROp    op   = isU ? mkVecRSHU(size) : mkVecRSHS(size);
      IRExpr* src  = getQReg128(nn);
      /* The rounding-shift ops take a per-lane signed shift amount,
         with negative meaning shift right; so splat -shift across
         all lanes. */
      IRTemp  imm8 = newTemp(Ity_I8);
      assign(imm8, mkU8((UChar)(-shift)));
      IRExpr* amt  = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
10410      IRTemp  shf  = newTempV128();
10411      IRTemp  res  = newTempV128();
10412      assign(shf, binop(op, src, amt));
10413      assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10414                        : mkexpr(shf));
10415      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10416      HChar laneCh = "bhsd"[size];
10417      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
10418      const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
10419                              : (isU ? "urshr" : "srshr");
10420      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10421          nameQReg128(dd), nLanes, laneCh,
10422          nameQReg128(nn), nLanes, laneCh, shift);
10423      return True;
10424   }
10425
10426   if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
10427      /* -------- 1,01000 SRI std7_std7_#imm -------- */
10428      /* laneTy, shift = case immh:immb of
10429                         0001:xxx -> B, SHR:8-xxx
10430                         001x:xxx -> H, SHR:16-xxxx
10431                         01xx:xxx -> S, SHR:32-xxxxx
10432                         1xxx:xxx -> D, SHR:64-xxxxxx
10433                         other    -> invalid
10434      */
10435      UInt size  = 0;
10436      UInt shift = 0;
10437      Bool isQ   = bitQ == 1;
10438      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10439      if (!ok || (bitQ == 0 && size == X11)) return False;
10440      vassert(size >= 0 && size <= 3);
10441      UInt lanebits = 8 << size;
10442      vassert(shift >= 1 && shift <= lanebits);
10443      IRExpr* src = getQReg128(nn);
10444      IRTemp  res = newTempV128();
10445      if (shift == lanebits) {
10446         assign(res, getQReg128(dd));
10447      } else {
10448         assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
10449         IRExpr* nmask = binop(mkVecSHLN(size),
10450                               mkV128(0xFFFF), mkU8(lanebits - shift));
10451         IRTemp  tmp   = newTempV128();
10452         assign(tmp, binop(Iop_OrV128,
10453                           mkexpr(res),
10454                           binop(Iop_AndV128, getQReg128(dd), nmask)));
10455         res = tmp;
10456      }
10457      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10458      HChar laneCh = "bhsd"[size];
10459      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
10460      DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
10461          nameQReg128(dd), nLanes, laneCh,
10462          nameQReg128(nn), nLanes, laneCh, shift);
10463      return True;
10464   }
10465
10466   if (opcode == BITS5(0,1,0,1,0)) {
10467      /* -------- 0,01010 SHL std7_std7_#imm -------- */
10468      /* -------- 1,01010 SLI std7_std7_#imm -------- */
10469      /* laneTy, shift = case immh:immb of
10470                         0001:xxx -> B, xxx
10471                         001x:xxx -> H, xxxx
10472                         01xx:xxx -> S, xxxxx
10473                         1xxx:xxx -> D, xxxxxx
10474                         other    -> invalid
10475      */
10476      UInt size  = 0;
10477      UInt shift = 0;
10478      Bool isSLI = bitU == 1;
10479      Bool isQ   = bitQ == 1;
10480      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10481      if (!ok || (bitQ == 0 && size == X11)) return False;
10482      vassert(size >= 0 && size <= 3);
10483      /* The shift encoding has opposite sign for the leftwards case.
10484         Adjust shift to compensate. */
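      /* E.g. for H lanes, SHL #3 is encoded with immh:immb = 19;
         getLaneInfo_IMMH_IMMB returns 32-19 = 13, and 16-13 recovers
         the intended left shift of 3. */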
10485      UInt lanebits = 8 << size;
10486      shift = lanebits - shift;
10487      vassert(shift >= 0 && shift < lanebits);
10488      IROp    op  = mkVecSHLN(size);
10489      IRExpr* src = getQReg128(nn);
10490      IRTemp  res = newTempV128();
10491      if (shift == 0) {
10492         assign(res, src);
10493      } else {
10494         assign(res, binop(op, src, mkU8(shift)));
10495         if (isSLI) {
10496            IRExpr* nmask = binop(mkVecSHRN(size),
10497                                  mkV128(0xFFFF), mkU8(lanebits - shift));
10498            IRTemp  tmp   = newTempV128();
10499            assign(tmp, binop(Iop_OrV128,
10500                              mkexpr(res),
10501                              binop(Iop_AndV128, getQReg128(dd), nmask)));
10502            res = tmp;
10503         }
10504      }
10505      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10506      HChar laneCh = "bhsd"[size];
10507      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
10508      const HChar* nm = isSLI ? "sli" : "shl";
10509      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10510          nameQReg128(dd), nLanes, laneCh,
10511          nameQReg128(nn), nLanes, laneCh, shift);
10512      return True;
10513   }
10514
10515   if (opcode == BITS5(0,1,1,1,0)
10516       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
10517      /* -------- 0,01110  SQSHL  std7_std7_#imm -------- */
10518      /* -------- 1,01110  UQSHL  std7_std7_#imm -------- */
10519      /* -------- 1,01100  SQSHLU std7_std7_#imm -------- */
10520      UInt size  = 0;
10521      UInt shift = 0;
10522      Bool isQ   = bitQ == 1;
10523      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10524      if (!ok || (bitQ == 0 && size == X11)) return False;
10525      vassert(size >= 0 && size <= 3);
10526      /* The shift encoding has opposite sign for the leftwards case.
10527         Adjust shift to compensate. */
10528      UInt lanebits = 8 << size;
10529      shift = lanebits - shift;
10530      vassert(shift >= 0 && shift < lanebits);
10531      const HChar* nm = NULL;
10532      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
10533      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
10534      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
10535      else vassert(0);
10536      IRTemp qDiff1 = IRTemp_INVALID;
10537      IRTemp qDiff2 = IRTemp_INVALID;
10538      IRTemp res = IRTemp_INVALID;
10539      IRTemp src = newTempV128();
10540      assign(src, getQReg128(nn));
10541      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
10542      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10543      updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
10544                                    isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
10545      const HChar* arr = nameArr_Q_SZ(bitQ, size);
10546      DIP("%s %s.%s, %s.%s, #%u\n", nm,
10547          nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
10548      return True;
10549   }
10550
10551   if (bitU == 0
10552       && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
10553      /* -------- 0,10000  SHRN{,2} #imm -------- */
10554      /* -------- 0,10001 RSHRN{,2} #imm -------- */
10555      /* Narrows, and size is the narrow size. */
10556      UInt size  = 0;
10557      UInt shift = 0;
10558      Bool is2   = bitQ == 1;
10559      Bool isR   = opcode == BITS5(1,0,0,0,1);
10560      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10561      if (!ok || size == X11) return False;
10562      vassert(shift >= 1);
10563      IRTemp t1 = newTempV128();
10564      IRTemp t2 = newTempV128();
10565      IRTemp t3 = newTempV128();
10566      assign(t1, getQReg128(nn));
10567      assign(t2, isR ? binop(mkVecADD(size+1),
10568                             mkexpr(t1),
10569                             mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
10570                     : mkexpr(t1));
10571      assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
10572      IRTemp t4 = math_NARROW_LANES(t3, t3, size);
10573      putLO64andZUorPutHI64(is2, dd, t4);
10574      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10575      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
10576      DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
10577          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
10578      return True;
10579   }
10580
10581   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
10582       || (bitU == 1
10583           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
10584      /* -------- 0,10010   SQSHRN{,2} #imm -------- */
10585      /* -------- 1,10010   UQSHRN{,2} #imm -------- */
10586      /* -------- 0,10011  SQRSHRN{,2} #imm -------- */
10587      /* -------- 1,10011  UQRSHRN{,2} #imm -------- */
10588      /* -------- 1,10000  SQSHRUN{,2} #imm -------- */
10589      /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
10590      UInt size  = 0;
10591      UInt shift = 0;
10592      Bool is2   = bitQ == 1;
10593      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10594      if (!ok || size == X11) return False;
10595      vassert(shift >= 1 && shift <= (8 << size));
10596      const HChar* nm = "??";
10597      IROp op = Iop_INVALID;
10598      /* Decide on the name and the operation. */
10599      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
10600         nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
10601      }
10602      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10603         nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
10604      }
10605      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
10606         nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
10607      }
10608      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
10609         nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
10610      }
10611      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
10612         nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
10613      }
10614      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
10615         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
10616      }
10617      else vassert(0);
10618      /* Compute the result (Q, shifted value) pair. */
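      /* Each of these ops yields the narrowed lanes in the lower 64
         bits of its result and leaves the upper 64 bits nonzero
         exactly when some lane saturated.  E.g. a UQSHRN whose lanes
         are all in range gives a zero upper half, so the QC update
         below sees no difference from zero. */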
10619      IRTemp src128 = newTempV128();
10620      assign(src128, getQReg128(nn));
10621      IRTemp pair = newTempV128();
10622      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
10623      /* Update the result reg */
10624      IRTemp res64in128 = newTempV128();
10625      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
10626      putLO64andZUorPutHI64(is2, dd, res64in128);
10627      /* Update the Q flag. */
10628      IRTemp q64q64 = newTempV128();
10629      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
10630      IRTemp z128 = newTempV128();
10631      assign(z128, mkV128(0x0000));
10632      updateQCFLAGwithDifference(q64q64, z128);
10633      /* */
10634      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10635      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
10636      DIP("%s %s.%s, %s.%s, #%u\n", nm,
10637          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
10638      return True;
10639   }
10640
10641   if (opcode == BITS5(1,0,1,0,0)) {
10642      /* -------- 0,10100 SSHLL{,2} #imm -------- */
10643      /* -------- 1,10100 USHLL{,2} #imm -------- */
10644      /* 31  28     22   18   15     9 4
10645         0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
10646         0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
10647         where Ta,Tb,sh
10648           = case immh of 1xxx -> invalid
10649                          01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
10650                          001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
10651                          0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
10652                          0000 -> AdvSIMD modified immediate (???)
10653      */
10654      Bool    isQ   = bitQ == 1;
10655      Bool    isU   = bitU == 1;
10656      UInt    immhb = (immh << 3) | immb;
10657      IRTemp  src   = newTempV128();
10658      IRTemp  zero  = newTempV128();
10659      IRExpr* res   = NULL;
10660      UInt    sh    = 0;
10661      const HChar* ta = "??";
10662      const HChar* tb = "??";
10663      assign(src, getQReg128(nn));
10664      assign(zero, mkV128(0x0000));
10665      if (immh & 8) {
10666         /* invalid; don't assign to res */
10667      }
10668      else if (immh & 4) {
10669         sh = immhb - 32;
10670         vassert(sh < 32); /* so 32-sh is 1..32 */
10671         ta = "2d";
10672         tb = isQ ? "4s" : "2s";
10673         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
10674                           : mk_InterleaveLO32x4(src, zero);
10675         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
10676      }
10677      else if (immh & 2) {
10678         sh = immhb - 16;
10679         vassert(sh < 16); /* so 16-sh is 1..16 */
10680         ta = "4s";
10681         tb = isQ ? "8h" : "4h";
10682         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
10683                           : mk_InterleaveLO16x8(src, zero);
10684         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
10685      }
10686      else if (immh & 1) {
10687         sh = immhb - 8;
10688         vassert(sh < 8); /* so 8-sh is 1..8 */
10689         ta = "8h";
10690         tb = isQ ? "16b" : "8b";
10691         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
10692                           : mk_InterleaveLO8x16(src, zero);
10693         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
10694      } else {
10695         vassert(immh == 0);
10696         /* invalid; don't assign to res */
10697      }
10698      /* */
10699      if (res) {
10700         putQReg128(dd, res);
10701         DIP("%cshll%s %s.%s, %s.%s, #%d\n",
10702             isU ? 'u' : 's', isQ ? "2" : "",
10703             nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
10704         return True;
10705      }
10706      return False;
10707   }
10708
10709   if (opcode == BITS5(1,1,1,0,0)) {
10710      /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
10711      /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
10712      /* If immh is of the form 00xx, the insn is invalid. */
10713      if (immh < BITS4(0,1,0,0)) return False;
10714      UInt size  = 0;
10715      UInt fbits = 0;
10716      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
10717      /* The following holds because immh is never zero. */
10718      vassert(ok);
10719      /* The following holds because immh >= 0100. */
10720      vassert(size == X10 || size == X11);
10721      Bool isD = size == X11;
10722      Bool isU = bitU == 1;
10723      Bool isQ = bitQ == 1;
10724      if (isD && !isQ) return False; /* reject .1d case */
10725      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
10726      Double  scale  = two_to_the_minus(fbits);
10727      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
10728                           : IRExpr_Const(IRConst_F32( (Float)scale ));
10729      IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
10730      IROp    opCVT  = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10731                           : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10732      IRType tyF = isD ? Ity_F64 : Ity_F32;
10733      IRType tyI = isD ? Ity_I64 : Ity_I32;
10734      UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
10735      vassert(nLanes == 2 || nLanes == 4);
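      /* Fixed-point to FP: convert the integer lane, then multiply by
         2^-fbits.  E.g. with fbits = 8, the I32 value 0x180 (384)
         becomes 384.0 / 256 = 1.5. */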
10736      for (UInt i = 0; i < nLanes; i++) {
10737         IRTemp src = newTemp(tyI);
10738         IRTemp res = newTemp(tyF);
10739         IRTemp rm  = mk_get_IR_rounding_mode();
10740         assign(src, getQRegLane(nn, i, tyI));
10741         assign(res, triop(opMUL, mkexpr(rm),
10742                                  binop(opCVT, mkexpr(rm), mkexpr(src)),
10743                                  scaleE));
10744         putQRegLane(dd, i, mkexpr(res));
10745      }
10746      if (!isQ) {
10747         putQRegLane(dd, 1, mkU64(0));
10748      }
10749      const HChar* arr = nameArr_Q_SZ(bitQ, size);
10750      DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
10751          nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
10752      return True;
10753   }
10754
10755   if (opcode == BITS5(1,1,1,1,1)) {
10756      /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
10757      /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
10758      /* If immh is of the form 00xx, the insn is invalid. */
10759      if (immh < BITS4(0,1,0,0)) return False;
10760      UInt size  = 0;
10761      UInt fbits = 0;
10762      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
10763      /* The following holds because immh is never zero. */
10764      vassert(ok);
10765      /* The following holds because immh >= 0100. */
10766      vassert(size == X10 || size == X11);
10767      Bool isD = size == X11;
10768      Bool isU = bitU == 1;
10769      Bool isQ = bitQ == 1;
10770      if (isD && !isQ) return False; /* reject .1d case */
10771      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
10772      Double  scale  = two_to_the_plus(fbits);
10773      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
10774                           : IRExpr_Const(IRConst_F32( (Float)scale ));
10775      IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
10776      IROp    opCVT  = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
10777                           : (isD ? Iop_F64toI64S : Iop_F32toI32S);
10778      IRType tyF = isD ? Ity_F64 : Ity_F32;
10779      IRType tyI = isD ? Ity_I64 : Ity_I32;
10780      UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
10781      vassert(nLanes == 2 || nLanes == 4);
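      /* The reverse scaling: multiply by 2^fbits, then truncate to
         integer.  E.g. with fbits = 8, 1.5 scales to 384.0 and
         converts to 0x180. */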
10782      for (UInt i = 0; i < nLanes; i++) {
10783         IRTemp src = newTemp(tyF);
10784         IRTemp res = newTemp(tyI);
10785         IRTemp rm  = newTemp(Ity_I32);
10786         assign(src, getQRegLane(nn, i, tyF));
10787         assign(rm,  mkU32(Irrm_ZERO));
10788         assign(res, binop(opCVT, mkexpr(rm),
10789                                  triop(opMUL, mkexpr(rm),
10790                                               mkexpr(src), scaleE)));
10791         putQRegLane(dd, i, mkexpr(res));
10792      }
10793      if (!isQ) {
10794         putQRegLane(dd, 1, mkU64(0));
10795      }
10796      const HChar* arr = nameArr_Q_SZ(bitQ, size);
10797      DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
10798          nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
10799      return True;
10800   }
10801
10803   return False;
10804#  undef INSN
10805}
10806
10807
10808static
10809Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
10810{
10811   /* 31 30 29 28    23   21 20 15     11 9 4
10812      0  Q  U  01110 size 1  m  opcode 00 n d
10813      Decode fields: u,opcode
10814   */
10815#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
10816   if (INSN(31,31) != 0
10817       || INSN(28,24) != BITS5(0,1,1,1,0)
10818       || INSN(21,21) != 1
10819       || INSN(11,10) != BITS2(0,0)) {
10820      return False;
10821   }
10822   UInt bitQ   = INSN(30,30);
10823   UInt bitU   = INSN(29,29);
10824   UInt size   = INSN(23,22);
10825   UInt mm     = INSN(20,16);
10826   UInt opcode = INSN(15,12);
10827   UInt nn     = INSN(9,5);
10828   UInt dd     = INSN(4,0);
10829   vassert(size < 4);
10830   Bool is2    = bitQ == 1;
10831
10832   if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
10833      /* -------- 0,0000 SADDL{2} -------- */
10834      /* -------- 1,0000 UADDL{2} -------- */
10835      /* -------- 0,0010 SSUBL{2} -------- */
10836      /* -------- 1,0010 USUBL{2} -------- */
10837      /* Widens, and size refers to the narrowed lanes. */
10838      if (size == X11) return False;
10839      vassert(size <= 2);
10840      Bool   isU   = bitU == 1;
10841      Bool   isADD = opcode == BITS4(0,0,0,0);
10842      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
10843      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
10844      IRTemp res   = newTempV128();
10845      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
10846                        mkexpr(argL), mkexpr(argR)));
10847      putQReg128(dd, mkexpr(res));
10848      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10849      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
10850      const HChar* nm        = isADD ? (isU ? "uaddl" : "saddl")
10851                                     : (isU ? "usubl" : "ssubl");
10852      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
10853          nameQReg128(dd), arrWide,
10854          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
10855      return True;
10856   }
10857
10858   if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
10859      /* -------- 0,0001 SADDW{2} -------- */
10860      /* -------- 1,0001 UADDW{2} -------- */
10861      /* -------- 0,0011 SSUBW{2} -------- */
10862      /* -------- 1,0011 USUBW{2} -------- */
10863      /* Widens, and size refers to the narrowed lanes. */
10864      if (size == X11) return False;
10865      vassert(size <= 2);
10866      Bool   isU   = bitU == 1;
10867      Bool   isADD = opcode == BITS4(0,0,0,1);
10868      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
10869      IRTemp res   = newTempV128();
10870      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
10871                        getQReg128(nn), mkexpr(argR)));
10872      putQReg128(dd, mkexpr(res));
10873      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10874      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
10875      const HChar* nm        = isADD ? (isU ? "uaddw" : "saddw")
10876                                     : (isU ? "usubw" : "ssubw");
10877      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
10878          nameQReg128(dd), arrWide,
10879          nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
10880      return True;
10881   }
10882
10883   if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
10884      /* -------- 0,0100  ADDHN{2} -------- */
10885      /* -------- 1,0100 RADDHN{2} -------- */
10886      /* -------- 0,0110  SUBHN{2} -------- */
10887      /* -------- 1,0110 RSUBHN{2} -------- */
10888      /* Narrows, and size refers to the narrowed lanes. */
10889      if (size == X11) return False;
10890      vassert(size <= 2);
10891      const UInt shift[3] = { 8, 16, 32 };
10892      Bool isADD = opcode == BITS4(0,1,0,0);
10893      Bool isR   = bitU == 1;
10894      /* Combined elements in wide lanes */
10895      IRTemp  wide  = newTempV128();
10896      IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
10897                            getQReg128(nn), getQReg128(mm));
10898      if (isR) {
10899         wideE = binop(mkVecADD(size+1),
10900                       wideE,
10901                       mkexpr(math_VEC_DUP_IMM(size+1,
10902                                               1ULL << (shift[size]-1))));
10903      }
10904      assign(wide, wideE);
10905      /* Top halves of elements, still in wide lanes */
10906      IRTemp shrd = newTempV128();
10907      assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
10908      /* Elements now compacted into lower 64 bits */
10909      IRTemp new64 = newTempV128();
10910      assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
10911      putLO64andZUorPutHI64(is2, dd, new64);
10912      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10913      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
10914      const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
10915                              : (isR ? "rsubhn" : "subhn");
10916      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
10917          nameQReg128(dd), arrNarrow,
10918          nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
10919      return True;
10920   }
10921
10922   if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
10923      /* -------- 0,0101 SABAL{2} -------- */
10924      /* -------- 1,0101 UABAL{2} -------- */
10925      /* -------- 0,0111 SABDL{2} -------- */
10926      /* -------- 1,0111 UABDL{2} -------- */
10927      /* Widens, and size refers to the narrowed lanes. */
10928      if (size == X11) return False;
10929      vassert(size <= 2);
10930      Bool   isU   = bitU == 1;
10931      Bool   isACC = opcode == BITS4(0,1,0,1);
10932      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
10933      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
10934      IRTemp abd   = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
10935      IRTemp res   = newTempV128();
10936      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
10937                        : mkexpr(abd));
10938      putQReg128(dd, mkexpr(res));
10939      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10940      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
10941      const HChar* nm        = isACC ? (isU ? "uabal" : "sabal")
10942                                     : (isU ? "uabdl" : "sabdl");
10943      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
10944          nameQReg128(dd), arrWide,
10945          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
10946      return True;
10947   }
10948
10949   if (opcode == BITS4(1,1,0,0)
10950       || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
10951      /* -------- 0,1100  SMULL{2} -------- */ // 0 (ks)
10952      /* -------- 1,1100  UMULL{2} -------- */ // 0
10953      /* -------- 0,1000  SMLAL{2} -------- */ // 1
10954      /* -------- 1,1000  UMLAL{2} -------- */ // 1
10955      /* -------- 0,1010  SMLSL{2} -------- */ // 2
10956      /* -------- 1,1010  UMLSL{2} -------- */ // 2
10957      /* Widens, and size refers to the narrowed lanes. */
10958      UInt ks = 3;
10959      switch (opcode) {
10960         case BITS4(1,1,0,0): ks = 0; break;
10961         case BITS4(1,0,0,0): ks = 1; break;
10962         case BITS4(1,0,1,0): ks = 2; break;
10963         default: vassert(0);
10964      }
10965      vassert(ks >= 0 && ks <= 2);
10966      if (size == X11) return False;
10967      vassert(size <= 2);
10968      Bool   isU  = bitU == 1;
10969      IRTemp vecN = newTempV128();
10970      IRTemp vecM = newTempV128();
10971      IRTemp vecD = newTempV128();
10972      assign(vecN, getQReg128(nn));
10973      assign(vecM, getQReg128(mm));
10974      assign(vecD, getQReg128(dd));
10975      IRTemp res = IRTemp_INVALID;
10976      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
10977                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10978      putQReg128(dd, mkexpr(res));
10979      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10980      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
10981      const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
10982      DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
10983          nameQReg128(dd), arrWide,
10984          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
10985      return True;
10986   }
10987
10988   if (bitU == 0
10989       && (opcode == BITS4(1,1,0,1)
10990           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
10991      /* -------- 0,1101  SQDMULL{2} -------- */ // 0 (ks)
10992      /* -------- 0,1001  SQDMLAL{2} -------- */ // 1
10993      /* -------- 0,1011  SQDMLSL{2} -------- */ // 2
10994      /* Widens, and size refers to the narrowed lanes. */
10995      UInt ks = 3;
10996      switch (opcode) {
10997         case BITS4(1,1,0,1): ks = 0; break;
10998         case BITS4(1,0,0,1): ks = 1; break;
10999         case BITS4(1,0,1,1): ks = 2; break;
11000         default: vassert(0);
11001      }
11002      vassert(ks >= 0 && ks <= 2);
11003      if (size == X00 || size == X11) return False;
11004      vassert(size <= 2);
11005      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
11006      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
11007      newTempsV128_3(&vecN, &vecM, &vecD);
11008      assign(vecN, getQReg128(nn));
11009      assign(vecM, getQReg128(mm));
11010      assign(vecD, getQReg128(dd));
11011      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
11012                       is2, size, "mas"[ks],
11013                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11014      putQReg128(dd, mkexpr(res));
11015      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
11016      updateQCFLAGwithDifference(sat1q, sat1n);
11017      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
11018         updateQCFLAGwithDifference(sat2q, sat2n);
11019      }
11020      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11021      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
11022      const HChar* nm        = ks == 0 ? "sqdmull"
11023                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
11024      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11025          nameQReg128(dd), arrWide,
11026          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11027      return True;
11028   }
11029
11030   if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
11031      /* -------- 0,1110  PMULL{2} -------- */
11032      /* Widens, and size refers to the narrowed lanes. */
11033      if (size != X00) return False;
11034      IRTemp res
11035         = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
11036                                     getQReg128(nn), getQReg128(mm));
11037      putQReg128(dd, mkexpr(res));
11038      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11039      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
11040      DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
11041          nameQReg128(dd), arrNarrow,
11042          nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
11043      return True;
11044   }
11045
11046   return False;
11047#  undef INSN
11048}
11049
11050
11051static
11052Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
11053{
11054   /* 31 30 29 28    23   21 20 15     10 9 4
11055      0  Q  U  01110 size 1  m  opcode 1  n d
11056      Decode fields: u,size,opcode
11057   */
11058#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
11059   if (INSN(31,31) != 0
11060       || INSN(28,24) != BITS5(0,1,1,1,0)
11061       || INSN(21,21) != 1
11062       || INSN(10,10) != 1) {
11063      return False;
11064   }
11065   UInt bitQ   = INSN(30,30);
11066   UInt bitU   = INSN(29,29);
11067   UInt size   = INSN(23,22);
11068   UInt mm     = INSN(20,16);
11069   UInt opcode = INSN(15,11);
11070   UInt nn     = INSN(9,5);
11071   UInt dd     = INSN(4,0);
11072   vassert(size < 4);
11073
11074   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
11075      /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
11076      /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
11077      /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
11078      /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
11079      if (size == X11) return False;
11080      Bool isADD = opcode == BITS5(0,0,0,0,0);
11081      Bool isU   = bitU == 1;
11082      /* Widen both args out, do the math, narrow to final result. */
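      /* Doubling the lane width makes the intermediate sum exact;
         e.g. UHADD on B lanes evaluates 0xFF + 0xFF as 0x1FE in a
         16-bit lane, and the shift right by 1 returns 0xFF, with no
         wraparound. */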
11083      IRTemp argL   = newTempV128();
11084      IRTemp argLhi = IRTemp_INVALID;
11085      IRTemp argLlo = IRTemp_INVALID;
11086      IRTemp argR   = newTempV128();
11087      IRTemp argRhi = IRTemp_INVALID;
11088      IRTemp argRlo = IRTemp_INVALID;
11089      IRTemp resHi  = newTempV128();
11090      IRTemp resLo  = newTempV128();
11091      IRTemp res    = IRTemp_INVALID;
11092      assign(argL, getQReg128(nn));
11093      argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
11094      argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argL));
11095      assign(argR, getQReg128(mm));
11096      argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
11097      argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argR));
11098      IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
11099      IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
11100      assign(resHi, binop(opSxR,
11101                          binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
11102                          mkU8(1)));
11103      assign(resLo, binop(opSxR,
11104                          binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
11105                          mkU8(1)));
11106      res = math_NARROW_LANES ( resHi, resLo, size );
11107      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11108      const HChar* nm  = isADD ? (isU ? "uhadd" : "shadd")
11109                               : (isU ? "uhsub" : "shsub");
11110      const HChar* arr = nameArr_Q_SZ(bitQ, size);
11111      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11112          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11113      return True;
11114   }
11115
11116   if (opcode == BITS5(0,0,0,1,0)) {
11117      /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
11118      /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
11119      if (bitQ == 0 && size == X11) return False; // implied 1d case
11120      Bool   isU  = bitU == 1;
11121      IRTemp argL = newTempV128();
11122      IRTemp argR = newTempV128();
11123      assign(argL, getQReg128(nn));
11124      assign(argR, getQReg128(mm));
11125      IRTemp res = math_RHADD(size, isU, argL, argR);
11126      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11127      const HChar* arr = nameArr_Q_SZ(bitQ, size);
11128      DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
11129          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11130      return True;
11131   }

   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
      /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
      /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
      /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
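
   /* Illustration only: a scalar model of how the QC (cumulative
      saturation) flag update above works, assuming unsigned 8-bit
      lanes.  Both the saturating and the wrapping result are computed,
      and QC is to be set if they differ in any lane:

         static UChar uqadd8_model ( UChar a, UChar b, Bool* qc ) {
            UShort wide = (UShort)a + (UShort)b;
            UChar  qres = wide > 0xFF ? 0xFF : (UChar)wide;  // saturating
            UChar  nres = (UChar)wide;                       // wrapping
            if (qres != nres) *qc = True;
            return qres;
         }
   */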

   if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
      Bool   isORx  = (size & 2) == 2;
      Bool   invert = (size & 1) == 1;
      IRTemp res    = newTempV128();
      assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
                        getQReg128(nn),
                        invert ? unop(Iop_NotV128, getQReg128(mm))
                               : getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* names[4] = { "and", "bic", "orr", "orn" };
      const HChar* ar = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
          nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp argD = newTempV128();
      IRTemp argN = newTempV128();
      IRTemp argM = newTempV128();
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
      IRTemp res = newTempV128();
      switch (size) {
         case BITS2(0,0): /* EOR */
            assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
            break;
         case BITS2(0,1): /* BSL */
            assign(res, binop(opXOR, mkexpr(argM),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argM), mkexpr(argN)),
                                    mkexpr(argD))));
            break;
         case BITS2(1,0): /* BIT */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    mkexpr(argM))));
            break;
         case BITS2(1,1): /* BIF */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    unop(opNOT, mkexpr(argM)))));
            break;
         default:
            vassert(0);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
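
   /* Illustration only: BSL, BIT and BIF above are all instances of
      the bitwise multiplexer  mux(c, t, f) = f ^ ((f ^ t) & c) , which
      yields t where c has a 1 bit and f where it has a 0 bit.  For
      example, with c = 0xF0, t = 0xAA, f = 0x55:
         (0x55 ^ 0xAA) & 0xF0 = 0xF0,  and  0x55 ^ 0xF0 = 0xA5,
      i.e. the high nibble of t joined to the low nibble of f.  The
      three cases differ only in which register provides the control
      bits (argD for BSL, argM or its inverse for BIT/BIF). */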

   if (opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
      /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isGT  = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(mkVecCMPGTS(size), argL, argR)
                  : binop(mkVecCMPGTU(size), argL, argR));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGT ? "cmgt" : "cmhi";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
      /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
                  : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGE ? "cmge" : "cmhs";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
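
   /* Illustration only: there are no >= vector primops, so CMGE/CMHS
      are synthesised from the identity  a >= b  <=>  !(b > a) , which
      holds in both the signed and the unsigned orderings; hence the
      swapped operands and the Iop_NotV128 above. */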

   if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01000 SSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01000 USHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size)  : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
                             : (isU ? "ushl"  : "sshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01001 UQSHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
      /* This is a bit tricky.  If we're only interested in the lowest 64 bits
         of the result (viz, bitQ == 0), then we must adjust the operands to
         ensure that the upper part of the result, that we don't care about,
         doesn't pollute the returned Q value.  To do this, zero out the upper
         operand halves beforehand.  This works because it means, for the
         lanes we don't care about, we are shifting zero by zero, which can
         never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(res256, binop(op,
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl"  : "sqshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
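
   /* Illustration only: a scalar model of the saturating shift for
      unsigned 8-bit lanes, showing why the zeroed-out upper operand
      halves above can never pollute QC: zero shifted by zero stays
      zero and never saturates.  (Simplified: the real primop also
      accepts negative per-lane shift amounts, which shift rightwards.)

         static UChar uqshl8_model ( UChar a, UInt sh, Bool* qc ) {
            UInt wide = (UInt)a << sh;
            if (wide > 0xFF) { *qc = True; return 0xFF; }
            return (UChar)wide;
         }
   */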

   if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
      /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
      /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
      /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
      /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU   = bitU == 1;
      Bool isMAX = (opcode & 1) == 0;
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      IRTemp t   = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm = isMAX ? (isU ? "umax" : "smax")
                              : (isU ? "umin" : "smin");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
      /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
      /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
      /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
      if (size == X11) return False; // 1d/2d cases not allowed
      Bool isU   = bitU == 1;
      Bool isACC = opcode == BITS5(0,1,1,1,1);
      vassert(size <= 2);
      IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
      IRTemp t2 = newTempV128();
      assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
                       : mkexpr(t1));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* nm  = isACC ? (isU ? "uaba" : "saba")
                               : (isU ? "uabd" : "sabd");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
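
   /* Illustration only: a scalar model of the absolute-difference core
      computed by math_ABD, assuming unsigned 8-bit lanes; the
      accumulating forms then just add the destination on top:

         static UChar uabd8_model ( UChar a, UChar b ) {
            return a > b ? a - b : b - a;
         }
   */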

   if (opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
      /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isSUB = bitU == 1;
      IROp   op    = isSUB ? mkVecSUB(size) : mkVecADD(size);
      IRTemp t     = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isSUB ? "sub" : "add";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
      /* -------- 1,xx,10001 CMEQ  std7_std7_std7 -------- */ // ==
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                  : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isEQ ? "cmeq" : "cmtst";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
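
   /* Illustration only: CMTST has no direct primop, so each lane is
      computed as  ~((a & b) == 0) : all ones where a AND b is nonzero,
      all zeroes where it is zero. */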

   if (opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
      /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isMLS = bitU == 1;
      IROp   opMUL    = mkVecMUL(size);
      IROp   opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
      IRTemp res      = newTempV128();
      if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
         assign(res, binop(opADDSUB,
                           getQReg128(dd),
                           binop(opMUL, getQReg128(nn), getQReg128(mm))));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,0,1,1)) {
      /* -------- 0,xx,10011 MUL  std7_std7_std7 -------- */
      /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isPMUL = bitU == 1;
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      IROp   opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
      IRTemp res   = newTempV128();
      if (opMUL != Iop_INVALID) {
         assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }
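
   /* Illustration only: a scalar model of the 8-bit polynomial
      (carry-less) multiply performed by Iop_PolynomialMul8x16, in
      which partial products are combined with XOR rather than ADD:

         static UChar pmul8_model ( UChar a, UChar b ) {
            UChar r = 0;
            for (UInt i = 0; i < 8; i++)
               if (b & (1 << i))
                  r ^= (UChar)(a << i);  // xor, not add: no carries
            return r;
         }
   */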

   if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
      /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
      /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
      /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
      /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isU   = bitU == 1;
      Bool isMAX = opcode == BITS5(1,0,1,0,0);
      IRTemp vN  = newTempV128();
      IRTemp vM  = newTempV128();
      IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                      : (isU ? mkVecMINU(size) : mkVecMINS(size));
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(op,
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEvenLanes32x4 to extract the
         half-width result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isMAX ? (isU ? "umaxp" : "smaxp")
                               : (isU ? "uminp" : "sminp");
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
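
   /* Illustration only: why the even/odd concatenation implements a
      pairwise operation.  With 32-bit lanes, vN = [n3 n2 n1 n0] and
      vM = [m3 m2 m1 m0], so

         CatEvenLanes(vM, vN) = [m2 m0 n2 n0]
         CatOddLanes(vM, vN)  = [m3 m1 n3 n1]

      and a lanewise max/min of those two gives

         [max(m3,m2) max(m1,m0) max(n3,n2) max(n1,n0)]

      which is exactly the architected SMAXP/UMAXP result: lower half
      from pairs of Vn, upper half from pairs of Vm. */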

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp vN = newTempV128();
      IRTemp vM = newTempV128();
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(mkVecADD(size),
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEvenLanes32x4 to extract the
         half-width result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("addp %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,0x,11110 FMAX   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11110 FMIN   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool   isD   = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(1,1,0,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
      IRTemp res   = newTempV128();
      assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
      /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
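
   /* Illustration only: the FIXME above is about double rounding.  The
      split form computes  round(round(n * m) + d) , whereas the
      architected FMLA/FMLS compute the fused  round(n * m + d)  with a
      single rounding, so the two can differ in the least significant
      mantissa bit. */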

   if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      const IROp ops[4]
         = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      IRTemp t2    = newTempV128();
      // FIXME: use Abd primop instead?
      assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, unop(opABS, mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fabd %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11011 FMUL  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 0;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isGE  = bitU == 1;
      IROp   opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                          : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp t1    = newTempV128();
      assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                      : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp   opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp t1    = newTempV128();
      assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD  = (size & 1) == 1;
      Bool isGT = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp   opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                          : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp t1    = newTempV128();
      assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
                              unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11110 FMAXP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11110 FMINP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(1,1,0,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
      IRTemp srcN  = newTempV128();
      IRTemp srcM  = newTempV128();
      IRTemp preL  = IRTemp_INVALID;
      IRTemp preR  = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                binop(opMXX, mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = size == X01;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp srcN = newTempV128();
      IRTemp srcM = newTempV128();
      IRTemp preL = IRTemp_INVALID;
      IRTemp preR = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                triop(mkVecADDF(isD ? 3 : 2),
                      mkexpr(mk_get_IR_rounding_mode()),
                      mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      vassert(size <= 1);
      const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,0x,11111: FRECPS  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isSQRT = (size & 2) == 2;
      Bool isD    = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp op     = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
                           : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21    16     11 9 4
      0  Q  U  01110 size 10000 opcode 10 n d
      Decode fields: U,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
      /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
      /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
      const IROp iops[3] = { Iop_Reverse8sIn64_x2,
                             Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
      vassert(size <= 2);
      IRTemp res = newTempV128();
      assign(res, unop(iops[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev64",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
      /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
      Bool   isH = size == X01;
      IRTemp res = newTempV128();
      IROp   iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
      assign(res, unop(iop, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev32",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
      /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev16",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
      /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
      /* -------- 0,xx,00110: SADALP std6_std6 -------- */
      /* -------- 1,xx,00110: UADALP std6_std6 -------- */
      /* Widens, and size refers to the narrow size. */
      if (size == X11) return False; // no 1d or 2d cases
      Bool   isU   = bitU == 1;
      Bool   isACC = opcode == BITS5(0,0,1,1,0);
      IRTemp src   = newTempV128();
      IRTemp sum   = newTempV128();
      IRTemp res   = newTempV128();
      assign(src, getQReg128(nn));
      assign(sum,
             binop(mkVecADD(size+1),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, True/*fromOdd*/, size, mkexpr(src))),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, False/*!fromOdd*/, size, mkexpr(src)))));
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
                        : mkexpr(sum));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(bitQ, size+1);
      DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
                                     : (isU ? "uaddlp" : "saddlp"),
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
      /* -------- 1,xx,00011: USQADD std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUSQADD = bitU == 1;
      /* This is switched (in the US vs SU sense) deliberately.
         SUQADD corresponds to the ExtUSsatSS variants and
         USQADD corresponds to the ExtSUsatUU variants.
         See libvex_ir for more details. */
      IROp   qop  = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                             : mkVecQADDEXTUSSATSS(size);
      IROp   nop  = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      /* Because the two arguments to the addition are implicitly
         extended differently (one signedly, the other unsignedly) it is
         important to present them to the primop in the correct order. */
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00100: CLS std6_std6 -------- */
      /* -------- 1,xx,00100: CLZ std6_std6 -------- */
      if (size == X11) return False; // no 1d or 2d cases
      const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
      const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
      Bool   isCLZ = bitU == 1;
      IRTemp res   = newTempV128();
      vassert(size <= 2);
      assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
      /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 1,01,00101  RBIT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", "rbit",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std7_std7 -------- */
      /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isNEG  = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = newTempV128(), nres = newTempV128();
      assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
      assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
      /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGT  = bitU == 0;
      IRExpr* argL  = getQReg128(nn);
      IRExpr* argR  = mkV128(0x0000);
      IRTemp  res   = newTempV128();
      IROp    opGTS = mkVecCMPGTS(size);
      assign(res, isGT ? binop(opGTS, argL, argR)
                       : unop(Iop_NotV128, binop(opGTS, argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
      /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                       : unop(Iop_NotV128,
                              binop(mkVecCMPGTS(size), argL, argR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, binop(mkVecCMPGTS(size), argR, argL));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01011: ABS std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, unop(mkVecABS(size), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,xx,01011: NEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   UInt ix = 0; /*INVALID*/
   if (size >= X10) {
      switch (opcode) {
         case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
         case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
         case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
         default: break;
      }
   }
   if (ix > 0) {
      /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
      /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
      /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
      /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
      /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isD     = size == X11;
      IROp   opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
      IROp   opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
      IROp   opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IROp   opCmp   = Iop_INVALID;
      Bool   swap    = False;
      const HChar* nm = "??";
      switch (ix) {
         case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
         case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
         case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
         case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
         case 5: nm = "fcmle"; opCmp = opCmpLE; break;
         default: vassert(0);
      }
      IRExpr* zero = mkV128(0x0000);
      IRTemp res = newTempV128();
      assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
                       : binop(opCmp, getQReg128(nn), zero));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, #0.0\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
      /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isFNEG = bitU == 1;
      IROp   op     = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
                             : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
      IRTemp res = newTempV128();
      assign(res, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010: XTN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool   is2  = bitQ == 1;
      IROp   opN  = mkVecNARROWUN(size);
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
      putLO64andZUorPutHI64(is2, dd, resN);
      const HChar* nm        = "xtn";
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN{,2} -------- */
      /* -------- 1,xx,10100: UQXTN{,2} -------- */
      /* -------- 1,xx,10010: SQXTUN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool  is2    = bitQ == 1;
      IROp  opN    = Iop_INVALID;
      Bool  zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src  = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putLO64andZUorPutHI64(is2, dd, resN);
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }
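
   /* Illustration only: a scalar model of the saturation detection
      used above, for the sqxtn 16-to-8-bit case.  The narrowed result
      is widened back and compared with the source; any difference
      means that lane saturated, which is what feeds the QC update:

         static Char sqxtn_16to8_model ( Short a, Bool* qc ) {
            Char res = a > 127 ? 127 : (a < -128 ? -128 : (Char)a);
            if ((Short)res != a) *qc = True;  // re-widen and compare
            return res;
         }
   */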

   if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
      /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
      /* Widens, and size is the narrow size. */
      if (size == X11) return False;
      Bool is2   = bitQ == 1;
      IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
      IROp opSHL = mkVecSHLN(size+1);
      IRTemp src = newTempV128();
      IRTemp res = newTempV128();
      assign(src, getQReg128(nn));
      assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
                               mkU8(8 << size)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("shll%s %s.%s, %s.%s, #%u\n", is2 ? "2" : "",
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
      return True;
   }
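
   /* Illustration only: why interleave-then-shift implements SHLL.
      Interleaving the source with itself duplicates each narrow lane,
      so an 8-bit lane s becomes the 16-bit value (s << 8) | s; the
      following left shift by the lane width (#8) inside the widened
      lane then discards the low copy, leaving s << 8, i.e. s
      zero-extended to 16 bits and shifted by #8, as SHLL requires. */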

   if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
      UInt   nLanes = size == X00 ? 4 : 2;
      IRType srcTy  = size == X00 ? Ity_F32 : Ity_F64;
      IROp   opCvt  = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
      IRTemp rm     = mk_get_IR_rounding_mode();
      IRTemp src[nLanes];
      for (UInt i = 0; i < nLanes; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, i, srcTy));
      }
      for (UInt i = 0; i < nLanes; i++) {
         putQRegLane(dd, nLanes * bitQ + i,
                         binop(opCvt, mkexpr(rm), mkexpr(src[i])));
      }
      if (bitQ == 0) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
      /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
      /* Using Irrm_NEAREST here isn't right.  FCVTXN is specified to
         use "round to odd" (von Neumann rounding): round towards zero,
         and if the result is inexact, force the lowest mantissa bit to
         1, which makes a subsequent narrowing of the result immune to
         double rounding.  FIXME: this implementation just rounds to
         nearest. */
      IRType srcTy = Ity_F64;
      IROp   opCvt = Iop_F64toF32;
      IRTemp src[2];
      for (UInt i = 0; i < 2; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, i, srcTy));
      }
      for (UInt i = 0; i < 2; i++) {
         putQRegLane(dd, 2 * bitQ + i,
                         binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
      }
      if (bitQ == 0) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
      UInt   nLanes = size == X00 ? 4 : 2;
      IRType srcTy  = size == X00 ? Ity_F16 : Ity_F32;
      IROp   opCvt  = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
      IRTemp src[nLanes];
      for (UInt i = 0; i < nLanes; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
      }
      for (UInt i = 0; i < nLanes; i++) {
         putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }

   ix = 0;
   if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
      ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
      // = 1 + bitU[0]:size[1]:opcode[0]
      vassert(ix >= 1 && ix <= 8);
      if (ix == 7) ix = 0;
   }
12253   if (ix > 0) {
12254      /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
12255      /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
12256      /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
12257      /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
12258      /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
12259      /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
12260      /* -------- 1,1x,11000 (apparently unassigned)    (7) -------- */
12261      /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
12262      /* rm plan:
12263         FRINTN: tieeven -- !! FIXME KLUDGED !!
12264         FRINTM: -inf
12265         FRINTP: +inf
12266         FRINTZ: zero
12267         FRINTA: tieaway -- !! FIXME KLUDGED !!
12268         FRINTX: per FPCR + "exact = TRUE"
12269         FRINTI: per FPCR
12270      */
12271      Bool isD = (size & 1) == 1;
12272      if (bitQ == 0 && isD) return False; // implied 1d case
12273
12274      IRTemp irrmRM = mk_get_IR_rounding_mode();
12275
12276      UChar ch = '?';
12277      IRTemp irrm = newTemp(Ity_I32);
12278      switch (ix) {
12279         case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12280         case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
12281         case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
12282         case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
12283         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
12284         case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
         // FRINTX is FRINTI plus "exact = TRUE" in the ARM pseudocode:
         // it additionally raises Inexact if the rounded result differs
         // numerically from the operand.  FP exceptions are not modelled
         // here, so it is handled identically to FRINTI.
12287         case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
12288         case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
12289         default: vassert(0);
12290      }
12291
12292      IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
12293      if (isD) {
12294         for (UInt i = 0; i < 2; i++) {
12295            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12296                                            getQRegLane(nn, i, Ity_F64)));
12297         }
12298      } else {
12299         UInt n = bitQ==1 ? 4 : 2;
12300         for (UInt i = 0; i < n; i++) {
12301            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12302                                            getQRegLane(nn, i, Ity_F32)));
12303         }
12304         if (bitQ == 0)
12305            putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12306      }
12307      const HChar* arr = nameArr_Q_SZ(bitQ, size);
12308      DIP("frint%c %s.%s, %s.%s\n", ch,
12309          nameQReg128(dd), arr, nameQReg128(nn), arr);
12310      return True;
12311   }
12312
12313   ix = 0; /*INVALID*/
12314   switch (opcode) {
12315      case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
12316      case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
12317      case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
12318      default: break;
12319   }
12320   if (ix > 0) {
12321      /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12322      /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12323      /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12324      /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12325      /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
      /* -------- 1,0x,11010 FCVTNU 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
      /* -------- 1,0x,11011 FCVTMU 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
      /* -------- 1,0x,11100 FCVTAU 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
      /* -------- 1,1x,11010 FCVTPU 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
      /* -------- 1,1x,11011 FCVTZU 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12331      Bool isD = (size & 1) == 1;
12332      if (bitQ == 0 && isD) return False; // implied 1d case
12333
12334      IRRoundingMode irrm = 8; /*impossible*/
12335      HChar          ch   = '?';
12336      switch (ix) {
12337         case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
12338         case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
12339         case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
12340         case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
12341         case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
12342         default: vassert(0);
12343      }
12344      IROp cvt = Iop_INVALID;
12345      if (bitU == 1) {
12346         cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
12347      } else {
12348         cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
12349      }
12350      if (isD) {
12351         for (UInt i = 0; i < 2; i++) {
12352            putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12353                                            getQRegLane(nn, i, Ity_F64)));
12354         }
12355      } else {
12356         UInt n = bitQ==1 ? 4 : 2;
12357         for (UInt i = 0; i < n; i++) {
12358            putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12359                                            getQRegLane(nn, i, Ity_F32)));
12360         }
12361         if (bitQ == 0)
12362            putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12363      }
12364      const HChar* arr = nameArr_Q_SZ(bitQ, size);
12365      DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
12366          nameQReg128(dd), arr, nameQReg128(nn), arr);
12367      return True;
12368   }
12369
12370   if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
12371      /* -------- 0,10,11100: URECPE  4s_4s, 2s_2s -------- */
12372      /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
12373      Bool isREC = bitU == 0;
12374      IROp op    = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
12375      IRTemp res = newTempV128();
12376      assign(res, unop(op, getQReg128(nn)));
12377      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12378      const HChar* nm  = isREC ? "urecpe" : "ursqrte";
12379      const HChar* arr = nameArr_Q_SZ(bitQ, size);
12380      DIP("%s %s.%s, %s.%s\n", nm,
12381          nameQReg128(dd), arr, nameQReg128(nn), arr);
12382      return True;
12383   }
12384
12385   if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
12386      /* -------- 0,0x,11101: SCVTF -------- */
12387      /* -------- 1,0x,11101: UCVTF -------- */
12388      /* 31  28      22 21       15     9 4
12389         0q0 01110 0 sz 1  00001 110110 n d  SCVTF Vd, Vn
12390         0q1 01110 0 sz 1  00001 110110 n d  UCVTF Vd, Vn
12391         with laneage:
12392         case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
12393      */
12394      Bool isQ   = bitQ == 1;
12395      Bool isU   = bitU == 1;
12396      Bool isF64 = (size & 1) == 1;
12397      if (isQ || !isF64) {
12398         IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
12399         UInt   nLanes = 0;
12400         Bool   zeroHI = False;
12401         const HChar* arrSpec = NULL;
12402         Bool   ok  = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
12403                                       isQ, isF64 );
12404         IROp   iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
12405                          : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
12406         IRTemp rm  = mk_get_IR_rounding_mode();
12407         UInt   i;
12408         vassert(ok); /* the 'if' above should ensure this */
12409         for (i = 0; i < nLanes; i++) {
12410            putQRegLane(dd, i,
12411                        binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
12412         }
12413         if (zeroHI) {
12414            putQRegLane(dd, 1, mkU64(0));
12415         }
12416         DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
12417             nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
12418         return True;
12419      }
12420      /* else fall through */
12421   }
12422
12423   if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
12424      /* -------- 0,1x,11101: FRECPE  2d_2d, 4s_4s, 2s_2s -------- */
12425      /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
12426      Bool isSQRT = bitU == 1;
12427      Bool isD    = (size & 1) == 1;
12428      IROp op     = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
12429                           : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
12430      if (bitQ == 0 && isD) return False; // implied 1d case
12431      IRTemp resV = newTempV128();
12432      assign(resV, unop(op, getQReg128(nn)));
12433      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
12434      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12435      DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
12436          nameQReg128(dd), arr, nameQReg128(nn), arr);
12437      return True;
12438   }
12439
12440   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
12441      /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
12442      Bool isD = (size & 1) == 1;
12443      IROp op  = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4;
12444      if (bitQ == 0 && isD) return False; // implied 1d case
12445      IRTemp resV = newTempV128();
12446      assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()),
12447                             getQReg128(nn)));
12448      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
12449      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12450      DIP("%s %s.%s, %s.%s\n", "fsqrt",
12451          nameQReg128(dd), arr, nameQReg128(nn), arr);
12452      return True;
12453   }
12454
12455   return False;
12456#  undef INSN
12457}
12458
12459
12460static
12461Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
12462{
12463   /* 31    28    23   21 20 19 15     11   9 4
12464      0 Q U 01111 size L  M  m  opcode H  0 n d
12465      Decode fields are: u,size,opcode
12466      M is really part of the mm register number.  Individual
12467      cases need to inspect L and H though.
12468   */
12469#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12470   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) != 0) {
12472      return False;
12473   }
12474   UInt bitQ   = INSN(30,30);
12475   UInt bitU   = INSN(29,29);
12476   UInt size   = INSN(23,22);
12477   UInt bitL   = INSN(21,21);
12478   UInt bitM   = INSN(20,20);
12479   UInt mmLO4  = INSN(19,16);
12480   UInt opcode = INSN(15,12);
12481   UInt bitH   = INSN(11,11);
12482   UInt nn     = INSN(9,5);
12483   UInt dd     = INSN(4,0);
12484   vassert(size < 4);
12485   vassert(bitH < 2 && bitM < 2 && bitL < 2);
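   /* For example: in FMLA Vd.4s, Vn.4s, Vm.s[3], the S-sized element
      index 3 is encoded as H:L = 1:1, and the full register number of
      Vm as M:m.  The handlers below accordingly rebuild
      mm = (bitM << 4) | mmLO4 and, for S lanes,
      index = (bitH << 1) | bitL. */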
12486
12487   if (bitU == 0 && size >= X10
12488       && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
12489      /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12490      /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12491      if (bitQ == 0 && size == X11) return False; // implied 1d case
12492      Bool isD   = (size & 1) == 1;
12493      Bool isSUB = opcode == BITS4(0,1,0,1);
12494      UInt index;
12495      if      (!isD)             index = (bitH << 1) | bitL;
12496      else if (isD && bitL == 0) index = bitH;
12497      else return False; // sz:L == x11 => unallocated encoding
12498      vassert(index < (isD ? 2 : 4));
12499      IRType ity   = isD ? Ity_F64 : Ity_F32;
12500      IRTemp elem  = newTemp(ity);
12501      UInt   mm    = (bitM << 4) | mmLO4;
12502      assign(elem, getQRegLane(mm, index, ity));
12503      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
12504      IROp   opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
12505      IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
12506      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
12507      IRTemp rm    = mk_get_IR_rounding_mode();
12508      IRTemp t1    = newTempV128();
12509      IRTemp t2    = newTempV128();
12510      // FIXME: double rounding; use FMA primops instead
12511      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
12512      assign(t2, triop(isSUB ? opSUB : opADD,
12513                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
12514      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12515      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12516      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
12517          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
12518          isD ? 'd' : 's', index);
12519      return True;
12520   }
12521
12522   if (size >= X10 && opcode == BITS4(1,0,0,1)) {
12523      /* -------- 0,1x,1001 FMUL  2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12524      /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12525      if (bitQ == 0 && size == X11) return False; // implied 1d case
12526      Bool isD    = (size & 1) == 1;
12527      Bool isMULX = bitU == 1;
12528      UInt index;
12529      if      (!isD)             index = (bitH << 1) | bitL;
12530      else if (isD && bitL == 0) index = bitH;
12531      else return False; // sz:L == x11 => unallocated encoding
12532      vassert(index < (isD ? 2 : 4));
12533      IRType ity  = isD ? Ity_F64 : Ity_F32;
12534      IRTemp elem = newTemp(ity);
12535      UInt   mm   = (bitM << 4) | mmLO4;
12536      assign(elem, getQRegLane(mm, index, ity));
12537      IRTemp dupd = math_DUP_TO_V128(elem, ity);
12538      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
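      // (For the record: FMULX differs from FMUL only for 0 * infinity,
      //  for which it returns 2.0 with the XOR of the operand signs
      //  rather than the default NaN.)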
12539      IRTemp res  = newTempV128();
12540      assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
12541                        mkexpr(mk_get_IR_rounding_mode()),
12542                        getQReg128(nn), mkexpr(dupd)));
12543      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12544      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12545      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
12546          isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
12547          nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
12548      return True;
12549   }
12550
12551   if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
12552       || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
12553      /* -------- 1,xx,0000 MLA s/h variants only -------- */
12554      /* -------- 1,xx,0100 MLS s/h variants only -------- */
12555      /* -------- 0,xx,1000 MUL s/h variants only -------- */
12556      Bool isMLA = opcode == BITS4(0,0,0,0);
12557      Bool isMLS = opcode == BITS4(0,1,0,0);
12558      UInt mm    = 32; // invalid
12559      UInt ix    = 16; // invalid
12560      switch (size) {
12561         case X00:
12562            return False; // b case is not allowed
12563         case X01:
12564            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12565         case X10:
12566            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12567         case X11:
12568            return False; // d case is not allowed
12569         default:
12570            vassert(0);
12571      }
12572      vassert(mm < 32 && ix < 16);
12573      IROp   opMUL = mkVecMUL(size);
12574      IROp   opADD = mkVecADD(size);
12575      IROp   opSUB = mkVecSUB(size);
12576      HChar  ch    = size == X01 ? 'h' : 's';
12577      IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12578      IRTemp vecD  = newTempV128();
12579      IRTemp vecN  = newTempV128();
12580      IRTemp res   = newTempV128();
12581      assign(vecD, getQReg128(dd));
12582      assign(vecN, getQReg128(nn));
12583      IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
12584      if (isMLA || isMLS) {
12585         assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
12586      } else {
12587         assign(res, prod);
12588      }
12589      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12590      const HChar* arr = nameArr_Q_SZ(bitQ, size);
12591      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
12592                                                : (isMLS ? "mls" : "mul"),
12593          nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
12595      return True;
12596   }
12597
12598   if (opcode == BITS4(1,0,1,0)
12599       || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
12600      /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
12601      /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
12602      /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
12603      /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
12604      /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
      /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
12606      /* Widens, and size refers to the narrowed lanes. */
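      /* E.g. SMLAL2 Vd.4s, Vn.8h, Vm.h[ix]: size=01 selects "h" narrow
         lanes, the wide lanes are then 4s, and bitQ=1 (the "2" suffix)
         means the multiplicands come from the upper half of Vn. */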
12607      UInt ks = 3;
12608      switch (opcode) {
12609         case BITS4(1,0,1,0): ks = 0; break;
12610         case BITS4(0,0,1,0): ks = 1; break;
12611         case BITS4(0,1,1,0): ks = 2; break;
12612         default: vassert(0);
12613      }
12614      vassert(ks >= 0 && ks <= 2);
12615      Bool isU = bitU == 1;
12616      Bool is2 = bitQ == 1;
12617      UInt mm  = 32; // invalid
12618      UInt ix  = 16; // invalid
12619      switch (size) {
12620         case X00:
12621            return False; // h_b_b[] case is not allowed
12622         case X01:
12623            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12624         case X10:
12625            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12626         case X11:
12627            return False; // q_d_d[] case is not allowed
12628         default:
12629            vassert(0);
12630      }
12631      vassert(mm < 32 && ix < 16);
12632      IRTemp vecN  = newTempV128();
12633      IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12634      IRTemp vecD  = newTempV128();
12635      assign(vecN, getQReg128(nn));
12636      assign(vecD, getQReg128(dd));
12637      IRTemp res = IRTemp_INVALID;
12638      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
12639                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
12640      putQReg128(dd, mkexpr(res));
12641      const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
12642      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12643      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
12644      HChar ch               = size == X01 ? 'h' : 's';
12645      DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
12646          isU ? 'u' : 's', nm, is2 ? "2" : "",
12647          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
12649      return True;
12650   }
12651
12652   if (bitU == 0
12653       && (opcode == BITS4(1,0,1,1)
12654           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
12655      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
12656      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
12657      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
12658      /* Widens, and size refers to the narrowed lanes. */
12659      UInt ks = 3;
12660      switch (opcode) {
12661         case BITS4(1,0,1,1): ks = 0; break;
12662         case BITS4(0,0,1,1): ks = 1; break;
12663         case BITS4(0,1,1,1): ks = 2; break;
12664         default: vassert(0);
12665      }
12666      vassert(ks >= 0 && ks <= 2);
12667      Bool is2 = bitQ == 1;
12668      UInt mm  = 32; // invalid
12669      UInt ix  = 16; // invalid
12670      switch (size) {
12671         case X00:
12672            return False; // h_b_b[] case is not allowed
12673         case X01:
12674            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12675         case X10:
12676            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12677         case X11:
12678            return False; // q_d_d[] case is not allowed
12679         default:
12680            vassert(0);
12681      }
12682      vassert(mm < 32 && ix < 16);
12683      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
12684      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
12685      newTempsV128_2(&vecN, &vecD);
12686      assign(vecN, getQReg128(nn));
12687      IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12688      assign(vecD, getQReg128(dd));
12689      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
12690                       is2, size, "mas"[ks],
12691                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
12692      putQReg128(dd, mkexpr(res));
12693      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
12694      updateQCFLAGwithDifference(sat1q, sat1n);
12695      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
12696         updateQCFLAGwithDifference(sat2q, sat2n);
12697      }
12698      const HChar* nm        = ks == 0 ? "sqdmull"
12699                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
12700      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12701      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
12702      HChar ch               = size == X01 ? 'h' : 's';
12703      DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
12704          nm, is2 ? "2" : "",
12705          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
12707      return True;
12708   }
12709
12710   if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
12711      /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
12712      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
12713      UInt mm  = 32; // invalid
12714      UInt ix  = 16; // invalid
12715      switch (size) {
12716         case X00:
12717            return False; // b case is not allowed
12718         case X01:
12719            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12720         case X10:
12721            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12722         case X11:
12723            return False; // q case is not allowed
12724         default:
12725            vassert(0);
12726      }
12727      vassert(mm < 32 && ix < 16);
12728      Bool isR = opcode == BITS4(1,1,0,1);
12729      IRTemp res, sat1q, sat1n, vN, vM;
12730      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
12731      vN = newTempV128();
12732      assign(vN, getQReg128(nn));
12733      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12734      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
12735      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12736      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
12737      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
12738      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
12739      const HChar* arr = nameArr_Q_SZ(bitQ, size);
12740      HChar ch         = size == X01 ? 'h' : 's';
12741      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
12743      return True;
12744   }
12745
12746   return False;
12747#  undef INSN
12748}
12749
12750
12751static
12752Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
12753{
12754#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12755   return False;
12756#  undef INSN
12757}
12758
12759
12760static
12761Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
12762{
12763#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12764   return False;
12765#  undef INSN
12766}
12767
12768
12769static
12770Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
12771{
12772#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12773   return False;
12774#  undef INSN
12775}
12776
12777
12778static
12779Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
12780{
12781   /* 31  28    23 21 20 15 13   9 4
12782      000 11110 ty 1  m  op 1000 n opcode2
12783      The first 3 bits are really "M 0 S", but M and S are always zero.
12784      Decode fields are: ty,op,opcode2
12785   */
12786#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12787   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12788       || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
12789      return False;
12790   }
12791   UInt ty      = INSN(23,22);
12792   UInt mm      = INSN(20,16);
12793   UInt op      = INSN(15,14);
12794   UInt nn      = INSN(9,5);
12795   UInt opcode2 = INSN(4,0);
12796   vassert(ty < 4);
12797
12798   if (ty <= X01 && op == X00
12799       && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
12800      /* -------- 0x,00,00000 FCMP  d_d,   s_s -------- */
12801      /* -------- 0x,00,01000 FCMP  d_#0, s_#0 -------- */
12802      /* -------- 0x,00,10000 FCMPE d_d,   s_s -------- */
12803      /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
12804      /* 31        23   20    15      9 4
12805         000 11110 01 1     m 00 1000 n 10 000  FCMPE Dn, Dm
12806         000 11110 01 1 00000 00 1000 n 11 000  FCMPE Dn, #0.0
12807         000 11110 01 1     m 00 1000 n 00 000  FCMP  Dn, Dm
12808         000 11110 01 1 00000 00 1000 n 01 000  FCMP  Dn, #0.0
12809
12810         000 11110 00 1     m 00 1000 n 10 000  FCMPE Sn, Sm
12811         000 11110 00 1 00000 00 1000 n 11 000  FCMPE Sn, #0.0
12812         000 11110 00 1     m 00 1000 n 00 000  FCMP  Sn, Sm
12813         000 11110 00 1 00000 00 1000 n 01 000  FCMP  Sn, #0.0
12814
12815         FCMPE generates Invalid Operation exn if either arg is any kind
12816         of NaN.  FCMP generates Invalid Operation exn if either arg is a
12817         signalling NaN.  We ignore this detail here and produce the same
12818         IR for both.
12819      */
12820      Bool   isD     = (ty & 1) == 1;
12821      Bool   isCMPE  = (opcode2 & 16) == 16;
12822      Bool   cmpZero = (opcode2 & 8) == 8;
12823      IRType ity     = isD ? Ity_F64 : Ity_F32;
12824      Bool   valid   = True;
12825      if (cmpZero && mm != 0) valid = False;
12826      if (valid) {
12827         IRTemp argL  = newTemp(ity);
12828         IRTemp argR  = newTemp(ity);
12829         IRTemp irRes = newTemp(Ity_I32);
12830         assign(argL, getQRegLO(nn, ity));
12831         assign(argR,
12832                cmpZero
12833                   ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
12834                   : getQRegLO(mm, ity));
12835         assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
12836                             mkexpr(argL), mkexpr(argR)));
12837         IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
12838         IRTemp nzcv_28x0 = newTemp(Ity_I64);
12839         assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
12840         setFlags_COPY(nzcv_28x0);
12841         DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
12842             cmpZero ? "#0.0" : nameQRegLO(mm, ity));
12843         return True;
12844      }
12845      return False;
12846   }
12847
12848   return False;
12849#  undef INSN
12850}
12851
12852
12853static
12854Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
12855{
12856   /* 31  28    23 21 20 15   11 9 4  3
12857      000 11110 ty 1  m  cond 01 n op nzcv
12858      The first 3 bits are really "M 0 S", but M and S are always zero.
12859      Decode fields are: ty,op
12860   */
12861#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12862   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12863       || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
12864      return False;
12865   }
12866   UInt ty   = INSN(23,22);
12867   UInt mm   = INSN(20,16);
12868   UInt cond = INSN(15,12);
12869   UInt nn   = INSN(9,5);
12870   UInt op   = INSN(4,4);
12871   UInt nzcv = INSN(3,0);
12872   vassert(ty < 4 && op <= 1);
12873
12874   if (ty <= BITS2(0,1)) {
12875      /* -------- 00,0 FCCMP  s_s -------- */
12876      /* -------- 00,1 FCCMPE s_s -------- */
12877      /* -------- 01,0 FCCMP  d_d -------- */
12878      /* -------- 01,1 FCCMPE d_d -------- */
12879
12880      /* FCCMPE generates Invalid Operation exn if either arg is any kind
12881         of NaN.  FCCMP generates Invalid Operation exn if either arg is a
12882         signalling NaN.  We ignore this detail here and produce the same
12883         IR for both.
12884      */
12885      Bool   isD    = (ty & 1) == 1;
12886      Bool   isCMPE = op == 1;
12887      IRType ity    = isD ? Ity_F64 : Ity_F32;
12888      IRTemp argL   = newTemp(ity);
12889      IRTemp argR   = newTemp(ity);
12890      IRTemp irRes  = newTemp(Ity_I32);
12891      assign(argL,  getQRegLO(nn, ity));
12892      assign(argR,  getQRegLO(mm, ity));
12893      assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
12894                          mkexpr(argL), mkexpr(argR)));
12895      IRTemp condT = newTemp(Ity_I1);
12896      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
12897      IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);
12898
12899      IRTemp nzcvT_28x0 = newTemp(Ity_I64);
12900      assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));
12901
12902      IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);
12903
12904      IRTemp nzcv_28x0 = newTemp(Ity_I64);
12905      assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
12906                                   mkexpr(nzcvT_28x0), nzcvF_28x0));
12907      setFlags_COPY(nzcv_28x0);
12908      DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
12909          nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
12910      return True;
12911   }
12912
12913   return False;
12914#  undef INSN
12915}
12916
12917
12918static
12919Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
12920{
12921   /* 31        23 21 20 15   11 9 5
12922      000 11110 ty 1  m  cond 11 n d
12923      The first 3 bits are really "M 0 S", but M and S are always zero.
12924      Decode fields: ty
12925   */
12926#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12927   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
12928       || INSN(11,10) != BITS2(1,1)) {
12929      return False;
12930   }
12931   UInt ty   = INSN(23,22);
12932   UInt mm   = INSN(20,16);
12933   UInt cond = INSN(15,12);
12934   UInt nn   = INSN(9,5);
12935   UInt dd   = INSN(4,0);
12936   if (ty <= X01) {
12937      /* -------- 00: FCSEL s_s -------- */
      /* -------- 01: FCSEL d_d -------- */
12939      IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
12940      IRTemp srcT = newTemp(ity);
12941      IRTemp srcF = newTemp(ity);
12942      IRTemp res  = newTemp(ity);
12943      assign(srcT, getQRegLO(nn, ity));
12944      assign(srcF, getQRegLO(mm, ity));
12945      assign(res, IRExpr_ITE(
12946                     unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
12947                     mkexpr(srcT), mkexpr(srcF)));
12948      putQReg128(dd, mkV128(0x0000));
12949      putQRegLO(dd, mkexpr(res));
12950      DIP("fcsel %s, %s, %s, %s\n",
12951          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
12952          nameCC(cond));
12953      return True;
12954   }
12955   return False;
12956#  undef INSN
12957}
12958
12959
12960static
12961Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
12962{
12963   /* 31  28    23 21 20     14    9 4
12964      000 11110 ty 1  opcode 10000 n d
12965      The first 3 bits are really "M 0 S", but M and S are always zero.
12966      Decode fields: ty,opcode
12967   */
12968#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12969   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12970       || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
12971      return False;
12972   }
12973   UInt ty     = INSN(23,22);
12974   UInt opcode = INSN(20,15);
12975   UInt nn     = INSN(9,5);
12976   UInt dd     = INSN(4,0);
12977
12978   if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
12979      /* -------- 0x,000000: FMOV  d_d, s_s -------- */
12980      /* -------- 0x,000001: FABS  d_d, s_s -------- */
12981      /* -------- 0x,000010: FNEG  d_d, s_s -------- */
12982      /* -------- 0x,000011: FSQRT d_d, s_s -------- */
12983      IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
12984      IRTemp src = newTemp(ity);
12985      IRTemp res = newTemp(ity);
12986      const HChar* nm = "??";
12987      assign(src, getQRegLO(nn, ity));
12988      switch (opcode) {
12989         case BITS6(0,0,0,0,0,0):
12990            nm = "fmov"; assign(res, mkexpr(src)); break;
12991         case BITS6(0,0,0,0,0,1):
12992            nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
12993         case BITS6(0,0,0,0,1,0):
            nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
12995         case BITS6(0,0,0,0,1,1):
12996            nm = "fsqrt";
12997            assign(res, binop(mkSQRTF(ity),
12998                              mkexpr(mk_get_IR_rounding_mode()),
12999                              mkexpr(src))); break;
13000         default:
13001            vassert(0);
13002      }
13003      putQReg128(dd, mkV128(0x0000));
13004      putQRegLO(dd, mkexpr(res));
13005      DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13006      return True;
13007   }
13008
13009   if (   (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
13010                         || opcode == BITS6(0,0,0,1,0,1)))
13011       || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
13012                         || opcode == BITS6(0,0,0,1,0,1)))
13013       || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
13014                         || opcode == BITS6(0,0,0,1,0,0)))) {
13015      /* -------- 11,000100: FCVT s_h -------- */
13016      /* -------- 11,000101: FCVT d_h -------- */
13017      /* -------- 00,000111: FCVT h_s -------- */
13018      /* -------- 00,000101: FCVT d_s -------- */
13019      /* -------- 01,000111: FCVT h_d -------- */
13020      /* -------- 01,000100: FCVT s_d -------- */
13021      /* 31        23 21    16 14    9 4
13022         000 11110 11 10001 00 10000 n d   FCVT Sd, Hn
13023         --------- 11 ----- 01 ---------   FCVT Dd, Hn
13024         --------- 00 ----- 11 ---------   FCVT Hd, Sn
13025         --------- 00 ----- 01 ---------   FCVT Dd, Sn
13026         --------- 01 ----- 11 ---------   FCVT Hd, Dn
13027         --------- 01 ----- 00 ---------   FCVT Sd, Dn
13028         Rounding, when dst is smaller than src, is per the FPCR.
13029      */
13030      UInt b2322 = ty;
13031      UInt b1615 = opcode & BITS2(1,1);
13032      switch ((b2322 << 2) | b1615) {
13033         case BITS4(0,0,0,1):   // S -> D
13034         case BITS4(1,1,0,1): { // H -> D
13035            Bool   srcIsH = b2322 == BITS2(1,1);
13036            IRType srcTy  = srcIsH ? Ity_F16 : Ity_F32;
13037            IRTemp res    = newTemp(Ity_F64);
13038            assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
13039                             getQRegLO(nn, srcTy)));
13040            putQReg128(dd, mkV128(0x0000));
13041            putQRegLO(dd, mkexpr(res));
13042            DIP("fcvt %s, %s\n",
13043                nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
13044            return True;
13045         }
13046         case BITS4(0,1,0,0):   // D -> S
13047         case BITS4(0,1,1,1): { // D -> H
13048            Bool   dstIsH = b1615 == BITS2(1,1);
13049            IRType dstTy  = dstIsH ? Ity_F16 : Ity_F32;
13050            IRTemp res    = newTemp(dstTy);
13051            assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
13052                              mkexpr(mk_get_IR_rounding_mode()),
13053                              getQRegLO(nn, Ity_F64)));
13054            putQReg128(dd, mkV128(0x0000));
13055            putQRegLO(dd, mkexpr(res));
13056            DIP("fcvt %s, %s\n",
13057                nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
13058            return True;
13059         }
13060         case BITS4(0,0,1,1):   // S -> H
13061         case BITS4(1,1,0,0): { // H -> S
13062            Bool   toH   = b1615 == BITS2(1,1);
13063            IRType srcTy = toH ? Ity_F32 : Ity_F16;
13064            IRType dstTy = toH ? Ity_F16 : Ity_F32;
13065            IRTemp res = newTemp(dstTy);
13066            if (toH) {
13067               assign(res, binop(Iop_F32toF16,
13068                                 mkexpr(mk_get_IR_rounding_mode()),
13069                                 getQRegLO(nn, srcTy)));
13070
13071            } else {
13072               assign(res, unop(Iop_F16toF32,
13073                                getQRegLO(nn, srcTy)));
13074            }
13075            putQReg128(dd, mkV128(0x0000));
13076            putQRegLO(dd, mkexpr(res));
13077            DIP("fcvt %s, %s\n",
13078                nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
13079            return True;
13080         }
13081         default:
13082            break;
13083      }
13084      /* else unhandled */
13085      return False;
13086   }
13087
13088   if (ty <= X01
13089       && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
13090       && opcode != BITS6(0,0,1,1,0,1)) {
13091      /* -------- 0x,001000 FRINTN d_d, s_s -------- */
13092      /* -------- 0x,001001 FRINTP d_d, s_s -------- */
13093      /* -------- 0x,001010 FRINTM d_d, s_s -------- */
13094      /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
13095      /* -------- 0x,001100 FRINTA d_d, s_s -------- */
13096      /* -------- 0x,001110 FRINTX d_d, s_s -------- */
13097      /* -------- 0x,001111 FRINTI d_d, s_s -------- */
13098      /* 31        23 21   17  14    9 4
13099         000 11110 0x 1001 111 10000 n d  FRINTI Fd, Fm (round per FPCR)
13100                           rm
13101         x==0 => S-registers, x==1 => D-registers
13102         rm (17:15) encodings:
13103            111 per FPCR  (FRINTI)
13104            001 +inf      (FRINTP)
13105            010 -inf      (FRINTM)
13106            011 zero      (FRINTZ)
13107            000 tieeven   (FRINTN) -- !! FIXME KLUDGED !!
13108            100 tieaway   (FRINTA) -- !! FIXME KLUDGED !!
13109            110 per FPCR + "exact = TRUE" (FRINTX)
13110            101 unallocated
13111      */
13112      Bool    isD   = (ty & 1) == 1;
13113      UInt    rm    = opcode & BITS6(0,0,0,1,1,1);
13114      IRType  ity   = isD ? Ity_F64 : Ity_F32;
13115      IRExpr* irrmE = NULL;
13116      UChar   ch    = '?';
13117      switch (rm) {
13118         case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
13119         case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
13120         case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
13121         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
13122         case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
         // FRINTX is FRINTI plus "exact = TRUE": it additionally raises
         // Inexact if the rounded result differs numerically from the
         // operand.  FP exceptions are not modelled here, so round per
         // FPCR as for FRINTI.
13125         case BITS3(1,1,0):
13126            ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
13127         case BITS3(1,1,1):
13128            ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
13129         // The following is a kludge.  There's no Irrm_ value to represent
13130         // this ("to nearest, with ties to even")
13131         case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
13132         default: break;
13133      }
13134      if (irrmE) {
13135         IRTemp src = newTemp(ity);
13136         IRTemp dst = newTemp(ity);
13137         assign(src, getQRegLO(nn, ity));
13138         assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13139                           irrmE, mkexpr(src)));
13140         putQReg128(dd, mkV128(0x0000));
13141         putQRegLO(dd, mkexpr(dst));
13142         DIP("frint%c %s, %s\n",
13143             ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13144         return True;
13145      }
13146      return False;
13147   }
13148
13149   return False;
13150#  undef INSN
13151}
13152
13153
13154static
13155Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
13156{
13157   /* 31  28    23 21 20 15     11 9 4
13158      000 11110 ty 1  m  opcode 10 n d
13159      The first 3 bits are really "M 0 S", but M and S are always zero.
13160      Decode fields: ty, opcode
13161   */
13162#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13163   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13164       || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
13165      return False;
13166   }
13167   UInt ty     = INSN(23,22);
13168   UInt mm     = INSN(20,16);
13169   UInt opcode = INSN(15,12);
13170   UInt nn     = INSN(9,5);
13171   UInt dd     = INSN(4,0);
13172
13173   if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
13174      /* ------- 0x,0000: FMUL d_d, s_s ------- */
13175      /* ------- 0x,0001: FDIV d_d, s_s ------- */
13176      /* ------- 0x,0010: FADD d_d, s_s ------- */
13177      /* ------- 0x,0011: FSUB d_d, s_s ------- */
13178      /* ------- 0x,0100: FMAX d_d, s_s ------- */
13179      /* ------- 0x,0101: FMIN d_d, s_s ------- */
13180      /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
13181      /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
13182      IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
13183      IROp   iop = Iop_INVALID;
13184      const HChar* nm = "???";
13185      switch (opcode) {
13186         case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
13187         case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
13188         case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
13189         case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
13190         case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
13191         case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
13192         case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
13193         case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
13194         default: vassert(0);
13195      }
13196      if (opcode <= BITS4(0,0,1,1)) {
13197         // This is really not good code.  TODO: avoid width-changing
13198         IRTemp res = newTemp(ity);
13199         assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13200                                getQRegLO(nn, ity), getQRegLO(mm, ity)));
13201         putQReg128(dd, mkV128(0));
13202         putQRegLO(dd, mkexpr(res));
13203      } else {
13204         putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
13205                             binop(iop, getQReg128(nn), getQReg128(mm))));
13206      }
13207      DIP("%s %s, %s, %s\n",
13208          nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13209      return True;
13210   }
13211
13212   if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
13213      /* ------- 0x,1000: FNMUL d_d, s_s ------- */
13214      IRType ity  = ty == X00 ? Ity_F32 : Ity_F64;
13215      IROp   iop  = mkMULF(ity);
13216      IROp   iopn = mkNEGF(ity);
13217      const HChar* nm = "fnmul";
13218      IRExpr* resE = unop(iopn,
13219                          triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13220                                getQRegLO(nn, ity), getQRegLO(mm, ity)));
13221      IRTemp  res  = newTemp(ity);
13222      assign(res, resE);
13223      putQReg128(dd, mkV128(0));
13224      putQRegLO(dd, mkexpr(res));
13225      DIP("%s %s, %s, %s\n",
13226          nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13227      return True;
13228   }
13229
13230   return False;
13231#  undef INSN
13232}
13233
13234
13235static
13236Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
13237{
13238   /* 31  28    23 21 20 15 14 9 4
13239      000 11111 ty o1 m  o0 a  n d
13240      The first 3 bits are really "M 0 S", but M and S are always zero.
13241      Decode fields: ty,o1,o0
13242   */
13243#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13244   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
13245      return False;
13246   }
13247   UInt ty    = INSN(23,22);
13248   UInt bitO1 = INSN(21,21);
13249   UInt mm    = INSN(20,16);
13250   UInt bitO0 = INSN(15,15);
13251   UInt aa    = INSN(14,10);
13252   UInt nn    = INSN(9,5);
13253   UInt dd    = INSN(4,0);
13254   vassert(ty < 4);
13255
13256   if (ty <= X01) {
13257      /* -------- 0x,0,0 FMADD  d_d_d_d, s_s_s_s -------- */
13258      /* -------- 0x,0,1 FMSUB  d_d_d_d, s_s_s_s -------- */
13259      /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
13260      /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
13261      /* -------------------- F{N}M{ADD,SUB} -------------------- */
13262      /* 31          22   20 15 14 9 4   ix
13263         000 11111 0 sz 0 m  0  a  n d   0   FMADD  Fd,Fn,Fm,Fa
13264         000 11111 0 sz 0 m  1  a  n d   1   FMSUB  Fd,Fn,Fm,Fa
13265         000 11111 0 sz 1 m  0  a  n d   2   FNMADD Fd,Fn,Fm,Fa
13266         000 11111 0 sz 1 m  1  a  n d   3   FNMSUB Fd,Fn,Fm,Fa
13267         where Fx=Dx when sz=1, Fx=Sx when sz=0
13268
13269                  -----SPEC------    ----IMPL----
13270         fmadd       a +    n * m    a + n * m
13271         fmsub       a + (-n) * m    a - n * m
13272         fnmadd   (-a) + (-n) * m    -(a + n * m)
13273         fnmsub   (-a) +    n * m    -(a - n * m)
13274      */
13275      Bool    isD   = (ty & 1) == 1;
13276      UInt    ix    = (bitO1 << 1) | bitO0;
13277      IRType  ity   = isD ? Ity_F64 : Ity_F32;
13278      IROp    opADD = mkADDF(ity);
13279      IROp    opSUB = mkSUBF(ity);
13280      IROp    opMUL = mkMULF(ity);
13281      IROp    opNEG = mkNEGF(ity);
13282      IRTemp  res   = newTemp(ity);
13283      IRExpr* eA    = getQRegLO(aa, ity);
13284      IRExpr* eN    = getQRegLO(nn, ity);
13285      IRExpr* eM    = getQRegLO(mm, ity);
13286      IRExpr* rm    = mkexpr(mk_get_IR_rounding_mode());
13287      IRExpr* eNxM  = triop(opMUL, rm, eN, eM);
13288      switch (ix) {
13289         case 0:  assign(res, triop(opADD, rm, eA, eNxM)); break;
13290         case 1:  assign(res, triop(opSUB, rm, eA, eNxM)); break;
13291         case 2:  assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
13292         case 3:  assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
13293         default: vassert(0);
13294      }
13295      putQReg128(dd, mkV128(0x0000));
13296      putQRegLO(dd, mkexpr(res));
13297      const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
13298      DIP("%s %s, %s, %s, %s\n",
13299          names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
13300                     nameQRegLO(mm, ity), nameQRegLO(aa, ity));
13301      return True;
13302   }
13303
13304   return False;
13305#  undef INSN
13306}
13307
13308
13309static
13310Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
13311{
13312   /* 31  28    23 21 20   12  9    4
13313      000 11110 ty 1  imm8 100 imm5 d
13314      The first 3 bits are really "M 0 S", but M and S are always zero.
13315   */
13316#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13317   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13318       || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
13319      return False;
13320   }
13321   UInt ty     = INSN(23,22);
13322   UInt imm8   = INSN(20,13);
13323   UInt imm5   = INSN(9,5);
13324   UInt dd     = INSN(4,0);
13325
13326   /* ------- 00,00000: FMOV s_imm ------- */
13327   /* ------- 01,00000: FMOV d_imm ------- */
13328   if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
13329      Bool  isD  = (ty & 1) == 1;
13330      ULong imm  = VFPExpandImm(imm8, isD ? 64 : 32);
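      /* For reference (per the ARM ARM pseudocode, not re-verified
         against the helper): imm8 = abcdefgh expands, for F64, to
         a : NOT(b) : Replicate(b,8) : cd : efgh : Zeros(48), and for
         F32 to a : NOT(b) : Replicate(b,5) : cd : efgh : Zeros(19).
         E.g. imm8 = 0x70 expands to 1.0 in either width. */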
13331      if (!isD) {
13332         vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
13333      }
13334      putQReg128(dd, mkV128(0));
13335      putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
13336      DIP("fmov %s, #0x%llx\n",
13337          nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
13338      return True;
13339   }
13340
13341   return False;
13342#  undef INSN
13343}
13344
13345
13346static
13347Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
13348{
13350   /* 31 30 29 28    23   21 20    18     15    9 4
13351      sf  0  0 11110 type 0  rmode opcode scale n d
13352      The first 3 bits are really "sf 0 S", but S is always zero.
13353      Decode fields: sf,type,rmode,opcode
13354   */
13355#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13356   if (INSN(30,29) != BITS2(0,0)
13357       || INSN(28,24) != BITS5(1,1,1,1,0)
13358       || INSN(21,21) != 0) {
13359      return False;
13360   }
13361   UInt bitSF = INSN(31,31);
13362   UInt ty    = INSN(23,22); // type
13363   UInt rm    = INSN(20,19); // rmode
13364   UInt op    = INSN(18,16); // opcode
13365   UInt sc    = INSN(15,10); // scale
13366   UInt nn    = INSN(9,5);
13367   UInt dd    = INSN(4,0);
13368
13369   if (ty <= X01 && rm == X11
13370       && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
13371      /* -------- (ix) sf ty rm opc -------- */
13372      /* -------- 0    0  00 11 000: FCVTZS w_s_#fbits -------- */
13373      /* -------- 1    0  01 11 000: FCVTZS w_d_#fbits -------- */
13374      /* -------- 2    1  00 11 000: FCVTZS x_s_#fbits -------- */
13375      /* -------- 3    1  01 11 000: FCVTZS x_d_#fbits -------- */
13376
13377      /* -------- 4    0  00 11 001: FCVTZU w_s_#fbits -------- */
13378      /* -------- 5    0  01 11 001: FCVTZU w_d_#fbits -------- */
13379      /* -------- 6    1  00 11 001: FCVTZU x_s_#fbits -------- */
13380      /* -------- 7    1  01 11 001: FCVTZU x_d_#fbits -------- */
13381      Bool isI64 = bitSF == 1;
13382      Bool isF64 = (ty & 1) == 1;
13383      Bool isU   = (op & 1) == 1;
13384      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
13385
13386      Int fbits = 64 - sc;
13387      vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
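      /* This computes Rd = truncate-toward-zero(Fn * 2^fbits); e.g.
         with fbits=16, an input of 1.5 yields 98304 (= 1.5 * 65536). */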
13388
13389      Double  scale  = two_to_the_plus(fbits);
13390      IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
13391                             : IRExpr_Const(IRConst_F32( (Float)scale ));
13392      IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;
13393
13394      const IROp ops[8]
13395        = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
13396            Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
13397      IRTemp irrm = newTemp(Ity_I32);
13398      assign(irrm, mkU32(Irrm_ZERO));
13399
13400      IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
13401      IRExpr* res = binop(ops[ix], mkexpr(irrm),
13402                                   triop(opMUL, mkexpr(irrm), src, scaleE));
13403      putIRegOrZR(isI64, dd, res);
13404
13405      DIP("fcvtz%c %s, %s, #%d\n",
13406          isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
13407          nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
13408      return True;
13409   }
13410
13411   /* ------ sf,ty,rm,opc ------ */
13412   /* ------ x,0x,00,010  SCVTF s/d, w/x, #fbits  ------ */
13413   /* ------ x,0x,00,011  UCVTF s/d, w/x, #fbits  ------ */
13414   /* (ix) sf  S 28    ty   rm opc 15    9 4
13415      0    0 0 0 11110 00 0 00 010 scale n d  SCVTF Sd, Wn, #fbits
13416      1    0 0 0 11110 01 0 00 010 scale n d  SCVTF Dd, Wn, #fbits
13417      2    1 0 0 11110 00 0 00 010 scale n d  SCVTF Sd, Xn, #fbits
13418      3    1 0 0 11110 01 0 00 010 scale n d  SCVTF Dd, Xn, #fbits
13419
13420      4    0 0 0 11110 00 0 00 011 scale n d  UCVTF Sd, Wn, #fbits
13421      5    0 0 0 11110 01 0 00 011 scale n d  UCVTF Dd, Wn, #fbits
13422      6    1 0 0 11110 00 0 00 011 scale n d  UCVTF Sd, Xn, #fbits
13423      7    1 0 0 11110 01 0 00 011 scale n d  UCVTF Dd, Xn, #fbits
13424
13425      These are signed/unsigned conversion from integer registers to
13426      FP registers, all 4 32/64-bit combinations, rounded per FPCR,
13427      scaled per |scale|.
13428   */
13429   if (ty <= X01 && rm == X00
13430       && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
13431       && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
13432      Bool isI64 = bitSF == 1;
13433      Bool isF64 = (ty & 1) == 1;
13434      Bool isU   = (op & 1) == 1;
13435      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
13436
13437      Int fbits = 64 - sc;
13438      vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
13439
13440      Double  scale  = two_to_the_minus(fbits);
13441      IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
13442                             : IRExpr_Const(IRConst_F32( (Float)scale ));
13443      IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;
13444
13445      const IROp ops[8]
13446        = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
13447            Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
13448      IRExpr* src = getIRegOrZR(isI64, nn);
13449      IRExpr* res = (isF64 && !isI64)
13450                       ? unop(ops[ix], src)
13451                       : binop(ops[ix],
13452                               mkexpr(mk_get_IR_rounding_mode()), src);
13453      putQReg128(dd, mkV128(0));
13454      putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));
13455
13456      DIP("%ccvtf %s, %s, #%d\n",
13457          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
13458          nameIRegOrZR(isI64, nn), fbits);
13459      return True;
13460   }
13461
13462   return False;
13463#  undef INSN
13464}
13465
13466
13467static
13468Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
13469{
13470   /* 31 30 29 28    23   21 20    18     15     9 4
13471      sf  0  0 11110 type 1  rmode opcode 000000 n d
13472      The first 3 bits are really "sf 0 S", but S is always zero.
13473      Decode fields: sf,type,rmode,opcode
13474   */
13475#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13476   if (INSN(30,29) != BITS2(0,0)
13477       || INSN(28,24) != BITS5(1,1,1,1,0)
13478       || INSN(21,21) != 1
13479       || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
13480      return False;
13481   }
13482   UInt bitSF = INSN(31,31);
13483   UInt ty    = INSN(23,22); // type
13484   UInt rm    = INSN(20,19); // rmode
13485   UInt op    = INSN(18,16); // opcode
13486   UInt nn    = INSN(9,5);
13487   UInt dd    = INSN(4,0);
13488
13489   // op = 000, 001
13490   /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
13491   /*    30       23   20 18  15     9 4
13492      sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
13493      sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
13494      ---------------- 01 --------------  FCVTP-------- (round to +inf)
13495      ---------------- 10 --------------  FCVTM-------- (round to -inf)
13496      ---------------- 11 --------------  FCVTZ-------- (round to zero)
13497      ---------------- 00 100 ----------  FCVTAS------- (nearest, ties away)
13498      ---------------- 00 101 ----------  FCVTAU------- (nearest, ties away)
13499
13500      Rd is Xd when sf==1, Wd when sf==0
13501      Fn is Dn when x==1, Sn when x==0
13502      20:19 carry the rounding mode, using the same encoding as FPCR
13503   */
   if (ty <= X01
       && (   ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
           || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
          )
      ) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      /* Decide on the IR rounding mode to use. */
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
         switch (rm) {
            case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
            case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
            case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
            case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
            default: vassert(0);
         }
      } else {
         vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
         switch (rm) {
            case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
            default: vassert(0);
         }
      }
      vassert(irrm != 8);
      /* Decide on the conversion primop, based on the source size,
         dest size and signedness (8 possibilities).  Case coding:
            F32 ->s I32   0
            F32 ->u I32   1
            F32 ->s I64   2
            F32 ->u I64   3
            F64 ->s I32   4
            F64 ->u I32   5
            F64 ->s I64   6
            F64 ->u I64   7
      */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
      vassert(ix < 8);
      const IROp iops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp iop = iops[ix];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      if (/* F32toI32S */
             (iop == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
          /* F32toI32U */
          || (iop == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
          /* F32toI64S */
          || (iop == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
          /* F32toI64U */
          || (iop == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
          /* F64toI32S */
          || (iop == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
          /* F64toI32U */
          || (iop == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
          /* F64toI64S */
          || (iop == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
          /* F64toI64U */
          || (iop == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
         ) {
        /* validated */
      } else {
        return False;
      }
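      /* Note (editorial observation): the whitelist above covers all
         32 reachable (iop, irrm) combinations, so at present the
         bounce path never fires; it remains as a hook for any future
         unvalidated cases. */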
      IRType srcTy  = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy  = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src    = newTemp(srcTy);
      IRTemp dst    = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }

   // op = 010, 011
   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* (ix) sf  S 28    ty   rm op  15     9 4
      0    0 0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Wn
      1    0 0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Wn
      2    1 0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Xn
      3    1 0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Xn

      4    0 0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Wn
      5    0 0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Wn
      6    1 0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Xn
      7    1 0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Xn

      These are signed/unsigned conversions from integer registers to
      FP registers, in all four 32/64-bit combinations, rounded per FPCR.
   */
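   /* Worked example (illustrative; derived from the table above, not
      part of the original commentary): 0x9E620020 has sf=1 ty=01
      rm=00 op=010 n=1 d=0, hence SCVTF d0, x1 -- a signed
      I64 -> F64 conversion. */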
   if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
        = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
            Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
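      /* I32 -> F64 conversions are exact, so Iop_I32StoF64 and
         Iop_I32UtoF64 are unops taking no rounding mode.  The other
         six conversions can lose precision and hence take the
         FPCR-derived rounding mode. */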
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }

   // op = 110, 111
   /* -------- FMOV (general) -------- */
   /* case sf  S       ty   rm op  15     9 4
       (1) 0 0 0 11110 00 1 00 111 000000 n d     FMOV Sd,      Wn
       (2) 1 0 0 11110 01 1 00 111 000000 n d     FMOV Dd,      Xn
       (3) 1 0 0 11110 10 1 01 111 000000 n d     FMOV Vd.D[1], Xn

       (4) 0 0 0 11110 00 1 00 110 000000 n d     FMOV Wd, Sn
       (5) 1 0 0 11110 01 1 00 110 000000 n d     FMOV Xd, Dn
       (6) 1 0 0 11110 10 1 01 110 000000 n d     FMOV Xd, Vn.D[1]
   */
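   /* FMOV (general) moves raw bits between the integer and vector
      register files; no value conversion or rounding is involved.
      Worked example (illustrative, not from the original commentary):
      0x9E660000 has sf=1 ty=01 rm=00 op=110 n=0 d=0, hence
      FMOV x0, d0 (case 5). */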
   if (1) {
      UInt ix = 0; // case
      if (bitSF == 0) {
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 1;
         else
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 4;
      } else {
         vassert(bitSF == 1);
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 2;
         else
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 5;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
            ix = 3;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
            ix = 6;
      }
      if (ix > 0) {
         switch (ix) {
            case 1:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg32orZR(nn));
               DIP("fmov s%u, w%u\n", dd, nn);
               break;
            case 2:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg64orZR(nn));
               DIP("fmov d%u, x%u\n", dd, nn);
               break;
            case 3:
               putQRegHI64(dd, getIReg64orZR(nn));
               DIP("fmov v%u.d[1], x%u\n", dd, nn);
               break;
            case 4:
               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
               DIP("fmov w%u, s%u\n", dd, nn);
               break;
            case 5:
               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
               DIP("fmov x%u, d%u\n", dd, nn);
               break;
            case 6:
               putIReg64orZR(dd, getQRegHI64(nn));
               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* undecodable; fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
{
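   /* Try each SIMD/FP sub-decoder in turn until one claims |insn|.
      Each success test is marked UNLIKELY because, for any particular
      insn, most of the sub-decoders will decline it. */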
   Bool ok;
   ok = dis_AdvSIMD_EXT(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_TBL_TBX(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_across_lanes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_modified_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_aes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Disassemble a single ARM64 instruction               ---*/
/*------------------------------------------------------------*/

/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has guest IP |guest_PC_curr_instr|,
   which will have been set before the call here.  Returns True iff
   the instruction was decoded, in which case *dres will be set
   accordingly; returns False otherwise, in which case *dres should
   be ignored by the caller. */

static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        const UChar* guest_instr,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  abiinfo
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* Set result defaults. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      const UChar* code = guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /*  branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn.  The reason is that the IR
            // we're injecting here can change; in that case the
            // translation would have to be redone.  For ease of
            // handling, we simply invalidate all the time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           const UChar* guest_code_IN,
                           Long         delta_IN,
                           Addr         guest_IP,
                           VexArch      guest_arch,
                           const VexArchInfo* archinfo,
                           const VexAbiInfo*  abiinfo,
                           VexEndness   host_endness_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_endness        = host_endness_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17, since for x < 2
      the unsigned subtraction wraps to a value far greater than 15 */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 &guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         UChar buf[64];
         UInt  insn
                  = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
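         /* Render the 32 insn bits MSB first, with a ' between the
            nibbles of a byte and a space between bytes, e.g.
            1001'1110 0110'0010 ... */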
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
              if ((i & 7) == 0) buf[j++] = ' ';
              else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.len         = 0;
      dres.whatNext    = Dis_StopHere;
      dres.jk_StopHere = Ijk_NoDecode;
      dres.continueAt  = 0;
   }
   return dres;
}


/*--------------------------------------------------------------------*/
/*--- end                                       guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/
