1
2/*--------------------------------------------------------------------*/
3/*--- begin                                       guest_arm_toIR.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2017 OpenWorks LLP
11      info@open-works.net
12
13   NEON support is
14   Copyright (C) 2010-2017 Samsung Electronics
15   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16              and Kirill Batuzov <batuzovk@ispras.ru>
17
18   This program is free software; you can redistribute it and/or
19   modify it under the terms of the GNU General Public License as
20   published by the Free Software Foundation; either version 2 of the
21   License, or (at your option) any later version.
22
23   This program is distributed in the hope that it will be useful, but
24   WITHOUT ANY WARRANTY; without even the implied warranty of
25   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26   General Public License for more details.
27
28   You should have received a copy of the GNU General Public License
29   along with this program; if not, write to the Free Software
30   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31   02110-1301, USA.
32
33   The GNU General Public License is contained in the file COPYING.
34*/
35
36/* XXXX thumb to check:
37   that all cases where putIRegT writes r15, we generate a jump.
38
39   All uses of newTemp assign to an IRTemp and not a UInt
40
41   For all thumb loads and stores, including VFP ones, new-ITSTATE is
42   backed out before the memory op, and restored afterwards.  This
43   needs to happen even after we go uncond.  (and for sure it doesn't
44   happen for VFP loads/stores right now).
45
46   VFP on thumb: check that we exclude all r13/r15 cases that we
47   should.
48
49   XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
50   taking into account the number of insns guarded by an IT.
51
   remove the nasty hack, in the spechelper, of looking for Or32(...,
   0xE0) as the first arg to armg_calculate_condition, and instead
   use Slice44 as specified in comments in the spechelper.
55
56   add specialisations for armg_calculate_flag_c and _v, as they
57   are moderately often needed in Thumb code.
58
59   Correctness: ITSTATE handling in Thumb SVCs is wrong.
60
61   Correctness (obscure): in m_transtab, when invalidating code
62   address ranges, invalidate up to 18 bytes after the end of the
63   range.  This is because the ITSTATE optimisation at the top of
64   _THUMB_WRK below analyses up to 18 bytes before the start of any
65   given instruction, and so might depend on the invalidated area.
66*/
67
68/* Limitations, etc
69
70   - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
71     These instructions are non-restartable in the case where the
72     transfer(s) fault.
73
74   - SWP: the restart jump back is Ijk_Boring; it should be
75     Ijk_NoRedir but that's expensive.  See comments on casLE() in
76     guest_x86_toIR.c.
77*/
78
79/* "Special" instructions.
80
81   This instruction decoder can decode four special instructions
82   which mean nothing natively (are no-ops as far as regs/mem are
83   concerned) but have meaning for supporting Valgrind.  A special
84   instruction is flagged by a 16-byte preamble:
85
86      E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
87      (mov r12, r12, ROR #3;   mov r12, r12, ROR #13;
88       mov r12, r12, ROR #29;  mov r12, r12, ROR #19)
89
   Following that, one of the following 4 is allowed
   (standard interpretation in parentheses):
92
93      E18AA00A (orr r10,r10,r10)   R3 = client_request ( R4 )
94      E18BB00B (orr r11,r11,r11)   R3 = guest_NRADDR
95      E18CC00C (orr r12,r12,r12)   branch-and-link-to-noredir R4
96      E1899009 (orr r9,r9,r9)      IR injection
97
98   Any other bytes following the 16-byte preamble are illegal and
99   constitute a failure in instruction decoding.  This all assumes
100   that the preamble will never occur except in specific code
101   fragments designed for Valgrind to catch.
102*/
103
104/* Translates ARM(v5) code to IR. */
105
106#include "libvex_basictypes.h"
107#include "libvex_ir.h"
108#include "libvex.h"
109#include "libvex_guest_arm.h"
110
111#include "main_util.h"
112#include "main_globals.h"
113#include "guest_generic_bb_to_IR.h"
114#include "guest_arm_defs.h"
115
116
117/*------------------------------------------------------------*/
118/*--- Globals                                              ---*/
119/*------------------------------------------------------------*/
120
/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction.
*/
125
/* CONST: what is the host's endianness?  This has to do with float vs
   double register accesses on VFP, but it's complex and not properly
   thought out.  Consulted by floatGuestRegOffset, which only accepts
   VexEndnessLE and asserts otherwise. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated.  This is the real, "decoded" address (not subject
   to the CPSR.T kludge).  getIRegA and getIRegT use it to synthesise
   the value of r15 (this address + 8, resp. + 4) as a constant. */
static Addr32 guest_R15_curr_instr_notENC;

/* CONST, FOR ASSERTIONS ONLY.  Indicates whether currently processed
   insn is Thumb (True) or ARM (False).  Checked by the
   ASSERT_IS_THUMB and ASSERT_IS_ARM macros. */
static Bool __curr_is_Thumb;

/* MOD: The IRSB* into which we're generating code.  stmt() appends
   to it and newTemp() allocates temporaries in its type
   environment. */
static IRSB* irsb;

/* These are to do with handling writes to r15.  They are initially
   set at the start of disInstr_ARM_WRK to indicate no update,
   possibly updated during the routine, and examined again at the end.
   If they have been set to indicate a r15 update then a jump is
   generated.  Note, "explicit" jumps (b, bx, etc) are generated
   directly, not using this mechanism -- this is intended to handle
   the implicit-style jumps resulting from (eg) assigning to r15 as
   the result of insns we wouldn't normally consider branchy. */

/* MOD.  Initially False; set to True iff abovementioned handling is
   required.  putIRegA sets it when asked to write r15. */
static Bool r15written;

/* MOD.  Initially IRTemp_INVALID.  If the r15 branch to be generated
   is conditional, this holds the gating IRTemp :: Ity_I32.  If the
   branch to be generated is unconditional, this remains
   IRTemp_INVALID.  putIRegA copies its guardT argument here when
   writing r15. */
static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
161
162/* MOD.  Initially Ijk_Boring.  If an r15 branch is to be generated,
163   this holds the jump kind. */
164static IRTemp r15kind;
165
166
167/*------------------------------------------------------------*/
168/*--- Debugging output                                     ---*/
169/*------------------------------------------------------------*/
170
/* Print disassembly/trace text via vex_printf, but only when
   front-end tracing (VEX_TRACE_FE) is enabled in vex_traceflags.
   The body is wrapped in do { } while (0) so that the expansion is a
   single statement: a bare 'if' would silently capture an 'else'
   following a use such as "if (c) DIP(...); else ...". */
#define DIP(format, args...)              \
   do {                                   \
      if (vex_traceflags & VEX_TRACE_FE)  \
         vex_printf(format, ## args);     \
   } while (0)
174
/* Format text into 'buf' via vex_sprintf, but only when front-end
   tracing (VEX_TRACE_FE) is enabled in vex_traceflags; otherwise
   'buf' is left untouched.  Wrapped in do { } while (0) so that the
   expansion is a single statement and cannot capture a following
   'else'. */
#define DIS(buf, format, args...)            \
   do {                                      \
      if (vex_traceflags & VEX_TRACE_FE)     \
         vex_sprintf(buf, format, ## args);  \
   } while (0)
178
/* Assert that the instruction currently being translated is a Thumb
   one (__curr_is_Thumb is set). */
#define ASSERT_IS_THUMB \
   do { vassert(__curr_is_Thumb); } while (0)

/* Assert that the instruction currently being translated is an ARM
   one (__curr_is_Thumb is clear). */
#define ASSERT_IS_ARM \
   do { vassert(! __curr_is_Thumb); } while (0)
184
185
186/*------------------------------------------------------------*/
187/*--- Helper bits and pieces for deconstructing the        ---*/
188/*--- arm insn stream.                                     ---*/
189/*------------------------------------------------------------*/
190
191/* Do a little-endian load of a 32-bit word, regardless of the
192   endianness of the underlying host. */
193static inline UInt getUIntLittleEndianly ( const UChar* p )
194{
195   UInt w = 0;
196   w = (w << 8) | p[3];
197   w = (w << 8) | p[2];
198   w = (w << 8) | p[1];
199   w = (w << 8) | p[0];
200   return w;
201}
202
203/* Do a little-endian load of a 16-bit word, regardless of the
204   endianness of the underlying host. */
205static inline UShort getUShortLittleEndianly ( const UChar* p )
206{
207   UShort w = 0;
208   w = (w << 8) | p[1];
209   w = (w << 8) | p[0];
210   return w;
211}
212
213static UInt ROR32 ( UInt x, UInt sh ) {
214   vassert(sh >= 0 && sh < 32);
215   if (sh == 0)
216      return x;
217   else
218      return (x << (32-sh)) | (x >> sh);
219}
220
221static Int popcount32 ( UInt x )
222{
223   Int res = 0, i;
224   for (i = 0; i < 32; i++) {
225      res += (x & 1);
226      x >>= 1;
227   }
228   return res;
229}
230
231static UInt setbit32 ( UInt x, Int ix, UInt b )
232{
233   UInt mask = 1 << ix;
234   x &= ~mask;
235   x |= ((b << ix) & mask);
236   return x;
237}
238
/* BITSn(b[n-1], ..., b1, b0): assemble an n-bit value from n
   individual bit arguments, most significant bit first.  Each
   argument is simply shifted into position and ORed in, so arguments
   are expected to be 0 or 1; nothing here enforces that. */
#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0)                      \
  (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

/* BITS8 is two BITS4 nibbles glued together. */
#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

/* BITS5 .. BITS7 are BITS8 with the unused top bits set to zero. */
#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

/* BITS9 .. BITS12 are BITS8 for the low byte plus the extra high
   bits shifted in individually. */
#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)      \
   (((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)                                \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ( ((_b10) << 10) | ((_b9) << 9) | ((_b8) << 8)              \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS12(_b11,_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ( ((_b11) << 11) | ((_b10) << 10) | ((_b9) << 9) | ((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
274
/* produces _uint[_bMax:_bMin], that is, bits _bMax down to _bMin
   (inclusive) of _uint, moved down so that bit _bMin lands at bit 0.
   The mask is computed in 64-bit arithmetic so that a full-width
   slice (_bMax = 31, _bMin = 0) does not shift by the operand width.
   Requires _bMax >= _bMin (not checked). */
#define SLICE_UInt(_uint,_bMax,_bMin) \
   (( ((UInt)(_uint)) >> (_bMin)) \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
279
280
281/*------------------------------------------------------------*/
282/*--- Helper bits and pieces for creating IR fragments.    ---*/
283/*------------------------------------------------------------*/
284
285static IRExpr* mkU64 ( ULong i )
286{
287   return IRExpr_Const(IRConst_U64(i));
288}
289
290static IRExpr* mkU32 ( UInt i )
291{
292   return IRExpr_Const(IRConst_U32(i));
293}
294
295static IRExpr* mkU8 ( UInt i )
296{
297   vassert(i < 256);
298   return IRExpr_Const(IRConst_U8( (UChar)i ));
299}
300
301static IRExpr* mkexpr ( IRTemp tmp )
302{
303   return IRExpr_RdTmp(tmp);
304}
305
306static IRExpr* unop ( IROp op, IRExpr* a )
307{
308   return IRExpr_Unop(op, a);
309}
310
311static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
312{
313   return IRExpr_Binop(op, a1, a2);
314}
315
316static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
317{
318   return IRExpr_Triop(op, a1, a2, a3);
319}
320
321static IRExpr* loadLE ( IRType ty, IRExpr* addr )
322{
323   return IRExpr_Load(Iend_LE, ty, addr);
324}
325
326/* Add a statement to the list held by "irbb". */
327static void stmt ( IRStmt* st )
328{
329   addStmtToIRSB( irsb, st );
330}
331
332static void assign ( IRTemp dst, IRExpr* e )
333{
334   stmt( IRStmt_WrTmp(dst, e) );
335}
336
337static void storeLE ( IRExpr* addr, IRExpr* data )
338{
339   stmt( IRStmt_Store(Iend_LE, addr, data) );
340}
341
342static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
343{
344   if (guardT == IRTemp_INVALID) {
345      /* unconditional */
346      storeLE(addr, data);
347   } else {
348      stmt( IRStmt_StoreG(Iend_LE, addr, data,
349                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
350   }
351}
352
353static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
354                            IRExpr* addr, IRExpr* alt,
355                            IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
356{
357   if (guardT == IRTemp_INVALID) {
358      /* unconditional */
359      IRExpr* loaded = NULL;
360      switch (cvt) {
361         case ILGop_Ident32:
362            loaded = loadLE(Ity_I32, addr); break;
363         case ILGop_8Uto32:
364            loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
365         case ILGop_8Sto32:
366            loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
367         case ILGop_16Uto32:
368            loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
369         case ILGop_16Sto32:
370            loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
371         default:
372            vassert(0);
373      }
374      vassert(loaded != NULL);
375      assign(dst, loaded);
376   } else {
377      /* Generate a guarded load into 'dst', but apply 'cvt' to the
378         loaded data before putting the data in 'dst'.  If the load
379         does not take place, 'alt' is placed directly in 'dst'. */
380      stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
381                         binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
382   }
383}
384
385/* Generate a new temporary of the given type. */
386static IRTemp newTemp ( IRType ty )
387{
388   vassert(isPlausibleIRType(ty));
389   return newIRTemp( irsb->tyenv, ty );
390}
391
392/* Produces a value in 0 .. 3, which is encoded as per the type
393   IRRoundingMode. */
394static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
395{
396   return mkU32(Irrm_NEAREST);
397}
398
399/* Generate an expression for SRC rotated right by ROT. */
400static IRExpr* genROR32( IRTemp src, Int rot )
401{
402   vassert(rot >= 0 && rot < 32);
403   if (rot == 0)
404      return mkexpr(src);
405   return
406      binop(Iop_Or32,
407            binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
408            binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
409}
410
411static IRExpr* mkU128 ( ULong i )
412{
413   return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
414}
415
416/* Generate a 4-aligned version of the given expression if
417   the given condition is true.  Else return it unchanged. */
418static IRExpr* align4if ( IRExpr* e, Bool b )
419{
420   if (b)
421      return binop(Iop_And32, e, mkU32(~3));
422   else
423      return e;
424}
425
426
427/*------------------------------------------------------------*/
428/*--- Helpers for accessing guest registers.               ---*/
429/*------------------------------------------------------------*/
430
/* Byte offsets of fields within VexGuestARMState.  These are what
   the register accessors below pass to IRExpr_Get / IRStmt_Put. */

/* Integer registers r0 .. r14, plus r15 (held in field guest_R15T). */
#define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
#define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
#define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
#define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
#define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
#define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
#define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
#define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
#define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
#define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
#define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
#define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
#define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
#define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
#define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
#define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)

/* The guest_CC_* fields, and guest_NRADDR. */
#define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
#define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)

/* 64-bit registers D0 .. D31.  The Q-register and F-register
   accessors below compute their offsets from these as well. */
#define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
#define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
#define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
#define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
#define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
#define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
#define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
#define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
#define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
#define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
#define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
#define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
#define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
#define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
#define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
#define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
#define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
#define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
#define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
#define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
#define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
#define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
#define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
#define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
#define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
#define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
#define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
#define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
#define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
#define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
#define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
#define OFFB_D31      offsetof(VexGuestARMState,guest_D31)

/* Miscellaneous fields: FPSCR, TPIDRURO, ITSTATE, QFLAG32 and
   GEFLAG0 .. GEFLAG3. */
#define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
#define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
#define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
#define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
#define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
#define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
#define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
#define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)

/* The guest_CMSTART and guest_CMLEN fields. */
#define OFFB_CMSTART  offsetof(VexGuestARMState,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARMState,guest_CMLEN)
498
499
500/* ---------------- Integer registers ---------------- */
501
502static Int integerGuestRegOffset ( UInt iregNo )
503{
504   /* Do we care about endianness here?  We do if sub-parts of integer
505      registers are accessed, but I don't think that ever happens on
506      ARM. */
507   switch (iregNo) {
508      case 0:  return OFFB_R0;
509      case 1:  return OFFB_R1;
510      case 2:  return OFFB_R2;
511      case 3:  return OFFB_R3;
512      case 4:  return OFFB_R4;
513      case 5:  return OFFB_R5;
514      case 6:  return OFFB_R6;
515      case 7:  return OFFB_R7;
516      case 8:  return OFFB_R8;
517      case 9:  return OFFB_R9;
518      case 10: return OFFB_R10;
519      case 11: return OFFB_R11;
520      case 12: return OFFB_R12;
521      case 13: return OFFB_R13;
522      case 14: return OFFB_R14;
523      case 15: return OFFB_R15T;
524      default: vassert(0);
525   }
526}
527
528/* Plain ("low level") read from a reg; no +8 offset magic for r15. */
529static IRExpr* llGetIReg ( UInt iregNo )
530{
531   vassert(iregNo < 16);
532   return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
533}
534
535/* Architected read from a reg in ARM mode.  This automagically adds 8
536   to all reads of r15. */
537static IRExpr* getIRegA ( UInt iregNo )
538{
539   IRExpr* e;
540   ASSERT_IS_ARM;
541   vassert(iregNo < 16);
542   if (iregNo == 15) {
543      /* If asked for r15, don't read the guest state value, as that
544         may not be up to date in the case where loop unrolling has
545         happened, because the first insn's write to the block is
546         omitted; hence in the 2nd and subsequent unrollings we don't
547         have a correct value in guest r15.  Instead produce the
548         constant that we know would be produced at this point. */
549      vassert(0 == (guest_R15_curr_instr_notENC & 3));
550      e = mkU32(guest_R15_curr_instr_notENC + 8);
551   } else {
552      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
553   }
554   return e;
555}
556
557/* Architected read from a reg in Thumb mode.  This automagically adds
558   4 to all reads of r15. */
559static IRExpr* getIRegT ( UInt iregNo )
560{
561   IRExpr* e;
562   ASSERT_IS_THUMB;
563   vassert(iregNo < 16);
564   if (iregNo == 15) {
565      /* Ditto comment in getIReg. */
566      vassert(0 == (guest_R15_curr_instr_notENC & 1));
567      e = mkU32(guest_R15_curr_instr_notENC + 4);
568   } else {
569      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
570   }
571   return e;
572}
573
574/* Plain ("low level") write to a reg; no jump or alignment magic for
575   r15. */
576static void llPutIReg ( UInt iregNo, IRExpr* e )
577{
578   vassert(iregNo < 16);
579   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
580   stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
581}
582
583/* Architected write to an integer register in ARM mode.  If it is to
584   r15, record info so at the end of this insn's translation, a branch
585   to it can be made.  Also handles conditional writes to the
586   register: if guardT == IRTemp_INVALID then the write is
587   unconditional.  If writing r15, also 4-align it. */
588static void putIRegA ( UInt       iregNo,
589                       IRExpr*    e,
590                       IRTemp     guardT /* :: Ity_I32, 0 or 1 */,
591                       IRJumpKind jk /* if a jump is generated */ )
592{
593   /* if writing r15, force e to be 4-aligned. */
594   // INTERWORKING FIXME.  this needs to be relaxed so that
595   // puts caused by LDMxx which load r15 interwork right.
596   // but is no aligned too relaxed?
597   //if (iregNo == 15)
598   //   e = binop(Iop_And32, e, mkU32(~3));
599   ASSERT_IS_ARM;
600   /* So, generate either an unconditional or a conditional write to
601      the reg. */
602   if (guardT == IRTemp_INVALID) {
603      /* unconditional write */
604      llPutIReg( iregNo, e );
605   } else {
606      llPutIReg( iregNo,
607                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
608                             e, llGetIReg(iregNo) ));
609   }
610   if (iregNo == 15) {
611      // assert against competing r15 updates.  Shouldn't
612      // happen; should be ruled out by the instr matching
613      // logic.
614      vassert(r15written == False);
615      vassert(r15guard   == IRTemp_INVALID);
616      vassert(r15kind    == Ijk_Boring);
617      r15written = True;
618      r15guard   = guardT;
619      r15kind    = jk;
620   }
621}
622
623
624/* Architected write to an integer register in Thumb mode.  Writes to
625   r15 are not allowed.  Handles conditional writes to the register:
626   if guardT == IRTemp_INVALID then the write is unconditional. */
627static void putIRegT ( UInt       iregNo,
628                       IRExpr*    e,
629                       IRTemp     guardT /* :: Ity_I32, 0 or 1 */ )
630{
631   /* So, generate either an unconditional or a conditional write to
632      the reg. */
633   ASSERT_IS_THUMB;
634   vassert(iregNo >= 0 && iregNo <= 14);
635   if (guardT == IRTemp_INVALID) {
636      /* unconditional write */
637      llPutIReg( iregNo, e );
638   } else {
639      llPutIReg( iregNo,
640                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
641                             e, llGetIReg(iregNo) ));
642   }
643}
644
645
646/* Thumb16 and Thumb32 only.
647   Returns true if reg is 13 or 15.  Implements the BadReg
648   predicate in the ARM ARM. */
649static Bool isBadRegT ( UInt r )
650{
651   vassert(r <= 15);
652   ASSERT_IS_THUMB;
653   return r == 13 || r == 15;
654}
655
656
657/* ---------------- Double registers ---------------- */
658
659static Int doubleGuestRegOffset ( UInt dregNo )
660{
661   /* Do we care about endianness here?  Probably do if we ever get
662      into the situation of dealing with the single-precision VFP
663      registers. */
664   switch (dregNo) {
665      case 0:  return OFFB_D0;
666      case 1:  return OFFB_D1;
667      case 2:  return OFFB_D2;
668      case 3:  return OFFB_D3;
669      case 4:  return OFFB_D4;
670      case 5:  return OFFB_D5;
671      case 6:  return OFFB_D6;
672      case 7:  return OFFB_D7;
673      case 8:  return OFFB_D8;
674      case 9:  return OFFB_D9;
675      case 10: return OFFB_D10;
676      case 11: return OFFB_D11;
677      case 12: return OFFB_D12;
678      case 13: return OFFB_D13;
679      case 14: return OFFB_D14;
680      case 15: return OFFB_D15;
681      case 16: return OFFB_D16;
682      case 17: return OFFB_D17;
683      case 18: return OFFB_D18;
684      case 19: return OFFB_D19;
685      case 20: return OFFB_D20;
686      case 21: return OFFB_D21;
687      case 22: return OFFB_D22;
688      case 23: return OFFB_D23;
689      case 24: return OFFB_D24;
690      case 25: return OFFB_D25;
691      case 26: return OFFB_D26;
692      case 27: return OFFB_D27;
693      case 28: return OFFB_D28;
694      case 29: return OFFB_D29;
695      case 30: return OFFB_D30;
696      case 31: return OFFB_D31;
697      default: vassert(0);
698   }
699}
700
701/* Plain ("low level") read from a VFP Dreg. */
702static IRExpr* llGetDReg ( UInt dregNo )
703{
704   vassert(dregNo < 32);
705   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
706}
707
708/* Architected read from a VFP Dreg. */
709static IRExpr* getDReg ( UInt dregNo ) {
710   return llGetDReg( dregNo );
711}
712
713/* Plain ("low level") write to a VFP Dreg. */
714static void llPutDReg ( UInt dregNo, IRExpr* e )
715{
716   vassert(dregNo < 32);
717   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
718   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
719}
720
721/* Architected write to a VFP Dreg.  Handles conditional writes to the
722   register: if guardT == IRTemp_INVALID then the write is
723   unconditional. */
724static void putDReg ( UInt    dregNo,
725                      IRExpr* e,
726                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
727{
728   /* So, generate either an unconditional or a conditional write to
729      the reg. */
730   if (guardT == IRTemp_INVALID) {
731      /* unconditional write */
732      llPutDReg( dregNo, e );
733   } else {
734      llPutDReg( dregNo,
735                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
736                             e, llGetDReg(dregNo) ));
737   }
738}
739
740/* And now exactly the same stuff all over again, but this time
741   taking/returning I64 rather than F64, to support 64-bit Neon
742   ops. */
743
744/* Plain ("low level") read from a Neon Integer Dreg. */
745static IRExpr* llGetDRegI64 ( UInt dregNo )
746{
747   vassert(dregNo < 32);
748   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
749}
750
751/* Architected read from a Neon Integer Dreg. */
752static IRExpr* getDRegI64 ( UInt dregNo ) {
753   return llGetDRegI64( dregNo );
754}
755
756/* Plain ("low level") write to a Neon Integer Dreg. */
757static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
758{
759   vassert(dregNo < 32);
760   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
761   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
762}
763
764/* Architected write to a Neon Integer Dreg.  Handles conditional
765   writes to the register: if guardT == IRTemp_INVALID then the write
766   is unconditional. */
767static void putDRegI64 ( UInt    dregNo,
768                         IRExpr* e,
769                         IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
770{
771   /* So, generate either an unconditional or a conditional write to
772      the reg. */
773   if (guardT == IRTemp_INVALID) {
774      /* unconditional write */
775      llPutDRegI64( dregNo, e );
776   } else {
777      llPutDRegI64( dregNo,
778                    IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
779                                e, llGetDRegI64(dregNo) ));
780   }
781}
782
783/* ---------------- Quad registers ---------------- */
784
785static Int quadGuestRegOffset ( UInt qregNo )
786{
787   /* Do we care about endianness here?  Probably do if we ever get
788      into the situation of dealing with the 64 bit Neon registers. */
789   switch (qregNo) {
790      case 0:  return OFFB_D0;
791      case 1:  return OFFB_D2;
792      case 2:  return OFFB_D4;
793      case 3:  return OFFB_D6;
794      case 4:  return OFFB_D8;
795      case 5:  return OFFB_D10;
796      case 6:  return OFFB_D12;
797      case 7:  return OFFB_D14;
798      case 8:  return OFFB_D16;
799      case 9:  return OFFB_D18;
800      case 10: return OFFB_D20;
801      case 11: return OFFB_D22;
802      case 12: return OFFB_D24;
803      case 13: return OFFB_D26;
804      case 14: return OFFB_D28;
805      case 15: return OFFB_D30;
806      default: vassert(0);
807   }
808}
809
810/* Plain ("low level") read from a Neon Qreg. */
811static IRExpr* llGetQReg ( UInt qregNo )
812{
813   vassert(qregNo < 16);
814   return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
815}
816
817/* Architected read from a Neon Qreg. */
818static IRExpr* getQReg ( UInt qregNo ) {
819   return llGetQReg( qregNo );
820}
821
822/* Plain ("low level") write to a Neon Qreg. */
823static void llPutQReg ( UInt qregNo, IRExpr* e )
824{
825   vassert(qregNo < 16);
826   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
827   stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
828}
829
830/* Architected write to a Neon Qreg.  Handles conditional writes to the
831   register: if guardT == IRTemp_INVALID then the write is
832   unconditional. */
833static void putQReg ( UInt    qregNo,
834                      IRExpr* e,
835                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
836{
837   /* So, generate either an unconditional or a conditional write to
838      the reg. */
839   if (guardT == IRTemp_INVALID) {
840      /* unconditional write */
841      llPutQReg( qregNo, e );
842   } else {
843      llPutQReg( qregNo,
844                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
845                             e, llGetQReg(qregNo) ));
846   }
847}
848
849
850/* ---------------- Float registers ---------------- */
851
852static Int floatGuestRegOffset ( UInt fregNo )
853{
854   /* Start with the offset of the containing double, and then correct
855      for endianness.  Actually this is completely bogus and needs
856      careful thought. */
857   Int off;
858   /* NB! Limit is 64, not 32, because we might be pulling F32 bits
859      out of SIMD registers, and there are 16 SIMD registers each of
860      128 bits (4 x F32). */
861   vassert(fregNo < 64);
862   off = doubleGuestRegOffset(fregNo >> 1);
863   if (host_endness == VexEndnessLE) {
864      if (fregNo & 1)
865         off += 4;
866   } else {
867      vassert(0);
868   }
869   return off;
870}
871
872/* Plain ("low level") read from a VFP Freg. */
873static IRExpr* llGetFReg ( UInt fregNo )
874{
875   vassert(fregNo < 32);
876   return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
877}
878
879static IRExpr* llGetFReg_up_to_64 ( UInt fregNo )
880{
881   vassert(fregNo < 64);
882   return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
883}
884
885/* Architected read from a VFP Freg. */
886static IRExpr* getFReg ( UInt fregNo ) {
887   return llGetFReg( fregNo );
888}
889
890/* Plain ("low level") write to a VFP Freg. */
891static void llPutFReg ( UInt fregNo, IRExpr* e )
892{
893   vassert(fregNo < 32);
894   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
895   stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
896}
897
898static void llPutFReg_up_to_64 ( UInt fregNo, IRExpr* e )
899{
900   vassert(fregNo < 64);
901   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
902   stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
903}
904
905/* Architected write to a VFP Freg.  Handles conditional writes to the
906   register: if guardT == IRTemp_INVALID then the write is
907   unconditional. */
908static void putFReg ( UInt    fregNo,
909                      IRExpr* e,
910                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
911{
912   /* So, generate either an unconditional or a conditional write to
913      the reg. */
914   if (guardT == IRTemp_INVALID) {
915      /* unconditional write */
916      llPutFReg( fregNo, e );
917   } else {
918      llPutFReg( fregNo,
919                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
920                             e, llGetFReg(fregNo) ));
921   }
922}
923
924
925/* ---------------- Misc registers ---------------- */
926
927static void putMiscReg32 ( UInt    gsoffset,
928                           IRExpr* e, /* :: Ity_I32 */
929                           IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
930{
931   switch (gsoffset) {
932      case OFFB_FPSCR:   break;
933      case OFFB_QFLAG32: break;
934      case OFFB_GEFLAG0: break;
935      case OFFB_GEFLAG1: break;
936      case OFFB_GEFLAG2: break;
937      case OFFB_GEFLAG3: break;
938      default: vassert(0); /* awaiting more cases */
939   }
940   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
941
942   if (guardT == IRTemp_INVALID) {
943      /* unconditional write */
944      stmt(IRStmt_Put(gsoffset, e));
945   } else {
946      stmt(IRStmt_Put(
947         gsoffset,
948         IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
949                     e, IRExpr_Get(gsoffset, Ity_I32) )
950      ));
951   }
952}
953
954static IRTemp get_ITSTATE ( void )
955{
956   ASSERT_IS_THUMB;
957   IRTemp t = newTemp(Ity_I32);
958   assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
959   return t;
960}
961
962static void put_ITSTATE ( IRTemp t )
963{
964   ASSERT_IS_THUMB;
965   stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
966}
967
968static IRTemp get_QFLAG32 ( void )
969{
970   IRTemp t = newTemp(Ity_I32);
971   assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
972   return t;
973}
974
975static void put_QFLAG32 ( IRTemp t, IRTemp condT )
976{
977   putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
978}
979
980/* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
981   Status Register) to indicate that overflow or saturation occurred.
982   Nb: t must be zero to denote no saturation, and any nonzero
983   value to indicate saturation. */
984static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
985{
986   IRTemp old = get_QFLAG32();
987   IRTemp nyu = newTemp(Ity_I32);
988   assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
989   put_QFLAG32(nyu, condT);
990}
991
992/* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
993   flagNo: which flag bit to set [3...0]
994   lowbits_to_ignore:  0 = look at all 32 bits
995                       8 = look at top 24 bits only
996                      16 = look at top 16 bits only
997                      31 = look at the top bit only
998   e: input value to be evaluated.
999   The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
1000   masked out.  If the resulting value is zero then the GE flag is
1001   set to 0; any other value sets the flag to 1. */
1002static void put_GEFLAG32 ( Int flagNo,            /* 0, 1, 2 or 3 */
1003                           Int lowbits_to_ignore, /* 0, 8, 16 or 31   */
1004                           IRExpr* e,             /* Ity_I32 */
1005                           IRTemp condT )
1006{
1007   vassert( flagNo >= 0 && flagNo <= 3 );
1008   vassert( lowbits_to_ignore == 0  ||
1009            lowbits_to_ignore == 8  ||
1010            lowbits_to_ignore == 16 ||
1011            lowbits_to_ignore == 31 );
1012   IRTemp masked = newTemp(Ity_I32);
1013   assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
1014
1015   switch (flagNo) {
1016      case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
1017      case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
1018      case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
1019      case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
1020      default: vassert(0);
1021   }
1022}
1023
1024/* Return the (32-bit, zero-or-nonzero representation scheme) of
1025   the specified GE flag. */
1026static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
1027{
1028   switch (flagNo) {
1029      case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
1030      case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
1031      case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
1032      case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
1033      default: vassert(0);
1034   }
1035}
1036
1037/* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
1038   2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
1039   15 of the value.  All other bits are ignored. */
1040static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
1041{
1042   IRTemp ge10 = newTemp(Ity_I32);
1043   IRTemp ge32 = newTemp(Ity_I32);
1044   assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1045   assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1046   put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
1047   put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
1048   put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
1049   put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
1050}
1051
1052
1053/* Set all 4 GE flags from the given 32-bit value as follows: GE 3
1054   from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
1055   bit 7.  All other bits are ignored. */
1056static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
1057{
1058   IRTemp ge0 = newTemp(Ity_I32);
1059   IRTemp ge1 = newTemp(Ity_I32);
1060   IRTemp ge2 = newTemp(Ity_I32);
1061   IRTemp ge3 = newTemp(Ity_I32);
1062   assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
1063   assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1064   assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
1065   assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1066   put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
1067   put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
1068   put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
1069   put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
1070}
1071
1072
1073/* ---------------- FPSCR stuff ---------------- */
1074
1075/* Generate IR to get hold of the rounding mode bits in FPSCR, and
1076   convert them to IR format.  Bind the final result to the
1077   returned temp. */
1078static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1079{
1080   /* The ARMvfp encoding for rounding mode bits is:
1081         00  to nearest
1082         01  to +infinity
1083         10  to -infinity
1084         11  to zero
1085      We need to convert that to the IR encoding:
1086         00  to nearest (the default)
1087         10  to +infinity
1088         01  to -infinity
1089         11  to zero
1090      Which can be done by swapping bits 0 and 1.
1091      The rmode bits are at 23:22 in FPSCR.
1092   */
1093   IRTemp armEncd = newTemp(Ity_I32);
1094   IRTemp swapped = newTemp(Ity_I32);
1095   /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
1096      we don't zero out bits 24 and above, since the assignment to
1097      'swapped' will mask them out anyway. */
1098   assign(armEncd,
1099          binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
1100   /* Now swap them. */
1101   assign(swapped,
1102          binop(Iop_Or32,
1103                binop(Iop_And32,
1104                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1105                      mkU32(2)),
1106                binop(Iop_And32,
1107                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1108                      mkU32(1))
1109         ));
1110   return swapped;
1111}
1112
1113
1114/*------------------------------------------------------------*/
1115/*--- Helpers for flag handling and conditional insns      ---*/
1116/*------------------------------------------------------------*/
1117
1118static const HChar* name_ARMCondcode ( ARMCondcode cond )
1119{
1120   switch (cond) {
1121      case ARMCondEQ:  return "{eq}";
1122      case ARMCondNE:  return "{ne}";
1123      case ARMCondHS:  return "{hs}";  // or 'cs'
1124      case ARMCondLO:  return "{lo}";  // or 'cc'
1125      case ARMCondMI:  return "{mi}";
1126      case ARMCondPL:  return "{pl}";
1127      case ARMCondVS:  return "{vs}";
1128      case ARMCondVC:  return "{vc}";
1129      case ARMCondHI:  return "{hi}";
1130      case ARMCondLS:  return "{ls}";
1131      case ARMCondGE:  return "{ge}";
1132      case ARMCondLT:  return "{lt}";
1133      case ARMCondGT:  return "{gt}";
1134      case ARMCondLE:  return "{le}";
1135      case ARMCondAL:  return ""; // {al}: is the default
1136      case ARMCondNV:  return "{nv}";
1137      default: vpanic("name_ARMCondcode");
1138   }
1139}
1140/* and a handy shorthand for it */
1141static const HChar* nCC ( ARMCondcode cond ) {
1142   return name_ARMCondcode(cond);
1143}
1144
1145
1146/* Build IR to calculate some particular condition from stored
1147   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1148   Ity_I32, suitable for narrowing.  Although the return type is
1149   Ity_I32, the returned value is either 0 or 1.  'cond' must be
1150   :: Ity_I32 and must denote the condition to compute in
1151   bits 7:4, and be zero everywhere else.
1152*/
1153static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
1154{
1155   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
1156   /* And 'cond' had better produce a value in which only bits 7:4 are
1157      nonzero.  However, obviously we can't assert for that. */
1158
1159   /* So what we're constructing for the first argument is
1160      "(cond << 4) | stored-operation".
1161      However, as per comments above, 'cond' must be supplied
1162      pre-shifted to this function.
1163
1164      This pairing scheme requires that the ARM_CC_OP_ values all fit
1165      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
1166      8 bits of the first argument. */
1167   IRExpr** args
1168      = mkIRExprVec_4(
1169           binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
1170           IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1171           IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1172           IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
1173        );
1174   IRExpr* call
1175      = mkIRExprCCall(
1176           Ity_I32,
1177           0/*regparm*/,
1178           "armg_calculate_condition", &armg_calculate_condition,
1179           args
1180        );
1181
1182   /* Exclude the requested condition, OP and NDEP from definedness
1183      checking.  We're only interested in DEP1 and DEP2. */
1184   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1185   return call;
1186}
1187
1188
1189/* Build IR to calculate some particular condition from stored
1190   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1191   Ity_I32, suitable for narrowing.  Although the return type is
1192   Ity_I32, the returned value is either 0 or 1.
1193*/
1194static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
1195{
1196  /* First arg is "(cond << 4) | condition".  This requires that the
1197     ARM_CC_OP_ values all fit in 4 bits.  Hence we are passing a
1198     (COND, OP) pair in the lowest 8 bits of the first argument. */
1199   vassert(cond >= 0 && cond <= 15);
1200   return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
1201}
1202
1203
1204/* Build IR to calculate just the carry flag from stored
1205   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1206   Ity_I32. */
1207static IRExpr* mk_armg_calculate_flag_c ( void )
1208{
1209   IRExpr** args
1210      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1211                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1212                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1213                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1214   IRExpr* call
1215      = mkIRExprCCall(
1216           Ity_I32,
1217           0/*regparm*/,
1218           "armg_calculate_flag_c", &armg_calculate_flag_c,
1219           args
1220        );
1221   /* Exclude OP and NDEP from definedness checking.  We're only
1222      interested in DEP1 and DEP2. */
1223   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1224   return call;
1225}
1226
1227
1228/* Build IR to calculate just the overflow flag from stored
1229   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1230   Ity_I32. */
1231static IRExpr* mk_armg_calculate_flag_v ( void )
1232{
1233   IRExpr** args
1234      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1235                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1236                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1237                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1238   IRExpr* call
1239      = mkIRExprCCall(
1240           Ity_I32,
1241           0/*regparm*/,
1242           "armg_calculate_flag_v", &armg_calculate_flag_v,
1243           args
1244        );
1245   /* Exclude OP and NDEP from definedness checking.  We're only
1246      interested in DEP1 and DEP2. */
1247   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1248   return call;
1249}
1250
1251
1252/* Build IR to calculate N Z C V in bits 31:28 of the
1253   returned word. */
1254static IRExpr* mk_armg_calculate_flags_nzcv ( void )
1255{
1256   IRExpr** args
1257      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1258                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1259                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1260                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1261   IRExpr* call
1262      = mkIRExprCCall(
1263           Ity_I32,
1264           0/*regparm*/,
1265           "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
1266           args
1267        );
1268   /* Exclude OP and NDEP from definedness checking.  We're only
1269      interested in DEP1 and DEP2. */
1270   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1271   return call;
1272}
1273
1274static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
1275{
1276   IRExpr** args1;
1277   IRExpr** args2;
1278   IRExpr *call1, *call2, *res;
1279
1280   if (Q) {
1281      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
1282                              binop(Iop_GetElem32x4, resL, mkU8(1)),
1283                              binop(Iop_GetElem32x4, resR, mkU8(0)),
1284                              binop(Iop_GetElem32x4, resR, mkU8(1)) );
1285      args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
1286                              binop(Iop_GetElem32x4, resL, mkU8(3)),
1287                              binop(Iop_GetElem32x4, resR, mkU8(2)),
1288                              binop(Iop_GetElem32x4, resR, mkU8(3)) );
1289   } else {
1290      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
1291                              binop(Iop_GetElem32x2, resL, mkU8(1)),
1292                              binop(Iop_GetElem32x2, resR, mkU8(0)),
1293                              binop(Iop_GetElem32x2, resR, mkU8(1)) );
1294   }
1295
1296   call1 = mkIRExprCCall(
1297             Ity_I32,
1298             0/*regparm*/,
1299             "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1300             args1
1301          );
1302   if (Q) {
1303      call2 = mkIRExprCCall(
1304                Ity_I32,
1305                0/*regparm*/,
1306                "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1307                args2
1308             );
1309   }
1310   if (Q) {
1311      res = binop(Iop_Or32, call1, call2);
1312   } else {
1313      res = call1;
1314   }
1315   return res;
1316}
1317
1318// FIXME: this is named wrongly .. looks like a sticky set of
1319// QC, not a write to it.
1320static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
1321                         IRTemp condT )
1322{
1323   putMiscReg32 (OFFB_FPSCR,
1324                 binop(Iop_Or32,
1325                       IRExpr_Get(OFFB_FPSCR, Ity_I32),
1326                       binop(Iop_Shl32,
1327                             mk_armg_calculate_flag_qc(resL, resR, Q),
1328                             mkU8(27))),
1329                 condT);
1330}
1331
1332/* Build IR to conditionally set the flags thunk.  As with putIReg, if
1333   guard is IRTemp_INVALID then it's unconditional, else it holds a
1334   condition :: Ity_I32. */
1335static
1336void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
1337                         IRTemp t_dep2, IRTemp t_ndep,
1338                         IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1339{
1340   vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
1341   vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
1342   vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
1343   vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
1344   if (guardT == IRTemp_INVALID) {
1345      /* unconditional */
1346      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(cc_op) ));
1347      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1348      stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1349      stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1350   } else {
1351      /* conditional */
1352      IRTemp c1 = newTemp(Ity_I1);
1353      assign( c1, binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)) );
1354      stmt( IRStmt_Put(
1355               OFFB_CC_OP,
1356               IRExpr_ITE( mkexpr(c1),
1357                           mkU32(cc_op),
1358                           IRExpr_Get(OFFB_CC_OP, Ity_I32) ) ));
1359      stmt( IRStmt_Put(
1360               OFFB_CC_DEP1,
1361               IRExpr_ITE( mkexpr(c1),
1362                           mkexpr(t_dep1),
1363                           IRExpr_Get(OFFB_CC_DEP1, Ity_I32) ) ));
1364      stmt( IRStmt_Put(
1365               OFFB_CC_DEP2,
1366               IRExpr_ITE( mkexpr(c1),
1367                           mkexpr(t_dep2),
1368                           IRExpr_Get(OFFB_CC_DEP2, Ity_I32) ) ));
1369      stmt( IRStmt_Put(
1370               OFFB_CC_NDEP,
1371               IRExpr_ITE( mkexpr(c1),
1372                           mkexpr(t_ndep),
1373                           IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ) ));
1374   }
1375}
1376
1377
1378/* Minor variant of the above that sets NDEP to zero (if it
1379   sets it at all) */
1380static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1381                             IRTemp t_dep2,
1382                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1383{
1384   IRTemp z32 = newTemp(Ity_I32);
1385   assign( z32, mkU32(0) );
1386   setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1387}
1388
1389
1390/* Minor variant of the above that sets DEP2 to zero (if it
1391   sets it at all) */
1392static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1393                             IRTemp t_ndep,
1394                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1395{
1396   IRTemp z32 = newTemp(Ity_I32);
1397   assign( z32, mkU32(0) );
1398   setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1399}
1400
1401
1402/* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1403   sets them at all) */
1404static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1405                          IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1406{
1407   IRTemp z32 = newTemp(Ity_I32);
1408   assign( z32, mkU32(0) );
1409   setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1410}
1411
1412
1413/* ARM only */
1414/* Generate a side-exit to the next instruction, if the given guard
1415   expression :: Ity_I32 is 0 (note!  the side exit is taken if the
1416   condition is false!)  This is used to skip over conditional
1417   instructions which we can't generate straight-line code for, either
1418   because they are too complex or (more likely) they potentially
1419   generate exceptions.
1420*/
1421static void mk_skip_over_A32_if_cond_is_false (
1422               IRTemp guardT /* :: Ity_I32, 0 or 1 */
1423            )
1424{
1425   ASSERT_IS_ARM;
1426   vassert(guardT != IRTemp_INVALID);
1427   vassert(0 == (guest_R15_curr_instr_notENC & 3));
1428   stmt( IRStmt_Exit(
1429            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1430            Ijk_Boring,
1431            IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
1432            OFFB_R15T
1433       ));
1434}
1435
1436/* Thumb16 only */
1437/* ditto, but jump over a 16-bit thumb insn */
1438static void mk_skip_over_T16_if_cond_is_false (
1439               IRTemp guardT /* :: Ity_I32, 0 or 1 */
1440            )
1441{
1442   ASSERT_IS_THUMB;
1443   vassert(guardT != IRTemp_INVALID);
1444   vassert(0 == (guest_R15_curr_instr_notENC & 1));
1445   stmt( IRStmt_Exit(
1446            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1447            Ijk_Boring,
1448            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
1449            OFFB_R15T
1450       ));
1451}
1452
1453
1454/* Thumb32 only */
1455/* ditto, but jump over a 32-bit thumb insn */
1456static void mk_skip_over_T32_if_cond_is_false (
1457               IRTemp guardT /* :: Ity_I32, 0 or 1 */
1458            )
1459{
1460   ASSERT_IS_THUMB;
1461   vassert(guardT != IRTemp_INVALID);
1462   vassert(0 == (guest_R15_curr_instr_notENC & 1));
1463   stmt( IRStmt_Exit(
1464            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1465            Ijk_Boring,
1466            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
1467            OFFB_R15T
1468       ));
1469}
1470
1471
1472/* Thumb16 and Thumb32 only
1473   Generate a SIGILL followed by a restart of the current instruction
1474   if the given temp is nonzero. */
1475static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
1476{
1477   ASSERT_IS_THUMB;
1478   vassert(t != IRTemp_INVALID);
1479   vassert(0 == (guest_R15_curr_instr_notENC & 1));
1480   stmt(
1481      IRStmt_Exit(
1482         binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
1483         Ijk_NoDecode,
1484         IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
1485         OFFB_R15T
1486      )
1487   );
1488}
1489
1490
1491/* Inspect the old_itstate, and generate a SIGILL if it indicates that
1492   we are currently in an IT block and are not the last in the block.
1493   This also rolls back guest_ITSTATE to its old value before the exit
1494   and restores it to its new value afterwards.  This is so that if
1495   the exit is taken, we have an up to date version of ITSTATE
1496   available.  Without doing that, we have no hope of making precise
1497   exceptions work. */
1498static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
1499               IRTemp old_itstate /* :: Ity_I32 */,
1500               IRTemp new_itstate /* :: Ity_I32 */
1501            )
1502{
1503   ASSERT_IS_THUMB;
1504   put_ITSTATE(old_itstate); // backout
1505   IRTemp guards_for_next3 = newTemp(Ity_I32);
1506   assign(guards_for_next3,
1507          binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
1508   gen_SIGILL_T_if_nonzero(guards_for_next3);
1509   put_ITSTATE(new_itstate); //restore
1510}
1511
1512
/* Simpler relative of gen_SIGILL_T_if_in_but_NLI_ITBlock: generates
   a SIGILL if we're anywhere within an IT block, that is, if
   old_itstate is nonzero.  As there, guest ITSTATE is set to
   old_itstate before the possible exit and to new_itstate after it,
   so the old value is the one in place if the exit is taken. */
static void gen_SIGILL_T_if_in_ITBlock (
               IRTemp old_itstate /* :: Ity_I32 */,
               IRTemp new_itstate /* :: Ity_I32 */
            )
{
   put_ITSTATE(old_itstate); // backout
   gen_SIGILL_T_if_nonzero(old_itstate); // exit taken iff old_itstate != 0
   put_ITSTATE(new_itstate); //restore
}
1524
1525
1526/* Generate an APSR value, from the NZCV thunk, and
1527   from QFLAG32 and GEFLAG0 .. GEFLAG3. */
1528static IRTemp synthesise_APSR ( void )
1529{
1530   IRTemp res1 = newTemp(Ity_I32);
1531   // Get NZCV
1532   assign( res1, mk_armg_calculate_flags_nzcv() );
1533   // OR in the Q value
1534   IRTemp res2 = newTemp(Ity_I32);
1535   assign(
1536      res2,
1537      binop(Iop_Or32,
1538            mkexpr(res1),
1539            binop(Iop_Shl32,
1540                  unop(Iop_1Uto32,
1541                       binop(Iop_CmpNE32,
1542                             mkexpr(get_QFLAG32()),
1543                             mkU32(0))),
1544                  mkU8(ARMG_CC_SHIFT_Q)))
1545   );
1546   // OR in GE0 .. GE3
1547   IRExpr* ge0
1548      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
1549   IRExpr* ge1
1550      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
1551   IRExpr* ge2
1552      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
1553   IRExpr* ge3
1554      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
1555   IRTemp res3 = newTemp(Ity_I32);
1556   assign(res3,
1557          binop(Iop_Or32,
1558                mkexpr(res2),
1559                binop(Iop_Or32,
1560                      binop(Iop_Or32,
1561                            binop(Iop_Shl32, ge0, mkU8(16)),
1562                            binop(Iop_Shl32, ge1, mkU8(17))),
1563                      binop(Iop_Or32,
1564                            binop(Iop_Shl32, ge2, mkU8(18)),
1565                            binop(Iop_Shl32, ge3, mkU8(19))) )));
1566   return res3;
1567}
1568
1569
1570/* and the inverse transformation: given an APSR value,
1571   set the NZCV thunk, the Q flag, and the GE flags. */
1572static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
1573                                IRTemp apsrT, IRTemp condT )
1574{
1575   vassert(write_nzcvq || write_ge);
1576   if (write_nzcvq) {
1577      // Do NZCV
1578      IRTemp immT = newTemp(Ity_I32);
1579      assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
1580      setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
1581      // Do Q
1582      IRTemp qnewT = newTemp(Ity_I32);
1583      assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
1584      put_QFLAG32(qnewT, condT);
1585   }
1586   if (write_ge) {
1587      // Do GE3..0
1588      put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
1589                   condT);
1590      put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
1591                   condT);
1592      put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
1593                   condT);
1594      put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
1595                   condT);
1596   }
1597}
1598
1599
1600/*------------------------------------------------------------*/
1601/*--- Helpers for saturation                               ---*/
1602/*------------------------------------------------------------*/
1603
/* FIXME: absolutely the only difference between (a) armUnsignedSatQ
   and (b) armSignedSatQ is that in (a) the floor is set to 0, whereas
   in (b) the floor is computed from the value of imm5.  These two
   functions should be commoned up. */
1608
1609/* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
1610   Optionally return flag resQ saying whether saturation occurred.
1611   See definition in manual, section A2.2.1, page 41
1612   (bits(N), boolean) UnsignedSatQ( integer i, integer N )
1613   {
1614     if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
1615     elsif ( i < 0 )    { result = 0; saturated = TRUE; }
1616     else               { result = i; saturated = FALSE; }
1617     return ( result<N-1:0>, saturated );
1618   }
1619*/
1620static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
1621                             IRTemp* resQ, /* OUT - Ity_I32  */
1622                             IRTemp regT,  /* value to clamp - Ity_I32 */
1623                             UInt imm5 )   /* saturation ceiling */
1624{
1625   ULong ceil64  = (1ULL << imm5) - 1;    // (2^imm5)-1
1626   UInt  ceil    = (UInt)ceil64;
1627   UInt  floor   = 0;
1628
1629   IRTemp nd0 = newTemp(Ity_I32);
1630   IRTemp nd1 = newTemp(Ity_I32);
1631   IRTemp nd2 = newTemp(Ity_I1);
1632   IRTemp nd3 = newTemp(Ity_I32);
1633   IRTemp nd4 = newTemp(Ity_I32);
1634   IRTemp nd5 = newTemp(Ity_I1);
1635   IRTemp nd6 = newTemp(Ity_I32);
1636
1637   assign( nd0, mkexpr(regT) );
1638   assign( nd1, mkU32(ceil) );
1639   assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1640   assign( nd3, IRExpr_ITE(mkexpr(nd2), mkexpr(nd1), mkexpr(nd0)) );
1641   assign( nd4, mkU32(floor) );
1642   assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1643   assign( nd6, IRExpr_ITE(mkexpr(nd5), mkexpr(nd4), mkexpr(nd3)) );
1644   assign( *res, mkexpr(nd6) );
1645
1646   /* if saturation occurred, then resQ is set to some nonzero value
1647      if sat did not occur, resQ is guaranteed to be zero. */
1648   if (resQ) {
1649      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1650   }
1651}
1652
1653
1654/* SignedSatQ(): 'clamp' each value so it lies between  -2^N <= x <= (2^N) - 1
1655   Optionally return flag resQ saying whether saturation occurred.
1656   - see definition in manual, section A2.2.1, page 41
1657   (bits(N), boolean ) SignedSatQ( integer i, integer N )
1658   {
1659     if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
1660     elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = FALSE; }
1661     else                      { result = i;           saturated = FALSE; }
1662     return ( result[N-1:0], saturated );
1663   }
1664*/
1665static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
1666                           UInt imm5,      /* saturation ceiling */
1667                           IRTemp* res,    /* OUT - Ity_I32 */
1668                           IRTemp* resQ )  /* OUT - Ity_I32  */
1669{
1670   Long ceil64  =  (1LL << (imm5-1)) - 1;  //  (2^(imm5-1))-1
1671   Long floor64 = -(1LL << (imm5-1));      // -(2^(imm5-1))
1672   Int  ceil    = (Int)ceil64;
1673   Int  floor   = (Int)floor64;
1674
1675   IRTemp nd0 = newTemp(Ity_I32);
1676   IRTemp nd1 = newTemp(Ity_I32);
1677   IRTemp nd2 = newTemp(Ity_I1);
1678   IRTemp nd3 = newTemp(Ity_I32);
1679   IRTemp nd4 = newTemp(Ity_I32);
1680   IRTemp nd5 = newTemp(Ity_I1);
1681   IRTemp nd6 = newTemp(Ity_I32);
1682
1683   assign( nd0, mkexpr(regT) );
1684   assign( nd1, mkU32(ceil) );
1685   assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1686   assign( nd3, IRExpr_ITE( mkexpr(nd2), mkexpr(nd1), mkexpr(nd0) ) );
1687   assign( nd4, mkU32(floor) );
1688   assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1689   assign( nd6, IRExpr_ITE( mkexpr(nd5), mkexpr(nd4), mkexpr(nd3) ) );
1690   assign( *res, mkexpr(nd6) );
1691
1692   /* if saturation occurred, then resQ is set to some nonzero value
1693      if sat did not occur, resQ is guaranteed to be zero. */
1694   if (resQ) {
1695     assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1696   }
1697}
1698
1699
1700/* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
1701   overflow occurred for 32-bit addition.  Needs both args and the
1702   result.  HD p27. */
1703static
1704IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
1705                                      IRTemp argL, IRTemp argR )
1706{
1707   IRTemp res = newTemp(Ity_I32);
1708   assign(res, resE);
1709   return
1710      binop( Iop_Shr32,
1711             binop( Iop_And32,
1712                    binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
1713                    binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
1714             mkU8(31) );
1715}
1716
1717/* Similarly .. also from HD p27 .. */
1718static
1719IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
1720                                      IRTemp argL, IRTemp argR )
1721{
1722   IRTemp res = newTemp(Ity_I32);
1723   assign(res, resE);
1724   return
1725      binop( Iop_Shr32,
1726             binop( Iop_And32,
1727                    binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
1728                    binop( Iop_Xor32, mkexpr(res),  mkexpr(argL) )),
1729             mkU8(31) );
1730}
1731
1732
1733/*------------------------------------------------------------*/
1734/*--- Larger helpers                                       ---*/
1735/*------------------------------------------------------------*/
1736
1737/* Compute both the result and new C flag value for a LSL by an imm5
1738   or by a register operand.  May generate reads of the old C value
1739   (hence only safe to use before any writes to guest state happen).
   These are factored out so they can be used by both ARM and Thumb.
1741
1742   Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
1743   "res" (the result)  is a.k.a. "shop", shifter operand
1744   "newC" (the new C)  is a.k.a. "shco", shifter carry out
1745
1746   The calling convention for res and newC is a bit funny.  They could
1747   be passed by value, but instead are passed by ref.
1748
1749   The C (shco) value computed must be zero in bits 31:1, as the IR
1750   optimisations for flag handling (guest_arm_spechelper) rely on
1751   that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
1752   for it.  Same applies to all these functions that compute shco
1753   after a shift or rotate, not just this one.
1754*/
1755
1756static void compute_result_and_C_after_LSL_by_imm5 (
1757               /*OUT*/HChar* buf,
1758               IRTemp* res,
1759               IRTemp* newC,
1760               IRTemp rMt, UInt shift_amt, /* operands */
1761               UInt rM      /* only for debug printing */
1762            )
1763{
1764   if (shift_amt == 0) {
1765      if (newC) {
1766         assign( *newC, mk_armg_calculate_flag_c() );
1767      }
1768      assign( *res, mkexpr(rMt) );
1769      DIS(buf, "r%u", rM);
1770   } else {
1771      vassert(shift_amt >= 1 && shift_amt <= 31);
1772      if (newC) {
1773         assign( *newC,
1774                 binop(Iop_And32,
1775                       binop(Iop_Shr32, mkexpr(rMt),
1776                                        mkU8(32 - shift_amt)),
1777                       mkU32(1)));
1778      }
1779      assign( *res,
1780              binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
1781      DIS(buf, "r%u, LSL #%u", rM, shift_amt);
1782   }
1783}
1784
1785
1786static void compute_result_and_C_after_LSL_by_reg (
1787               /*OUT*/HChar* buf,
1788               IRTemp* res,
1789               IRTemp* newC,
1790               IRTemp rMt, IRTemp rSt,  /* operands */
1791               UInt rM,    UInt rS      /* only for debug printing */
1792            )
1793{
1794   // shift left in range 0 .. 255
1795   // amt  = rS & 255
1796   // res  = amt < 32 ?  Rm << amt  : 0
1797   // newC = amt == 0     ? oldC  :
1798   //        amt in 1..32 ?  Rm[32-amt]  : 0
1799   IRTemp amtT = newTemp(Ity_I32);
1800   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1801   if (newC) {
1802      /* mux0X(amt == 0,
1803               mux0X(amt < 32,
1804                     0,
1805                     Rm[(32-amt) & 31]),
1806               oldC)
1807      */
1808      /* About the best you can do is pray that iropt is able
1809         to nuke most or all of the following junk. */
1810      IRTemp oldC = newTemp(Ity_I32);
1811      assign(oldC, mk_armg_calculate_flag_c() );
1812      assign(
1813         *newC,
1814         IRExpr_ITE(
1815            binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1816            mkexpr(oldC),
1817            IRExpr_ITE(
1818               binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
1819               binop(Iop_And32,
1820                     binop(Iop_Shr32,
1821                           mkexpr(rMt),
1822                           unop(Iop_32to8,
1823                                binop(Iop_And32,
1824                                      binop(Iop_Sub32,
1825                                            mkU32(32),
1826                                            mkexpr(amtT)),
1827                                      mkU32(31)
1828                                )
1829                           )
1830                     ),
1831                     mkU32(1)
1832                     ),
1833               mkU32(0)
1834            )
1835         )
1836      );
1837   }
1838   // (Rm << (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
1839   // Lhs of the & limits the shift to 31 bits, so as to
1840   // give known IR semantics.  Rhs of the & is all 1s for
1841   // Rs <= 31 and all 0s for Rs >= 32.
1842   assign(
1843      *res,
1844      binop(
1845         Iop_And32,
1846         binop(Iop_Shl32,
1847               mkexpr(rMt),
1848               unop(Iop_32to8,
1849                    binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1850         binop(Iop_Sar32,
1851               binop(Iop_Sub32,
1852                     mkexpr(amtT),
1853                     mkU32(32)),
1854               mkU8(31))));
1855    DIS(buf, "r%u, LSL r%u", rM, rS);
1856}
1857
1858
1859static void compute_result_and_C_after_LSR_by_imm5 (
1860               /*OUT*/HChar* buf,
1861               IRTemp* res,
1862               IRTemp* newC,
1863               IRTemp rMt, UInt shift_amt, /* operands */
1864               UInt rM      /* only for debug printing */
1865            )
1866{
1867   if (shift_amt == 0) {
1868      // conceptually a 32-bit shift, however:
1869      // res  = 0
1870      // newC = Rm[31]
1871      if (newC) {
1872         assign( *newC,
1873                 binop(Iop_And32,
1874                       binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1875                       mkU32(1)));
1876      }
1877      assign( *res, mkU32(0) );
1878      DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
1879   } else {
1880      // shift in range 1..31
1881      // res  = Rm >>u shift_amt
1882      // newC = Rm[shift_amt - 1]
1883      vassert(shift_amt >= 1 && shift_amt <= 31);
1884      if (newC) {
1885         assign( *newC,
1886                 binop(Iop_And32,
1887                       binop(Iop_Shr32, mkexpr(rMt),
1888                                        mkU8(shift_amt - 1)),
1889                       mkU32(1)));
1890      }
1891      assign( *res,
1892              binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
1893      DIS(buf, "r%u, LSR #%u", rM, shift_amt);
1894   }
1895}
1896
1897
1898static void compute_result_and_C_after_LSR_by_reg (
1899               /*OUT*/HChar* buf,
1900               IRTemp* res,
1901               IRTemp* newC,
1902               IRTemp rMt, IRTemp rSt,  /* operands */
1903               UInt rM,    UInt rS      /* only for debug printing */
1904            )
1905{
1906   // shift right in range 0 .. 255
1907   // amt = rS & 255
1908   // res  = amt < 32 ?  Rm >>u amt  : 0
1909   // newC = amt == 0     ? oldC  :
1910   //        amt in 1..32 ?  Rm[amt-1]  : 0
1911   IRTemp amtT = newTemp(Ity_I32);
1912   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1913   if (newC) {
1914      /* mux0X(amt == 0,
1915               mux0X(amt < 32,
1916                     0,
1917                     Rm[(amt-1) & 31]),
1918               oldC)
1919      */
1920      IRTemp oldC = newTemp(Ity_I32);
1921      assign(oldC, mk_armg_calculate_flag_c() );
1922      assign(
1923         *newC,
1924         IRExpr_ITE(
1925            binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1926            mkexpr(oldC),
1927            IRExpr_ITE(
1928               binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
1929               binop(Iop_And32,
1930                     binop(Iop_Shr32,
1931                           mkexpr(rMt),
1932                           unop(Iop_32to8,
1933                                binop(Iop_And32,
1934                                      binop(Iop_Sub32,
1935                                            mkexpr(amtT),
1936                                            mkU32(1)),
1937                                      mkU32(31)
1938                                )
1939                           )
1940                     ),
1941                     mkU32(1)
1942                     ),
1943               mkU32(0)
1944            )
1945         )
1946      );
1947   }
1948   // (Rm >>u (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
1949   // Lhs of the & limits the shift to 31 bits, so as to
1950   // give known IR semantics.  Rhs of the & is all 1s for
1951   // Rs <= 31 and all 0s for Rs >= 32.
1952   assign(
1953      *res,
1954      binop(
1955         Iop_And32,
1956         binop(Iop_Shr32,
1957               mkexpr(rMt),
1958               unop(Iop_32to8,
1959                    binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1960         binop(Iop_Sar32,
1961               binop(Iop_Sub32,
1962                     mkexpr(amtT),
1963                     mkU32(32)),
1964               mkU8(31))));
1965    DIS(buf, "r%u, LSR r%u", rM, rS);
1966}
1967
1968
1969static void compute_result_and_C_after_ASR_by_imm5 (
1970               /*OUT*/HChar* buf,
1971               IRTemp* res,
1972               IRTemp* newC,
1973               IRTemp rMt, UInt shift_amt, /* operands */
1974               UInt rM      /* only for debug printing */
1975            )
1976{
1977   if (shift_amt == 0) {
1978      // conceptually a 32-bit shift, however:
1979      // res  = Rm >>s 31
1980      // newC = Rm[31]
1981      if (newC) {
1982         assign( *newC,
1983                 binop(Iop_And32,
1984                       binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1985                       mkU32(1)));
1986      }
1987      assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
1988      DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
1989   } else {
1990      // shift in range 1..31
1991      // res = Rm >>s shift_amt
1992      // newC = Rm[shift_amt - 1]
1993      vassert(shift_amt >= 1 && shift_amt <= 31);
1994      if (newC) {
1995         assign( *newC,
1996                 binop(Iop_And32,
1997                       binop(Iop_Shr32, mkexpr(rMt),
1998                                        mkU8(shift_amt - 1)),
1999                       mkU32(1)));
2000      }
2001      assign( *res,
2002              binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
2003      DIS(buf, "r%u, ASR #%u", rM, shift_amt);
2004   }
2005}
2006
2007
2008static void compute_result_and_C_after_ASR_by_reg (
2009               /*OUT*/HChar* buf,
2010               IRTemp* res,
2011               IRTemp* newC,
2012               IRTemp rMt, IRTemp rSt,  /* operands */
2013               UInt rM,    UInt rS      /* only for debug printing */
2014            )
2015{
2016   // arithmetic shift right in range 0 .. 255
2017   // amt = rS & 255
2018   // res  = amt < 32 ?  Rm >>s amt  : Rm >>s 31
2019   // newC = amt == 0     ? oldC  :
2020   //        amt in 1..32 ?  Rm[amt-1]  : Rm[31]
2021   IRTemp amtT = newTemp(Ity_I32);
2022   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2023   if (newC) {
2024      /* mux0X(amt == 0,
2025               mux0X(amt < 32,
2026                     Rm[31],
2027                     Rm[(amt-1) & 31])
2028               oldC)
2029      */
2030      IRTemp oldC = newTemp(Ity_I32);
2031      assign(oldC, mk_armg_calculate_flag_c() );
2032      assign(
2033         *newC,
2034         IRExpr_ITE(
2035            binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
2036            mkexpr(oldC),
2037            IRExpr_ITE(
2038               binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
2039               binop(Iop_And32,
2040                     binop(Iop_Shr32,
2041                           mkexpr(rMt),
2042                           unop(Iop_32to8,
2043                                binop(Iop_And32,
2044                                      binop(Iop_Sub32,
2045                                            mkexpr(amtT),
2046                                            mkU32(1)),
2047                                      mkU32(31)
2048                                )
2049                           )
2050                     ),
2051                     mkU32(1)
2052                     ),
2053               binop(Iop_And32,
2054                     binop(Iop_Shr32,
2055                           mkexpr(rMt),
2056                           mkU8(31)
2057                     ),
2058                     mkU32(1)
2059               )
2060            )
2061         )
2062      );
2063   }
2064   // (Rm >>s (amt <u 32 ? amt : 31))
2065   assign(
2066      *res,
2067      binop(
2068         Iop_Sar32,
2069         mkexpr(rMt),
2070         unop(
2071            Iop_32to8,
2072            IRExpr_ITE(
2073               binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32)),
2074               mkexpr(amtT),
2075               mkU32(31)))));
2076    DIS(buf, "r%u, ASR r%u", rM, rS);
2077}
2078
2079
2080static void compute_result_and_C_after_ROR_by_reg (
2081               /*OUT*/HChar* buf,
2082               IRTemp* res,
2083               IRTemp* newC,
2084               IRTemp rMt, IRTemp rSt,  /* operands */
2085               UInt rM,    UInt rS      /* only for debug printing */
2086            )
2087{
2088   // rotate right in range 0 .. 255
2089   // amt = rS & 255
2090   // shop =  Rm `ror` (amt & 31)
2091   // shco =  amt == 0 ? oldC : Rm[(amt-1) & 31]
2092   IRTemp amtT = newTemp(Ity_I32);
2093   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2094   IRTemp amt5T = newTemp(Ity_I32);
2095   assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
2096   IRTemp oldC = newTemp(Ity_I32);
2097   assign(oldC, mk_armg_calculate_flag_c() );
2098   if (newC) {
2099      assign(
2100         *newC,
2101         IRExpr_ITE(
2102            binop(Iop_CmpNE32, mkexpr(amtT), mkU32(0)),
2103            binop(Iop_And32,
2104                  binop(Iop_Shr32,
2105                        mkexpr(rMt),
2106                        unop(Iop_32to8,
2107                             binop(Iop_And32,
2108                                   binop(Iop_Sub32,
2109                                         mkexpr(amtT),
2110                                         mkU32(1)
2111                                   ),
2112                                   mkU32(31)
2113                             )
2114                        )
2115                  ),
2116                  mkU32(1)
2117            ),
2118            mkexpr(oldC)
2119         )
2120      );
2121   }
2122   assign(
2123      *res,
2124      IRExpr_ITE(
2125         binop(Iop_CmpNE32, mkexpr(amt5T), mkU32(0)),
2126         binop(Iop_Or32,
2127               binop(Iop_Shr32,
2128                     mkexpr(rMt),
2129                     unop(Iop_32to8, mkexpr(amt5T))
2130               ),
2131               binop(Iop_Shl32,
2132                     mkexpr(rMt),
2133                     unop(Iop_32to8,
2134                          binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
2135                     )
2136               )
2137               ),
2138         mkexpr(rMt)
2139      )
2140   );
2141   DIS(buf, "r%u, ROR r#%u", rM, rS);
2142}
2143
2144
2145/* Generate an expression corresponding to the immediate-shift case of
2146   a shifter operand.  This is used both for ARM and Thumb2.
2147
2148   Bind it to a temporary, and return that via *res.  If newC is
2149   non-NULL, also compute a value for the shifter's carry out (in the
2150   LSB of a word), bind it to a temporary, and return that via *shco.
2151
2152   Generates GETs from the guest state and is therefore not safe to
2153   use once we start doing PUTs to it, for any given instruction.
2154
2155   'how' is encoded thusly:
2156      00b LSL,  01b LSR,  10b ASR,  11b ROR
2157   Most but not all ARM and Thumb integer insns use this encoding.
2158   Be careful to ensure the right value is passed here.
2159*/
2160static void compute_result_and_C_after_shift_by_imm5 (
2161               /*OUT*/HChar* buf,
2162               /*OUT*/IRTemp* res,
2163               /*OUT*/IRTemp* newC,
2164               IRTemp  rMt,       /* reg to shift */
2165               UInt    how,       /* what kind of shift */
2166               UInt    shift_amt, /* shift amount (0..31) */
2167               UInt    rM         /* only for debug printing */
2168            )
2169{
2170   vassert(shift_amt < 32);
2171   vassert(how < 4);
2172
2173   switch (how) {
2174
2175      case 0:
2176         compute_result_and_C_after_LSL_by_imm5(
2177            buf, res, newC, rMt, shift_amt, rM
2178         );
2179         break;
2180
2181      case 1:
2182         compute_result_and_C_after_LSR_by_imm5(
2183            buf, res, newC, rMt, shift_amt, rM
2184         );
2185         break;
2186
2187      case 2:
2188         compute_result_and_C_after_ASR_by_imm5(
2189            buf, res, newC, rMt, shift_amt, rM
2190         );
2191         break;
2192
2193      case 3:
2194         if (shift_amt == 0) {
2195            IRTemp oldcT = newTemp(Ity_I32);
2196            // rotate right 1 bit through carry (?)
2197            // RRX -- described at ARM ARM A5-17
2198            // res  = (oldC << 31) | (Rm >>u 1)
2199            // newC = Rm[0]
2200            if (newC) {
2201               assign( *newC,
2202                       binop(Iop_And32, mkexpr(rMt), mkU32(1)));
2203            }
2204            assign( oldcT, mk_armg_calculate_flag_c() );
2205            assign( *res,
2206                    binop(Iop_Or32,
2207                          binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
2208                          binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
2209            DIS(buf, "r%u, RRX", rM);
2210         } else {
2211            // rotate right in range 1..31
2212            // res  = Rm `ror` shift_amt
2213            // newC = Rm[shift_amt - 1]
2214            vassert(shift_amt >= 1 && shift_amt <= 31);
2215            if (newC) {
2216               assign( *newC,
2217                       binop(Iop_And32,
2218                             binop(Iop_Shr32, mkexpr(rMt),
2219                                              mkU8(shift_amt - 1)),
2220                             mkU32(1)));
2221            }
2222            assign( *res,
2223                    binop(Iop_Or32,
2224                          binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
2225                          binop(Iop_Shl32, mkexpr(rMt),
2226                                           mkU8(32-shift_amt))));
2227            DIS(buf, "r%u, ROR #%u", rM, shift_amt);
2228         }
2229         break;
2230
2231      default:
2232         /*NOTREACHED*/
2233         vassert(0);
2234   }
2235}
2236
2237
2238/* Generate an expression corresponding to the register-shift case of
2239   a shifter operand.  This is used both for ARM and Thumb2.
2240
2241   Bind it to a temporary, and return that via *res.  If newC is
2242   non-NULL, also compute a value for the shifter's carry out (in the
2243   LSB of a word), bind it to a temporary, and return that via *shco.
2244
2245   Generates GETs from the guest state and is therefore not safe to
2246   use once we start doing PUTs to it, for any given instruction.
2247
2248   'how' is encoded thusly:
2249      00b LSL,  01b LSR,  10b ASR,  11b ROR
2250   Most but not all ARM and Thumb integer insns use this encoding.
2251   Be careful to ensure the right value is passed here.
2252*/
2253static void compute_result_and_C_after_shift_by_reg (
2254               /*OUT*/HChar*  buf,
2255               /*OUT*/IRTemp* res,
2256               /*OUT*/IRTemp* newC,
2257               IRTemp  rMt,       /* reg to shift */
2258               UInt    how,       /* what kind of shift */
2259               IRTemp  rSt,       /* shift amount */
2260               UInt    rM,        /* only for debug printing */
2261               UInt    rS         /* only for debug printing */
2262            )
2263{
2264   vassert(how < 4);
2265   switch (how) {
2266      case 0: { /* LSL */
2267         compute_result_and_C_after_LSL_by_reg(
2268            buf, res, newC, rMt, rSt, rM, rS
2269         );
2270         break;
2271      }
2272      case 1: { /* LSR */
2273         compute_result_and_C_after_LSR_by_reg(
2274            buf, res, newC, rMt, rSt, rM, rS
2275         );
2276         break;
2277      }
2278      case 2: { /* ASR */
2279         compute_result_and_C_after_ASR_by_reg(
2280            buf, res, newC, rMt, rSt, rM, rS
2281         );
2282         break;
2283      }
2284      case 3: { /* ROR */
2285         compute_result_and_C_after_ROR_by_reg(
2286             buf, res, newC, rMt, rSt, rM, rS
2287         );
2288         break;
2289      }
2290      default:
2291         /*NOTREACHED*/
2292         vassert(0);
2293   }
2294}
2295
2296
2297/* Generate an expression corresponding to a shifter_operand, bind it
2298   to a temporary, and return that via *shop.  If shco is non-NULL,
2299   also compute a value for the shifter's carry out (in the LSB of a
2300   word), bind it to a temporary, and return that via *shco.
2301
2302   If for some reason we can't come up with a shifter operand (missing
2303   case?  not really a shifter operand?) return False.
2304
2305   Generates GETs from the guest state and is therefore not safe to
2306   use once we start doing PUTs to it, for any given instruction.
2307
2308   For ARM insns only; not for Thumb.
2309*/
2310static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
2311                                 /*OUT*/IRTemp* shop,
2312                                 /*OUT*/IRTemp* shco,
2313                                 /*OUT*/HChar* buf )
2314{
2315   UInt insn_4 = (insn_11_0 >> 4) & 1;
2316   UInt insn_7 = (insn_11_0 >> 7) & 1;
2317   vassert(insn_25 <= 0x1);
2318   vassert(insn_11_0 <= 0xFFF);
2319
2320   vassert(shop && *shop == IRTemp_INVALID);
2321   *shop = newTemp(Ity_I32);
2322
2323   if (shco) {
2324      vassert(*shco == IRTemp_INVALID);
2325      *shco = newTemp(Ity_I32);
2326   }
2327
2328   /* 32-bit immediate */
2329
2330   if (insn_25 == 1) {
2331      /* immediate: (7:0) rotated right by 2 * (11:8) */
2332      UInt imm = (insn_11_0 >> 0) & 0xFF;
2333      UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
2334      vassert(rot <= 30);
2335      imm = ROR32(imm, rot);
2336      if (shco) {
2337         if (rot == 0) {
2338            assign( *shco, mk_armg_calculate_flag_c() );
2339         } else {
2340            assign( *shco, mkU32( (imm >> 31) & 1 ) );
2341         }
2342      }
2343      DIS(buf, "#0x%x", imm);
2344      assign( *shop, mkU32(imm) );
2345      return True;
2346   }
2347
2348   /* Shift/rotate by immediate */
2349
2350   if (insn_25 == 0 && insn_4 == 0) {
2351      /* Rm (3:0) shifted (6:5) by immediate (11:7) */
2352      UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
2353      UInt rM        = (insn_11_0 >> 0) & 0xF;
2354      UInt how       = (insn_11_0 >> 5) & 3;
2355      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
2356      IRTemp rMt = newTemp(Ity_I32);
2357      assign(rMt, getIRegA(rM));
2358
2359      vassert(shift_amt <= 31);
2360
2361      compute_result_and_C_after_shift_by_imm5(
2362         buf, shop, shco, rMt, how, shift_amt, rM
2363      );
2364      return True;
2365   }
2366
2367   /* Shift/rotate by register */
2368   if (insn_25 == 0 && insn_4 == 1) {
2369      /* Rm (3:0) shifted (6:5) by Rs (11:8) */
2370      UInt rM  = (insn_11_0 >> 0) & 0xF;
2371      UInt rS  = (insn_11_0 >> 8) & 0xF;
2372      UInt how = (insn_11_0 >> 5) & 3;
2373      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
2374      IRTemp rMt = newTemp(Ity_I32);
2375      IRTemp rSt = newTemp(Ity_I32);
2376
2377      if (insn_7 == 1)
2378         return False; /* not really a shifter operand */
2379
2380      assign(rMt, getIRegA(rM));
2381      assign(rSt, getIRegA(rS));
2382
2383      compute_result_and_C_after_shift_by_reg(
2384         buf, shop, shco, rMt, how, rSt, rM, rS
2385      );
2386      return True;
2387   }
2388
2389   vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
2390   return False;
2391}
2392
2393
2394/* ARM only */
2395static
2396IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
2397                                    /*OUT*/HChar* buf )
2398{
2399   vassert(rN < 16);
2400   vassert(bU < 2);
2401   vassert(imm12 < 0x1000);
2402   HChar opChar = bU == 1 ? '+' : '-';
2403   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
2404   return
2405      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2406             getIRegA(rN),
2407             mkU32(imm12) );
2408}
2409
2410
2411/* ARM only.
2412   NB: This is "DecodeImmShift" in newer versions of the the ARM ARM.
2413*/
2414static
2415IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
2416                                          UInt sh2, UInt imm5,
2417                                          /*OUT*/HChar* buf )
2418{
2419   vassert(rN < 16);
2420   vassert(bU < 2);
2421   vassert(rM < 16);
2422   vassert(sh2 < 4);
2423   vassert(imm5 < 32);
2424   HChar   opChar = bU == 1 ? '+' : '-';
2425   IRExpr* index  = NULL;
2426   switch (sh2) {
2427      case 0: /* LSL */
2428         /* imm5 can be in the range 0 .. 31 inclusive. */
2429         index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
2430         DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
2431         break;
2432      case 1: /* LSR */
2433         if (imm5 == 0) {
2434            index = mkU32(0);
2435            vassert(0); // ATC
2436         } else {
2437            index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
2438         }
2439         DIS(buf, "[r%u, %cr%u, LSR #%u]",
2440                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2441         break;
2442      case 2: /* ASR */
2443         /* Doesn't this just mean that the behaviour with imm5 == 0
2444            is the same as if it had been 31 ? */
2445         if (imm5 == 0) {
2446            index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
2447            vassert(0); // ATC
2448         } else {
2449            index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
2450         }
2451         DIS(buf, "[r%u, %cr%u, ASR #%u]",
2452                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2453         break;
2454      case 3: /* ROR or RRX */
2455         if (imm5 == 0) {
2456            IRTemp rmT    = newTemp(Ity_I32);
2457            IRTemp cflagT = newTemp(Ity_I32);
2458            assign(rmT, getIRegA(rM));
2459            assign(cflagT, mk_armg_calculate_flag_c());
2460            index = binop(Iop_Or32,
2461                          binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
2462                          binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
2463            DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
2464         } else {
2465            IRTemp rmT = newTemp(Ity_I32);
2466            assign(rmT, getIRegA(rM));
2467            vassert(imm5 >= 1 && imm5 <= 31);
2468            index = binop(Iop_Or32,
2469                          binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
2470                          binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
2471            DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
2472         }
2473         break;
2474      default:
2475         vassert(0);
2476   }
2477   vassert(index);
2478   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2479                getIRegA(rN), index);
2480}
2481
2482
2483/* ARM only */
2484static
2485IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
2486                                   /*OUT*/HChar* buf )
2487{
2488   vassert(rN < 16);
2489   vassert(bU < 2);
2490   vassert(imm8 < 0x100);
2491   HChar opChar = bU == 1 ? '+' : '-';
2492   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
2493   return
2494      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2495             getIRegA(rN),
2496             mkU32(imm8) );
2497}
2498
2499
2500/* ARM only */
2501static
2502IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
2503                                  /*OUT*/HChar* buf )
2504{
2505   vassert(rN < 16);
2506   vassert(bU < 2);
2507   vassert(rM < 16);
2508   HChar   opChar = bU == 1 ? '+' : '-';
2509   IRExpr* index  = getIRegA(rM);
2510   DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
2511   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2512                getIRegA(rN), index);
2513}
2514
2515
2516/* irRes :: Ity_I32 holds a floating point comparison result encoded
2517   as an IRCmpF64Result.  Generate code to convert it to an
2518   ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
2519   Assign a new temp to hold that value, and return the temp. */
2520static
2521IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
2522{
2523   IRTemp ix       = newTemp(Ity_I32);
2524   IRTemp termL    = newTemp(Ity_I32);
2525   IRTemp termR    = newTemp(Ity_I32);
2526   IRTemp nzcv     = newTemp(Ity_I32);
2527
2528   /* This is where the fun starts.  We have to convert 'irRes' from
2529      an IR-convention return result (IRCmpF64Result) to an
2530      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
2531      4 bits of 'nzcv'. */
2532   /* Map compare result from IR to ARM(nzcv) */
2533   /*
2534      FP cmp result | IR   | ARM(nzcv)
2535      --------------------------------
2536      UN              0x45   0011
2537      LT              0x01   1000
2538      GT              0x00   0010
2539      EQ              0x40   0110
2540   */
2541   /* Now since you're probably wondering WTF ..
2542
2543      ix fishes the useful bits out of the IR value, bits 6 and 0, and
2544      places them side by side, giving a number which is 0, 1, 2 or 3.
2545
2546      termL is a sequence cooked up by GNU superopt.  It converts ix
2547         into an almost correct value NZCV value (incredibly), except
2548         for the case of UN, where it produces 0100 instead of the
2549         required 0011.
2550
2551      termR is therefore a correction term, also computed from ix.  It
2552         is 1 in the UN case and 0 for LT, GT and UN.  Hence, to get
2553         the final correct value, we subtract termR from termL.
2554
2555      Don't take my word for it.  There's a test program at the bottom
2556      of this file, to try this out with.
2557   */
2558   assign(
2559      ix,
2560      binop(Iop_Or32,
2561            binop(Iop_And32,
2562                  binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
2563                  mkU32(3)),
2564            binop(Iop_And32, mkexpr(irRes), mkU32(1))));
2565
2566   assign(
2567      termL,
2568      binop(Iop_Add32,
2569            binop(Iop_Shr32,
2570                  binop(Iop_Sub32,
2571                        binop(Iop_Shl32,
2572                              binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
2573                              mkU8(30)),
2574                        mkU32(1)),
2575                  mkU8(29)),
2576            mkU32(1)));
2577
2578   assign(
2579      termR,
2580      binop(Iop_And32,
2581            binop(Iop_And32,
2582                  mkexpr(ix),
2583                  binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
2584            mkU32(1)));
2585
2586   assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
2587   return nzcv;
2588}
2589
2590
2591/* Thumb32 only.  This is "ThumbExpandImm" in the ARM ARM.  If
2592   updatesC is non-NULL, a boolean is written to it indicating whether
2593   or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
2594*/
2595static UInt thumbExpandImm ( Bool* updatesC,
2596                             UInt imm1, UInt imm3, UInt imm8 )
2597{
2598   vassert(imm1 < (1<<1));
2599   vassert(imm3 < (1<<3));
2600   vassert(imm8 < (1<<8));
2601   UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
2602   UInt abcdefgh = imm8;
2603   UInt lbcdefgh = imm8 | 0x80;
2604   if (updatesC) {
2605      *updatesC = i_imm3_a >= 8;
2606   }
2607   switch (i_imm3_a) {
2608      case 0: case 1:
2609         return abcdefgh;
2610      case 2: case 3:
2611         return (abcdefgh << 16) | abcdefgh;
2612      case 4: case 5:
2613         return (abcdefgh << 24) | (abcdefgh << 8);
2614      case 6: case 7:
2615         return (abcdefgh << 24) | (abcdefgh << 16)
2616                | (abcdefgh << 8) | abcdefgh;
2617      case 8 ... 31:
2618         return lbcdefgh << (32 - i_imm3_a);
2619      default:
2620         break;
2621   }
2622   /*NOTREACHED*/vassert(0);
2623}
2624
2625
2626/* Version of thumbExpandImm where we simply feed it the
2627   instruction halfwords (the lowest addressed one is I0). */
2628static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
2629                                        UShort i0s, UShort i1s )
2630{
2631   UInt i0    = (UInt)i0s;
2632   UInt i1    = (UInt)i1s;
2633   UInt imm1  = SLICE_UInt(i0,10,10);
2634   UInt imm3  = SLICE_UInt(i1,14,12);
2635   UInt imm8  = SLICE_UInt(i1,7,0);
2636   return thumbExpandImm(updatesC, imm1, imm3, imm8);
2637}
2638
2639
2640/* Thumb16 only.  Given the firstcond and mask fields from an IT
2641   instruction, compute the 32-bit ITSTATE value implied, as described
2642   in libvex_guest_arm.h.  This is not the ARM ARM representation.
2643   Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
2644   disassembly printing.  Returns False if firstcond or mask
2645   denote something invalid.
2646
2647   The number and conditions for the instructions to be
2648   conditionalised depend on firstcond and mask:
2649
2650   mask      cond 1    cond 2      cond 3      cond 4
2651
2652   1000      fc[3:0]
2653   x100      fc[3:0]   fc[3:1]:x
2654   xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
2655   xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z
2656
2657   The condition fields are assembled in *itstate backwards (cond 4 at
2658   the top, cond 1 at the bottom).  Conditions are << 4'd and then
2659   ^0xE'd, and those fields that correspond to instructions in the IT
2660   block are tagged with a 1 bit.
2661*/
2662static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
2663                              /*OUT*/HChar* ch1,
2664                              /*OUT*/HChar* ch2,
2665                              /*OUT*/HChar* ch3,
2666                              UInt firstcond, UInt mask )
2667{
2668   vassert(firstcond <= 0xF);
2669   vassert(mask <= 0xF);
2670   *itstate = 0;
2671   *ch1 = *ch2 = *ch3 = '.';
2672   if (mask == 0)
2673      return False; /* the logic below actually ensures this anyway,
2674                       but clearer to make it explicit. */
2675   if (firstcond == 0xF)
2676      return False; /* NV is not allowed */
2677   if (firstcond == 0xE && popcount32(mask) != 1)
2678      return False; /* if firstcond is AL then all the rest must be too */
2679
2680   UInt m3 = (mask >> 3) & 1;
2681   UInt m2 = (mask >> 2) & 1;
2682   UInt m1 = (mask >> 1) & 1;
2683   UInt m0 = (mask >> 0) & 1;
2684
2685   UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
2686   UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;
2687
2688   if (m3 == 1 && (m2|m1|m0) == 0) {
2689      *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
2690      *itstate ^= 0xE0E0E0E0;
2691      return True;
2692   }
2693
2694   if (m2 == 1 && (m1|m0) == 0) {
2695      *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
2696      *itstate ^= 0xE0E0E0E0;
2697      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2698      return True;
2699   }
2700
2701   if (m1 == 1 && m0 == 0) {
2702      *itstate = (ni << 24)
2703                 | (setbit32(fc, 4, m2) << 16)
2704                 | (setbit32(fc, 4, m3) << 8) | fc;
2705      *itstate ^= 0xE0E0E0E0;
2706      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2707      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2708      return True;
2709   }
2710
2711   if (m0 == 1) {
2712      *itstate = (setbit32(fc, 4, m1) << 24)
2713                 | (setbit32(fc, 4, m2) << 16)
2714                 | (setbit32(fc, 4, m3) << 8) | fc;
2715      *itstate ^= 0xE0E0E0E0;
2716      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2717      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2718      *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
2719      return True;
2720   }
2721
2722   return False;
2723}
2724
2725
2726/* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
2727   Chapter 7 Section 1. */
2728static IRTemp gen_BITREV ( IRTemp x0 )
2729{
2730   IRTemp x1 = newTemp(Ity_I32);
2731   IRTemp x2 = newTemp(Ity_I32);
2732   IRTemp x3 = newTemp(Ity_I32);
2733   IRTemp x4 = newTemp(Ity_I32);
2734   IRTemp x5 = newTemp(Ity_I32);
2735   UInt   c1 = 0x55555555;
2736   UInt   c2 = 0x33333333;
2737   UInt   c3 = 0x0F0F0F0F;
2738   UInt   c4 = 0x00FF00FF;
2739   UInt   c5 = 0x0000FFFF;
2740   assign(x1,
2741          binop(Iop_Or32,
2742                binop(Iop_Shl32,
2743                      binop(Iop_And32, mkexpr(x0), mkU32(c1)),
2744                      mkU8(1)),
2745                binop(Iop_Shr32,
2746                      binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
2747                      mkU8(1))
2748   ));
2749   assign(x2,
2750          binop(Iop_Or32,
2751                binop(Iop_Shl32,
2752                      binop(Iop_And32, mkexpr(x1), mkU32(c2)),
2753                      mkU8(2)),
2754                binop(Iop_Shr32,
2755                      binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
2756                      mkU8(2))
2757   ));
2758   assign(x3,
2759          binop(Iop_Or32,
2760                binop(Iop_Shl32,
2761                      binop(Iop_And32, mkexpr(x2), mkU32(c3)),
2762                      mkU8(4)),
2763                binop(Iop_Shr32,
2764                      binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
2765                      mkU8(4))
2766   ));
2767   assign(x4,
2768          binop(Iop_Or32,
2769                binop(Iop_Shl32,
2770                      binop(Iop_And32, mkexpr(x3), mkU32(c4)),
2771                      mkU8(8)),
2772                binop(Iop_Shr32,
2773                      binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
2774                      mkU8(8))
2775   ));
2776   assign(x5,
2777          binop(Iop_Or32,
2778                binop(Iop_Shl32,
2779                      binop(Iop_And32, mkexpr(x4), mkU32(c5)),
2780                      mkU8(16)),
2781                binop(Iop_Shr32,
2782                      binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
2783                      mkU8(16))
2784   ));
2785   return x5;
2786}
2787
2788
2789/* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2790   0:1:2:3 (aka byte-swap). */
2791static IRTemp gen_REV ( IRTemp arg )
2792{
2793   IRTemp res = newTemp(Ity_I32);
2794   assign(res,
2795          binop(Iop_Or32,
2796                binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
2797          binop(Iop_Or32,
2798                binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2799                                 mkU32(0x00FF0000)),
2800          binop(Iop_Or32,
2801                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2802                                       mkU32(0x0000FF00)),
2803                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
2804                                       mkU32(0x000000FF) )
2805   ))));
2806   return res;
2807}
2808
2809
2810/* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2811   2:3:0:1 (swap within lo and hi halves). */
2812static IRTemp gen_REV16 ( IRTemp arg )
2813{
2814   IRTemp res = newTemp(Ity_I32);
2815   assign(res,
2816          binop(Iop_Or32,
2817                binop(Iop_And32,
2818                      binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2819                      mkU32(0xFF00FF00)),
2820                binop(Iop_And32,
2821                      binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2822                      mkU32(0x00FF00FF))));
2823   return res;
2824}
2825
2826
2827/*------------------------------------------------------------*/
2828/*--- Advanced SIMD (NEON) instructions                    ---*/
2829/*------------------------------------------------------------*/
2830
2831/*------------------------------------------------------------*/
2832/*--- NEON data processing                                 ---*/
2833/*------------------------------------------------------------*/
2834
2835/* For all NEON DP ops, we use the normal scheme to handle conditional
2836   writes to registers -- pass in condT and hand that on to the
2837   put*Reg functions.  In ARM mode condT is always IRTemp_INVALID
2838   since NEON is unconditional for ARM.  In Thumb mode condT is
2839   derived from the ITSTATE shift register in the normal way. */
2840
2841static
2842UInt get_neon_d_regno(UInt theInstr)
2843{
2844   UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
2845   if (theInstr & 0x40) {
2846      if (x & 1) {
2847         x = x + 0x100;
2848      } else {
2849         x = x >> 1;
2850      }
2851   }
2852   return x;
2853}
2854
2855static
2856UInt get_neon_n_regno(UInt theInstr)
2857{
2858   UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
2859   if (theInstr & 0x40) {
2860      if (x & 1) {
2861         x = x + 0x100;
2862      } else {
2863         x = x >> 1;
2864      }
2865   }
2866   return x;
2867}
2868
2869static
2870UInt get_neon_m_regno(UInt theInstr)
2871{
2872   UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
2873   if (theInstr & 0x40) {
2874      if (x & 1) {
2875         x = x + 0x100;
2876      } else {
2877         x = x >> 1;
2878      }
2879   }
2880   return x;
2881}
2882
2883static
2884Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
2885{
2886   UInt dreg = get_neon_d_regno(theInstr);
2887   UInt mreg = get_neon_m_regno(theInstr);
2888   UInt nreg = get_neon_n_regno(theInstr);
2889   UInt imm4 = (theInstr >> 8) & 0xf;
2890   UInt Q = (theInstr >> 6) & 1;
2891   HChar reg_t = Q ? 'q' : 'd';
2892
2893   if (Q) {
2894      putQReg(dreg, triop(Iop_SliceV128, /*hiV128*/getQReg(mreg),
2895                          /*loV128*/getQReg(nreg), mkU8(imm4)), condT);
2896   } else {
2897      putDRegI64(dreg, triop(Iop_Slice64, /*hiI64*/getDRegI64(mreg),
2898                             /*loI64*/getDRegI64(nreg), mkU8(imm4)), condT);
2899   }
2900   DIP("vext.8 %c%u, %c%u, %c%u, #%u\n", reg_t, dreg, reg_t, nreg,
2901                                         reg_t, mreg, imm4);
2902   return True;
2903}
2904
2905/* Generate specific vector FP binary ops, possibly with a fake
2906   rounding mode as required by the primop. */
2907static
2908IRExpr* binop_w_fake_RM ( IROp op, IRExpr* argL, IRExpr* argR )
2909{
2910   switch (op) {
2911      case Iop_Add32Fx4:
2912      case Iop_Sub32Fx4:
2913      case Iop_Mul32Fx4:
2914         return triop(op, get_FAKE_roundingmode(), argL, argR );
2915      case Iop_Add32x4: case Iop_Add16x8:
2916      case Iop_Sub32x4: case Iop_Sub16x8:
2917      case Iop_Mul32x4: case Iop_Mul16x8:
2918      case Iop_Mul32x2: case Iop_Mul16x4:
2919      case Iop_Add32Fx2:
2920      case Iop_Sub32Fx2:
2921      case Iop_Mul32Fx2:
2922      case Iop_PwAdd32Fx2:
2923         return binop(op, argL, argR);
2924      default:
2925        ppIROp(op);
2926        vassert(0);
2927   }
2928}
2929
2930/* VTBL, VTBX */
2931static
2932Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
2933{
2934   UInt op = (theInstr >> 6) & 1;
2935   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
2936   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
2937   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
2938   UInt len = (theInstr >> 8) & 3;
2939   Int i;
2940   IROp cmp;
2941   ULong imm;
2942   IRTemp arg_l;
2943   IRTemp old_mask, new_mask, cur_mask;
2944   IRTemp old_res, new_res;
2945   IRTemp old_arg, new_arg;
2946
2947   if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
2948      return False;
2949   if (nreg + len > 31)
2950      return False;
2951
2952   cmp = Iop_CmpGT8Ux8;
2953
2954   old_mask = newTemp(Ity_I64);
2955   old_res = newTemp(Ity_I64);
2956   old_arg = newTemp(Ity_I64);
2957   assign(old_mask, mkU64(0));
2958   assign(old_res, mkU64(0));
2959   assign(old_arg, getDRegI64(mreg));
2960   imm = 8;
2961   imm = (imm <<  8) | imm;
2962   imm = (imm << 16) | imm;
2963   imm = (imm << 32) | imm;
2964
2965   for (i = 0; i <= len; i++) {
2966      arg_l = newTemp(Ity_I64);
2967      new_mask = newTemp(Ity_I64);
2968      cur_mask = newTemp(Ity_I64);
2969      new_res = newTemp(Ity_I64);
2970      new_arg = newTemp(Ity_I64);
2971      assign(arg_l, getDRegI64(nreg+i));
2972      assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
2973      assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
2974      assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
2975      assign(new_res, binop(Iop_Or64,
2976                            mkexpr(old_res),
2977                            binop(Iop_And64,
2978                                  binop(Iop_Perm8x8,
2979                                        mkexpr(arg_l),
2980                                        binop(Iop_And64,
2981                                              mkexpr(old_arg),
2982                                              mkexpr(cur_mask))),
2983                                  mkexpr(cur_mask))));
2984
2985      old_arg = new_arg;
2986      old_mask = new_mask;
2987      old_res = new_res;
2988   }
2989   if (op) {
2990      new_res = newTemp(Ity_I64);
2991      assign(new_res, binop(Iop_Or64,
2992                            binop(Iop_And64,
2993                                  getDRegI64(dreg),
2994                                  unop(Iop_Not64, mkexpr(old_mask))),
2995                            mkexpr(old_res)));
2996      old_res = new_res;
2997   }
2998
2999   putDRegI64(dreg, mkexpr(old_res), condT);
3000   DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
3001   if (len > 0) {
3002      DIP("d%u-d%u", nreg, nreg + len);
3003   } else {
3004      DIP("d%u", nreg);
3005   }
3006   DIP("}, d%u\n", mreg);
3007   return True;
3008}
3009
3010/* VDUP (scalar)  */
3011static
3012Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
3013{
3014   UInt Q = (theInstr >> 6) & 1;
3015   UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
3016   UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
3017   UInt imm4 = (theInstr >> 16) & 0xF;
3018   UInt index;
3019   UInt size;
3020   IRTemp arg_m;
3021   IRTemp res;
3022   IROp op, op2;
3023
3024   if ((imm4 == 0) || (imm4 == 8))
3025      return False;
3026   if ((Q == 1) && ((dreg & 1) == 1))
3027      return False;
3028   if (Q)
3029      dreg >>= 1;
3030   arg_m = newTemp(Ity_I64);
3031   assign(arg_m, getDRegI64(mreg));
3032   if (Q)
3033      res = newTemp(Ity_V128);
3034   else
3035      res = newTemp(Ity_I64);
3036   if ((imm4 & 1) == 1) {
3037      op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
3038      op2 = Iop_GetElem8x8;
3039      index = imm4 >> 1;
3040      size = 8;
3041   } else if ((imm4 & 3) == 2) {
3042      op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
3043      op2 = Iop_GetElem16x4;
3044      index = imm4 >> 2;
3045      size = 16;
3046   } else if ((imm4 & 7) == 4) {
3047      op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
3048      op2 = Iop_GetElem32x2;
3049      index = imm4 >> 3;
3050      size = 32;
3051   } else {
3052      return False; // can this ever happen?
3053   }
3054   assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
3055   if (Q) {
3056      putQReg(dreg, mkexpr(res), condT);
3057   } else {
3058      putDRegI64(dreg, mkexpr(res), condT);
3059   }
3060   DIP("vdup.%u %c%u, d%u[%u]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
3061   return True;
3062}
3063
3064/* A7.4.1 Three registers of the same length */
3065static
3066Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
3067{
3068   /* In paths where this returns False, indicating a non-decodable
3069      instruction, there may still be some IR assignments to temporaries
3070      generated.  This is inconvenient but harmless, and the post-front-end
3071      IR optimisation pass will just remove them anyway.  So there's no
3072      effort made here to tidy it up.
3073   */
3074   UInt Q = (theInstr >> 6) & 1;
3075   UInt dreg = get_neon_d_regno(theInstr);
3076   UInt nreg = get_neon_n_regno(theInstr);
3077   UInt mreg = get_neon_m_regno(theInstr);
3078   UInt A = (theInstr >> 8) & 0xF;
3079   UInt B = (theInstr >> 4) & 1;
3080   UInt C = (theInstr >> 20) & 0x3;
3081   UInt U = (theInstr >> 24) & 1;
3082   UInt size = C;
3083
3084   IRTemp arg_n;
3085   IRTemp arg_m;
3086   IRTemp res;
3087
3088   if (Q) {
3089      arg_n = newTemp(Ity_V128);
3090      arg_m = newTemp(Ity_V128);
3091      res = newTemp(Ity_V128);
3092      assign(arg_n, getQReg(nreg));
3093      assign(arg_m, getQReg(mreg));
3094   } else {
3095      arg_n = newTemp(Ity_I64);
3096      arg_m = newTemp(Ity_I64);
3097      res = newTemp(Ity_I64);
3098      assign(arg_n, getDRegI64(nreg));
3099      assign(arg_m, getDRegI64(mreg));
3100   }
3101
3102   switch(A) {
3103      case 0:
3104         if (B == 0) {
3105            /* VHADD */
3106            ULong imm = 0;
3107            IRExpr *imm_val;
3108            IROp addOp;
3109            IROp andOp;
3110            IROp shOp;
3111            HChar regType = Q ? 'q' : 'd';
3112
3113            if (size == 3)
3114               return False;
3115            switch(size) {
3116               case 0: imm = 0x101010101010101LL; break;
3117               case 1: imm = 0x1000100010001LL; break;
3118               case 2: imm = 0x100000001LL; break;
3119               default: vassert(0);
3120            }
3121            if (Q) {
3122               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3123               andOp = Iop_AndV128;
3124            } else {
3125               imm_val = mkU64(imm);
3126               andOp = Iop_And64;
3127            }
3128            if (U) {
3129               switch(size) {
3130                  case 0:
3131                     addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3132                     shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3133                     break;
3134                  case 1:
3135                     addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3136                     shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3137                     break;
3138                  case 2:
3139                     addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3140                     shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3141                     break;
3142                  default:
3143                     vassert(0);
3144               }
3145            } else {
3146               switch(size) {
3147                  case 0:
3148                     addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3149                     shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3150                     break;
3151                  case 1:
3152                     addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3153                     shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3154                     break;
3155                  case 2:
3156                     addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3157                     shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3158                     break;
3159                  default:
3160                     vassert(0);
3161               }
3162            }
3163            assign(res,
3164                   binop(addOp,
3165                         binop(addOp,
3166                               binop(shOp, mkexpr(arg_m), mkU8(1)),
3167                               binop(shOp, mkexpr(arg_n), mkU8(1))),
3168                         binop(shOp,
3169                               binop(addOp,
3170                                     binop(andOp, mkexpr(arg_m), imm_val),
3171                                     binop(andOp, mkexpr(arg_n), imm_val)),
3172                               mkU8(1))));
3173            DIP("vhadd.%c%d %c%u, %c%u, %c%u\n",
3174                U ? 'u' : 's', 8 << size, regType,
3175                dreg, regType, nreg, regType, mreg);
3176         } else {
3177            /* VQADD */
3178            IROp op, op2;
3179            IRTemp tmp;
3180            HChar reg_t = Q ? 'q' : 'd';
3181            if (Q) {
3182               switch (size) {
3183                  case 0:
3184                     op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
3185                     op2 = Iop_Add8x16;
3186                     break;
3187                  case 1:
3188                     op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
3189                     op2 = Iop_Add16x8;
3190                     break;
3191                  case 2:
3192                     op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
3193                     op2 = Iop_Add32x4;
3194                     break;
3195                  case 3:
3196                     op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
3197                     op2 = Iop_Add64x2;
3198                     break;
3199                  default:
3200                     vassert(0);
3201               }
3202            } else {
3203               switch (size) {
3204                  case 0:
3205                     op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
3206                     op2 = Iop_Add8x8;
3207                     break;
3208                  case 1:
3209                     op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
3210                     op2 = Iop_Add16x4;
3211                     break;
3212                  case 2:
3213                     op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
3214                     op2 = Iop_Add32x2;
3215                     break;
3216                  case 3:
3217                     op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
3218                     op2 = Iop_Add64;
3219                     break;
3220                  default:
3221                     vassert(0);
3222               }
3223            }
3224            if (Q) {
3225               tmp = newTemp(Ity_V128);
3226            } else {
3227               tmp = newTemp(Ity_I64);
3228            }
3229            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3230            assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3231            setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3232            DIP("vqadd.%c%d %c%u %c%u, %c%u\n",
3233                U ? 'u' : 's',
3234                8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3235         }
3236         break;
3237      case 1:
3238         if (B == 0) {
3239            /* VRHADD */
3240            /* VRHADD C, A, B ::=
3241                 C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
3242            IROp shift_op, add_op;
3243            IRTemp cc;
3244            ULong one = 1;
3245            HChar reg_t = Q ? 'q' : 'd';
3246            switch (size) {
3247               case 0: one = (one <<  8) | one; /* fall through */
3248               case 1: one = (one << 16) | one; /* fall through */
3249               case 2: one = (one << 32) | one; break;
3250               case 3: return False;
3251               default: vassert(0);
3252            }
3253            if (Q) {
3254               switch (size) {
3255                  case 0:
3256                     shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
3257                     add_op = Iop_Add8x16;
3258                     break;
3259                  case 1:
3260                     shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
3261                     add_op = Iop_Add16x8;
3262                     break;
3263                  case 2:
3264                     shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
3265                     add_op = Iop_Add32x4;
3266                     break;
3267                  case 3:
3268                     return False;
3269                  default:
3270                     vassert(0);
3271               }
3272            } else {
3273               switch (size) {
3274                  case 0:
3275                     shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
3276                     add_op = Iop_Add8x8;
3277                     break;
3278                  case 1:
3279                     shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
3280                     add_op = Iop_Add16x4;
3281                     break;
3282                  case 2:
3283                     shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
3284                     add_op = Iop_Add32x2;
3285                     break;
3286                  case 3:
3287                     return False;
3288                  default:
3289                     vassert(0);
3290               }
3291            }
3292            if (Q) {
3293               cc = newTemp(Ity_V128);
3294               assign(cc, binop(shift_op,
3295                                binop(add_op,
3296                                      binop(add_op,
3297                                            binop(Iop_AndV128,
3298                                                  mkexpr(arg_n),
3299                                                  binop(Iop_64HLtoV128,
3300                                                        mkU64(one),
3301                                                        mkU64(one))),
3302                                            binop(Iop_AndV128,
3303                                                  mkexpr(arg_m),
3304                                                  binop(Iop_64HLtoV128,
3305                                                        mkU64(one),
3306                                                        mkU64(one)))),
3307                                      binop(Iop_64HLtoV128,
3308                                            mkU64(one),
3309                                            mkU64(one))),
3310                                mkU8(1)));
3311               assign(res, binop(add_op,
3312                                 binop(add_op,
3313                                       binop(shift_op,
3314                                             mkexpr(arg_n),
3315                                             mkU8(1)),
3316                                       binop(shift_op,
3317                                             mkexpr(arg_m),
3318                                             mkU8(1))),
3319                                 mkexpr(cc)));
3320            } else {
3321               cc = newTemp(Ity_I64);
3322               assign(cc, binop(shift_op,
3323                                binop(add_op,
3324                                      binop(add_op,
3325                                            binop(Iop_And64,
3326                                                  mkexpr(arg_n),
3327                                                  mkU64(one)),
3328                                            binop(Iop_And64,
3329                                                  mkexpr(arg_m),
3330                                                  mkU64(one))),
3331                                      mkU64(one)),
3332                                mkU8(1)));
3333               assign(res, binop(add_op,
3334                                 binop(add_op,
3335                                       binop(shift_op,
3336                                             mkexpr(arg_n),
3337                                             mkU8(1)),
3338                                       binop(shift_op,
3339                                             mkexpr(arg_m),
3340                                             mkU8(1))),
3341                                 mkexpr(cc)));
3342            }
3343            DIP("vrhadd.%c%d %c%u, %c%u, %c%u\n",
3344                U ? 'u' : 's',
3345                8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3346         } else {
3347            if (U == 0)  {
3348               switch(C) {
3349                  case 0: {
3350                     /* VAND  */
3351                     HChar reg_t = Q ? 'q' : 'd';
3352                     if (Q) {
3353                        assign(res, binop(Iop_AndV128, mkexpr(arg_n),
3354                                                       mkexpr(arg_m)));
3355                     } else {
3356                        assign(res, binop(Iop_And64, mkexpr(arg_n),
3357                                                     mkexpr(arg_m)));
3358                     }
3359                     DIP("vand %c%u, %c%u, %c%u\n",
3360                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3361                     break;
3362                  }
3363                  case 1: {
3364                     /* VBIC  */
3365                     HChar reg_t = Q ? 'q' : 'd';
3366                     if (Q) {
3367                        assign(res, binop(Iop_AndV128,mkexpr(arg_n),
3368                               unop(Iop_NotV128, mkexpr(arg_m))));
3369                     } else {
3370                        assign(res, binop(Iop_And64, mkexpr(arg_n),
3371                               unop(Iop_Not64, mkexpr(arg_m))));
3372                     }
3373                     DIP("vbic %c%u, %c%u, %c%u\n",
3374                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3375                     break;
3376                  }
3377                  case 2:
3378                     if ( nreg != mreg) {
3379                        /* VORR  */
3380                        HChar reg_t = Q ? 'q' : 'd';
3381                        if (Q) {
3382                           assign(res, binop(Iop_OrV128, mkexpr(arg_n),
3383                                                         mkexpr(arg_m)));
3384                        } else {
3385                           assign(res, binop(Iop_Or64, mkexpr(arg_n),
3386                                                       mkexpr(arg_m)));
3387                        }
3388                        DIP("vorr %c%u, %c%u, %c%u\n",
3389                            reg_t, dreg, reg_t, nreg, reg_t, mreg);
3390                     } else {
3391                        /* VMOV  */
3392                        HChar reg_t = Q ? 'q' : 'd';
3393                        assign(res, mkexpr(arg_m));
3394                        DIP("vmov %c%u, %c%u\n", reg_t, dreg, reg_t, mreg);
3395                     }
3396                     break;
3397                  case 3:{
3398                     /* VORN  */
3399                     HChar reg_t = Q ? 'q' : 'd';
3400                     if (Q) {
3401                        assign(res, binop(Iop_OrV128,mkexpr(arg_n),
3402                               unop(Iop_NotV128, mkexpr(arg_m))));
3403                     } else {
3404                        assign(res, binop(Iop_Or64, mkexpr(arg_n),
3405                               unop(Iop_Not64, mkexpr(arg_m))));
3406                     }
3407                     DIP("vorn %c%u, %c%u, %c%u\n",
3408                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3409                     break;
3410                  }
3411                  default:
3412                     vassert(0);
3413               }
3414            } else {
3415               switch(C) {
3416                  case 0:
3417                     /* VEOR (XOR)  */
3418                     if (Q) {
3419                        assign(res, binop(Iop_XorV128, mkexpr(arg_n),
3420                                                       mkexpr(arg_m)));
3421                     } else {
3422                        assign(res, binop(Iop_Xor64, mkexpr(arg_n),
3423                                                     mkexpr(arg_m)));
3424                     }
3425                     DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
3426                           Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3427                     break;
3428                  case 1:
3429                     /* VBSL  */
3430                     if (Q) {
3431                        IRTemp reg_d = newTemp(Ity_V128);
3432                        assign(reg_d, getQReg(dreg));
3433                        assign(res,
3434                               binop(Iop_OrV128,
3435                                     binop(Iop_AndV128, mkexpr(arg_n),
3436                                                        mkexpr(reg_d)),
3437                                     binop(Iop_AndV128,
3438                                           mkexpr(arg_m),
3439                                           unop(Iop_NotV128,
3440                                                 mkexpr(reg_d)) ) ) );
3441                     } else {
3442                        IRTemp reg_d = newTemp(Ity_I64);
3443                        assign(reg_d, getDRegI64(dreg));
3444                        assign(res,
3445                               binop(Iop_Or64,
3446                                     binop(Iop_And64, mkexpr(arg_n),
3447                                                      mkexpr(reg_d)),
3448                                     binop(Iop_And64,
3449                                           mkexpr(arg_m),
3450                                           unop(Iop_Not64, mkexpr(reg_d)))));
3451                     }
3452                     DIP("vbsl %c%u, %c%u, %c%u\n",
3453                         Q ? 'q' : 'd', dreg,
3454                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3455                     break;
3456                  case 2:
3457                     /* VBIT  */
3458                     if (Q) {
3459                        IRTemp reg_d = newTemp(Ity_V128);
3460                        assign(reg_d, getQReg(dreg));
3461                        assign(res,
3462                               binop(Iop_OrV128,
3463                                     binop(Iop_AndV128, mkexpr(arg_n),
3464                                                        mkexpr(arg_m)),
3465                                     binop(Iop_AndV128,
3466                                           mkexpr(reg_d),
3467                                           unop(Iop_NotV128, mkexpr(arg_m)))));
3468                     } else {
3469                        IRTemp reg_d = newTemp(Ity_I64);
3470                        assign(reg_d, getDRegI64(dreg));
3471                        assign(res,
3472                               binop(Iop_Or64,
3473                                     binop(Iop_And64, mkexpr(arg_n),
3474                                                      mkexpr(arg_m)),
3475                                     binop(Iop_And64,
3476                                           mkexpr(reg_d),
3477                                           unop(Iop_Not64, mkexpr(arg_m)))));
3478                     }
3479                     DIP("vbit %c%u, %c%u, %c%u\n",
3480                         Q ? 'q' : 'd', dreg,
3481                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3482                     break;
3483                  case 3:
3484                     /* VBIF  */
3485                     if (Q) {
3486                        IRTemp reg_d = newTemp(Ity_V128);
3487                        assign(reg_d, getQReg(dreg));
3488                        assign(res,
3489                               binop(Iop_OrV128,
3490                                     binop(Iop_AndV128, mkexpr(reg_d),
3491                                                        mkexpr(arg_m)),
3492                                     binop(Iop_AndV128,
3493                                           mkexpr(arg_n),
3494                                           unop(Iop_NotV128, mkexpr(arg_m)))));
3495                     } else {
3496                        IRTemp reg_d = newTemp(Ity_I64);
3497                        assign(reg_d, getDRegI64(dreg));
3498                        assign(res,
3499                               binop(Iop_Or64,
3500                                     binop(Iop_And64, mkexpr(reg_d),
3501                                                      mkexpr(arg_m)),
3502                                     binop(Iop_And64,
3503                                           mkexpr(arg_n),
3504                                           unop(Iop_Not64, mkexpr(arg_m)))));
3505                     }
3506                     DIP("vbif %c%u, %c%u, %c%u\n",
3507                         Q ? 'q' : 'd', dreg,
3508                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3509                     break;
3510                  default:
3511                     vassert(0);
3512               }
3513            }
3514         }
3515         break;
3516      case 2:
3517         if (B == 0) {
3518            /* VHSUB */
3519            /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1)   */
3520            ULong imm = 0;
3521            IRExpr *imm_val;
3522            IROp subOp;
3523            IROp notOp;
3524            IROp andOp;
3525            IROp shOp;
3526            if (size == 3)
3527               return False;
3528            switch(size) {
3529               case 0: imm = 0x101010101010101LL; break;
3530               case 1: imm = 0x1000100010001LL; break;
3531               case 2: imm = 0x100000001LL; break;
3532               default: vassert(0);
3533            }
3534            if (Q) {
3535               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3536               andOp = Iop_AndV128;
3537               notOp = Iop_NotV128;
3538            } else {
3539               imm_val = mkU64(imm);
3540               andOp = Iop_And64;
3541               notOp = Iop_Not64;
3542            }
3543            if (U) {
3544               switch(size) {
3545                  case 0:
3546                     subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3547                     shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3548                     break;
3549                  case 1:
3550                     subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3551                     shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3552                     break;
3553                  case 2:
3554                     subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3555                     shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3556                     break;
3557                  default:
3558                     vassert(0);
3559               }
3560            } else {
3561               switch(size) {
3562                  case 0:
3563                     subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3564                     shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3565                     break;
3566                  case 1:
3567                     subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3568                     shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3569                     break;
3570                  case 2:
3571                     subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3572                     shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3573                     break;
3574                  default:
3575                     vassert(0);
3576               }
3577            }
3578            assign(res,
3579                   binop(subOp,
3580                         binop(subOp,
3581                               binop(shOp, mkexpr(arg_n), mkU8(1)),
3582                               binop(shOp, mkexpr(arg_m), mkU8(1))),
3583                         binop(andOp,
3584                               binop(andOp,
3585                                     unop(notOp, mkexpr(arg_n)),
3586                                     mkexpr(arg_m)),
3587                               imm_val)));
3588            DIP("vhsub.%c%d %c%u, %c%u, %c%u\n",
3589                U ? 'u' : 's', 8 << size,
3590                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3591                mreg);
3592         } else {
3593            /* VQSUB */
3594            IROp op, op2;
3595            IRTemp tmp;
3596            if (Q) {
3597               switch (size) {
3598                  case 0:
3599                     op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
3600                     op2 = Iop_Sub8x16;
3601                     break;
3602                  case 1:
3603                     op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
3604                     op2 = Iop_Sub16x8;
3605                     break;
3606                  case 2:
3607                     op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
3608                     op2 = Iop_Sub32x4;
3609                     break;
3610                  case 3:
3611                     op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
3612                     op2 = Iop_Sub64x2;
3613                     break;
3614                  default:
3615                     vassert(0);
3616               }
3617            } else {
3618               switch (size) {
3619                  case 0:
3620                     op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
3621                     op2 = Iop_Sub8x8;
3622                     break;
3623                  case 1:
3624                     op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
3625                     op2 = Iop_Sub16x4;
3626                     break;
3627                  case 2:
3628                     op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
3629                     op2 = Iop_Sub32x2;
3630                     break;
3631                  case 3:
3632                     op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
3633                     op2 = Iop_Sub64;
3634                     break;
3635                  default:
3636                     vassert(0);
3637               }
3638            }
3639            if (Q)
3640               tmp = newTemp(Ity_V128);
3641            else
3642               tmp = newTemp(Ity_I64);
3643            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3644            assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3645            setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3646            DIP("vqsub.%c%d %c%u, %c%u, %c%u\n",
3647                U ? 'u' : 's', 8 << size,
3648                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3649                mreg);
3650         }
3651         break;
3652      case 3: {
3653            IROp op;
3654            if (Q) {
3655               switch (size) {
3656                  case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
3657                  case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
3658                  case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
3659                  case 3: return False;
3660                  default: vassert(0);
3661               }
3662            } else {
3663               switch (size) {
3664                  case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
3665                  case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
3666                  case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
3667                  case 3: return False;
3668                  default: vassert(0);
3669               }
3670            }
3671            if (B == 0) {
3672               /* VCGT  */
3673               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3674               DIP("vcgt.%c%d %c%u, %c%u, %c%u\n",
3675                   U ? 'u' : 's', 8 << size,
3676                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3677                   mreg);
3678            } else {
3679               /* VCGE  */
3680               /* VCGE res, argn, argm
3681                    is equal to
3682                  VCGT tmp, argm, argn
3683                  VNOT res, tmp */
3684               assign(res,
3685                      unop(Q ? Iop_NotV128 : Iop_Not64,
3686                           binop(op, mkexpr(arg_m), mkexpr(arg_n))));
3687               DIP("vcge.%c%d %c%u, %c%u, %c%u\n",
3688                   U ? 'u' : 's', 8 << size,
3689                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3690                   mreg);
3691            }
3692         }
3693         break;
3694      case 4:
3695         if (B == 0) {
3696            /* VSHL */
3697            IROp op = Iop_INVALID, sub_op = Iop_INVALID;
3698            IRTemp tmp = IRTemp_INVALID;
3699            if (U) {
3700               switch (size) {
3701                  case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
3702                  case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
3703                  case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
3704                  case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
3705                  default: vassert(0);
3706               }
3707            } else {
3708               tmp = newTemp(Q ? Ity_V128 : Ity_I64);
3709               switch (size) {
3710                  case 0:
3711                     op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3712                     sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3713                     break;
3714                  case 1:
3715                     op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3716                     sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3717                     break;
3718                  case 2:
3719                     op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3720                     sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3721                     break;
3722                  case 3:
3723                     op = Q ? Iop_Sar64x2 : Iop_Sar64;
3724                     sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
3725                     break;
3726                  default:
3727                     vassert(0);
3728               }
3729            }
3730            if (U) {
3731               if (!Q && (size == 3))
3732                  assign(res, binop(op, mkexpr(arg_m),
3733                                        unop(Iop_64to8, mkexpr(arg_n))));
3734               else
3735                  assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3736            } else {
3737               if (Q)
3738                  assign(tmp, binop(sub_op,
3739                                    binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
3740                                    mkexpr(arg_n)));
3741               else
3742                  assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
3743               if (!Q && (size == 3))
3744                  assign(res, binop(op, mkexpr(arg_m),
3745                                        unop(Iop_64to8, mkexpr(tmp))));
3746               else
3747                  assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
3748            }
3749            DIP("vshl.%c%d %c%u, %c%u, %c%u\n",
3750                U ? 'u' : 's', 8 << size,
3751                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3752                nreg);
3753         } else {
3754            /* VQSHL */
3755            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
3756            IRTemp tmp, shval, mask, old_shval;
3757            UInt i;
3758            ULong esize;
3759            cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3760            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3761            if (U) {
3762               switch (size) {
3763                  case 0:
3764                     op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
3765                     op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
3766                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3767                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3768                     break;
3769                  case 1:
3770                     op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
3771                     op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
3772                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3773                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3774                     break;
3775                  case 2:
3776                     op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
3777                     op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
3778                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3779                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3780                     break;
3781                  case 3:
3782                     op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
3783                     op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
3784                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3785                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3786                     break;
3787                  default:
3788                     vassert(0);
3789               }
3790            } else {
3791               switch (size) {
3792                  case 0:
3793                     op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
3794                     op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3795                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3796                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3797                     break;
3798                  case 1:
3799                     op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
3800                     op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3801                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3802                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3803                     break;
3804                  case 2:
3805                     op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
3806                     op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3807                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3808                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3809                     break;
3810                  case 3:
3811                     op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
3812                     op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
3813                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3814                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3815                     break;
3816                  default:
3817                     vassert(0);
3818               }
3819            }
3820            if (Q) {
3821               tmp = newTemp(Ity_V128);
3822               shval = newTemp(Ity_V128);
3823               mask = newTemp(Ity_V128);
3824            } else {
3825               tmp = newTemp(Ity_I64);
3826               shval = newTemp(Ity_I64);
3827               mask = newTemp(Ity_I64);
3828            }
3829            assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3830            /* Only least significant byte from second argument is used.
3831               Copy this byte to the whole vector element. */
3832            assign(shval, binop(op_shrn,
3833                                binop(op_shln,
3834                                       mkexpr(arg_n),
3835                                       mkU8((8 << size) - 8)),
3836                                mkU8((8 << size) - 8)));
3837            for(i = 0; i < size; i++) {
3838               old_shval = shval;
3839               shval = newTemp(Q ? Ity_V128 : Ity_I64);
3840               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3841                                   mkexpr(old_shval),
3842                                   binop(op_shln,
3843                                         mkexpr(old_shval),
3844                                         mkU8(8 << i))));
3845            }
3846            /* If the shift is greater than or equal to the element size and
3847               the element is non-zero, then the QC flag should be set. */
3848            esize = (8 << size) - 1;
3849            esize = (esize <<  8) | esize;
3850            esize = (esize << 16) | esize;
3851            esize = (esize << 32) | esize;
3852            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3853                             binop(cmp_gt, mkexpr(shval),
3854                                           Q ? mkU128(esize) : mkU64(esize)),
3855                             unop(cmp_neq, mkexpr(arg_m))),
3856                       Q ? mkU128(0) : mkU64(0),
3857                       Q, condT);
3858            /* Otherwise the QC flag should be set if the shift value is
3859               positive and the result, right-shifted by the same amount, is
3860               not equal to the left argument. */
3861            assign(mask, binop(cmp_gt, mkexpr(shval),
3862                                       Q ? mkU128(0) : mkU64(0)));
3863            if (!Q && size == 3)
3864               assign(tmp, binop(op_rev, mkexpr(res),
3865                                         unop(Iop_64to8, mkexpr(arg_n))));
3866            else
3867               assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
3868            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3869                             mkexpr(tmp), mkexpr(mask)),
3870                       binop(Q ? Iop_AndV128 : Iop_And64,
3871                             mkexpr(arg_m), mkexpr(mask)),
3872                       Q, condT);
3873            DIP("vqshl.%c%d %c%u, %c%u, %c%u\n",
3874                U ? 'u' : 's', 8 << size,
3875                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3876                nreg);
3877         }
3878         break;
3879      case 5:
3880         if (B == 0) {
3881            /* VRSHL */
3882            IROp op, op_shrn, op_shln, cmp_gt, op_add;
3883            IRTemp shval, old_shval, imm_val, round;
3884            UInt i;
3885            ULong imm;
3886            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3887            imm = 1L;
3888            switch (size) {
3889               case 0: imm = (imm <<  8) | imm; /* fall through */
3890               case 1: imm = (imm << 16) | imm; /* fall through */
3891               case 2: imm = (imm << 32) | imm; /* fall through */
3892               case 3: break;
3893               default: vassert(0);
3894            }
3895            imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3896            round = newTemp(Q ? Ity_V128 : Ity_I64);
3897            assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3898            if (U) {
3899               switch (size) {
3900                  case 0:
3901                     op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
3902                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3903                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3904                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3905                     break;
3906                  case 1:
3907                     op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
3908                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3909                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3910                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3911                     break;
3912                  case 2:
3913                     op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
3914                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3915                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3916                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3917                     break;
3918                  case 3:
3919                     op = Q ? Iop_Shl64x2 : Iop_Shl64;
3920                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
3921                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3922                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3923                     break;
3924                  default:
3925                     vassert(0);
3926               }
3927            } else {
3928               switch (size) {
3929                  case 0:
3930                     op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
3931                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3932                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3933                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3934                     break;
3935                  case 1:
3936                     op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
3937                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3938                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3939                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3940                     break;
3941                  case 2:
3942                     op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
3943                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3944                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3945                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3946                     break;
3947                  case 3:
3948                     op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
3949                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
3950                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3951                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3952                     break;
3953                  default:
3954                     vassert(0);
3955               }
3956            }
3957            if (Q) {
3958               shval = newTemp(Ity_V128);
3959            } else {
3960               shval = newTemp(Ity_I64);
3961            }
3962            /* Only least significant byte from second argument is used.
3963               Copy this byte to the whole vector element. */
3964            assign(shval, binop(op_shrn,
3965                                binop(op_shln,
3966                                       mkexpr(arg_n),
3967                                       mkU8((8 << size) - 8)),
3968                                mkU8((8 << size) - 8)));
3969            for (i = 0; i < size; i++) {
3970               old_shval = shval;
3971               shval = newTemp(Q ? Ity_V128 : Ity_I64);
3972               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3973                                   mkexpr(old_shval),
3974                                   binop(op_shln,
3975                                         mkexpr(old_shval),
3976                                         mkU8(8 << i))));
3977            }
3978            /* Compute the result */
3979            if (!Q && size == 3 && U) {
3980               assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3981                                   binop(op,
3982                                         mkexpr(arg_m),
3983                                         unop(Iop_64to8,
3984                                              binop(op_add,
3985                                                    mkexpr(arg_n),
3986                                                    mkexpr(imm_val)))),
3987                                   binop(Q ? Iop_AndV128 : Iop_And64,
3988                                         mkexpr(imm_val),
3989                                         binop(cmp_gt,
3990                                               Q ? mkU128(0) : mkU64(0),
3991                                               mkexpr(arg_n)))));
3992               assign(res, binop(op_add,
3993                                 binop(op,
3994                                       mkexpr(arg_m),
3995                                       unop(Iop_64to8, mkexpr(arg_n))),
3996                                 mkexpr(round)));
3997            } else {
3998               assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3999                                   binop(op,
4000                                         mkexpr(arg_m),
4001                                         binop(op_add,
4002                                               mkexpr(arg_n),
4003                                               mkexpr(imm_val))),
4004                                   binop(Q ? Iop_AndV128 : Iop_And64,
4005                                         mkexpr(imm_val),
4006                                         binop(cmp_gt,
4007                                               Q ? mkU128(0) : mkU64(0),
4008                                               mkexpr(arg_n)))));
4009               assign(res, binop(op_add,
4010                                 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4011                                 mkexpr(round)));
4012            }
4013            DIP("vrshl.%c%d %c%u, %c%u, %c%u\n",
4014                U ? 'u' : 's', 8 << size,
4015                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
4016                nreg);
4017         } else {
4018            /* VQRSHL */
4019            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
4020            IRTemp tmp, shval, mask, old_shval, imm_val, round;
4021            UInt i;
4022            ULong esize, imm;
4023            cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
4024            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
4025            imm = 1L;
4026            switch (size) {
4027               case 0: imm = (imm <<  8) | imm; /* fall through */
4028               case 1: imm = (imm << 16) | imm; /* fall through */
4029               case 2: imm = (imm << 32) | imm; /* fall through */
4030               case 3: break;
4031               default: vassert(0);
4032            }
4033            imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
4034            round = newTemp(Q ? Ity_V128 : Ity_I64);
4035            assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
4036            if (U) {
4037               switch (size) {
4038                  case 0:
4039                     op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
4040                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4041                     op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
4042                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4043                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4044                     break;
4045                  case 1:
4046                     op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
4047                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4048                     op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
4049                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4050                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4051                     break;
4052                  case 2:
4053                     op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
4054                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4055                     op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
4056                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4057                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4058                     break;
4059                  case 3:
4060                     op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
4061                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
4062                     op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
4063                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4064                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4065                     break;
4066                  default:
4067                     vassert(0);
4068               }
4069            } else {
4070               switch (size) {
4071                  case 0:
4072                     op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
4073                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4074                     op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
4075                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4076                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4077                     break;
4078                  case 1:
4079                     op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
4080                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4081                     op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
4082                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4083                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4084                     break;
4085                  case 2:
4086                     op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
4087                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4088                     op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
4089                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4090                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4091                     break;
4092                  case 3:
4093                     op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
4094                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
4095                     op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
4096                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4097                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4098                     break;
4099                  default:
4100                     vassert(0);
4101               }
4102            }
4103            if (Q) {
4104               tmp = newTemp(Ity_V128);
4105               shval = newTemp(Ity_V128);
4106               mask = newTemp(Ity_V128);
4107            } else {
4108               tmp = newTemp(Ity_I64);
4109               shval = newTemp(Ity_I64);
4110               mask = newTemp(Ity_I64);
4111            }
           /* Only the least significant byte of the second argument is
              used.  Copy this byte to the whole vector element. */
4114            assign(shval, binop(op_shrn,
4115                                binop(op_shln,
4116                                       mkexpr(arg_n),
4117                                       mkU8((8 << size) - 8)),
4118                                mkU8((8 << size) - 8)));
4119            for (i = 0; i < size; i++) {
4120               old_shval = shval;
4121               shval = newTemp(Q ? Ity_V128 : Ity_I64);
4122               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
4123                                   mkexpr(old_shval),
4124                                   binop(op_shln,
4125                                         mkexpr(old_shval),
4126                                         mkU8(8 << i))));
4127            }
4128            /* Compute the result */
4129            assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
4130                                binop(op,
4131                                      mkexpr(arg_m),
4132                                      binop(op_add,
4133                                            mkexpr(arg_n),
4134                                            mkexpr(imm_val))),
4135                                binop(Q ? Iop_AndV128 : Iop_And64,
4136                                      mkexpr(imm_val),
4137                                      binop(cmp_gt,
4138                                            Q ? mkU128(0) : mkU64(0),
4139                                            mkexpr(arg_n)))));
4140            assign(res, binop(op_add,
4141                              binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4142                              mkexpr(round)));
           /* If the shift is greater than or equal to the element size and
              the element is non-zero, then the QC flag should be set. */
4145            esize = (8 << size) - 1;
4146            esize = (esize <<  8) | esize;
4147            esize = (esize << 16) | esize;
4148            esize = (esize << 32) | esize;
4149            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4150                             binop(cmp_gt, mkexpr(shval),
4151                                           Q ? mkU128(esize) : mkU64(esize)),
4152                             unop(cmp_neq, mkexpr(arg_m))),
4153                       Q ? mkU128(0) : mkU64(0),
4154                       Q, condT);
           /* Otherwise the QC flag should be set if the shift value is
              positive and the result, shifted right by the same value, is
              not equal to the left argument. */
4158            assign(mask, binop(cmp_gt, mkexpr(shval),
4159                               Q ? mkU128(0) : mkU64(0)));
4160            if (!Q && size == 3)
4161               assign(tmp, binop(op_rev, mkexpr(res),
4162                                         unop(Iop_64to8, mkexpr(arg_n))));
4163            else
4164               assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
4165            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4166                             mkexpr(tmp), mkexpr(mask)),
4167                       binop(Q ? Iop_AndV128 : Iop_And64,
4168                             mkexpr(arg_m), mkexpr(mask)),
4169                       Q, condT);
4170            DIP("vqrshl.%c%d %c%u, %c%u, %c%u\n",
4171                U ? 'u' : 's', 8 << size,
4172                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
4173                nreg);
4174         }
4175         break;
4176      case 6:
4177         /* VMAX, VMIN  */
4178         if (B == 0) {
4179            /* VMAX */
4180            IROp op;
4181            if (U == 0) {
4182               switch (size) {
4183                  case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
4184                  case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
4185                  case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
4186                  case 3: return False;
4187                  default: vassert(0);
4188               }
4189            } else {
4190               switch (size) {
4191                  case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
4192                  case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
4193                  case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
4194                  case 3: return False;
4195                  default: vassert(0);
4196               }
4197            }
4198            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4199            DIP("vmax.%c%d %c%u, %c%u, %c%u\n",
4200                U ? 'u' : 's', 8 << size,
4201                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4202                mreg);
4203         } else {
4204            /* VMIN */
4205            IROp op;
4206            if (U == 0) {
4207               switch (size) {
4208                  case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
4209                  case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
4210                  case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
4211                  case 3: return False;
4212                  default: vassert(0);
4213               }
4214            } else {
4215               switch (size) {
4216                  case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
4217                  case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
4218                  case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
4219                  case 3: return False;
4220                  default: vassert(0);
4221               }
4222            }
4223            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4224            DIP("vmin.%c%d %c%u, %c%u, %c%u\n",
4225                U ? 'u' : 's', 8 << size,
4226                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4227                mreg);
4228         }
4229         break;
4230      case 7:
4231         if (B == 0) {
4232            /* VABD */
4233            IROp op_cmp, op_sub;
4234            IRTemp cond;
4235            if ((theInstr >> 23) & 1) {
4236               vpanic("VABDL should not be in dis_neon_data_3same\n");
4237            }
4238            if (Q) {
4239               switch (size) {
4240                  case 0:
4241                     op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4242                     op_sub = Iop_Sub8x16;
4243                     break;
4244                  case 1:
4245                     op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4246                     op_sub = Iop_Sub16x8;
4247                     break;
4248                  case 2:
4249                     op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4250                     op_sub = Iop_Sub32x4;
4251                     break;
4252                  case 3:
4253                     return False;
4254                  default:
4255                     vassert(0);
4256               }
4257            } else {
4258               switch (size) {
4259                  case 0:
4260                     op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4261                     op_sub = Iop_Sub8x8;
4262                     break;
4263                  case 1:
4264                     op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4265                     op_sub = Iop_Sub16x4;
4266                     break;
4267                  case 2:
4268                     op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4269                     op_sub = Iop_Sub32x2;
4270                     break;
4271                  case 3:
4272                     return False;
4273                  default:
4274                     vassert(0);
4275               }
4276            }
4277            if (Q) {
4278               cond = newTemp(Ity_V128);
4279            } else {
4280               cond = newTemp(Ity_I64);
4281            }
4282            assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4283            assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
4284                              binop(Q ? Iop_AndV128 : Iop_And64,
4285                                    binop(op_sub, mkexpr(arg_n),
4286                                                  mkexpr(arg_m)),
4287                                    mkexpr(cond)),
4288                              binop(Q ? Iop_AndV128 : Iop_And64,
4289                                    binop(op_sub, mkexpr(arg_m),
4290                                                  mkexpr(arg_n)),
4291                                    unop(Q ? Iop_NotV128 : Iop_Not64,
4292                                         mkexpr(cond)))));
4293            DIP("vabd.%c%d %c%u, %c%u, %c%u\n",
4294                U ? 'u' : 's', 8 << size,
4295                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4296                mreg);
4297         } else {
4298            /* VABA */
4299            IROp op_cmp, op_sub, op_add;
4300            IRTemp cond, acc, tmp;
4301            if ((theInstr >> 23) & 1) {
4302               vpanic("VABAL should not be in dis_neon_data_3same");
4303            }
4304            if (Q) {
4305               switch (size) {
4306                  case 0:
4307                     op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4308                     op_sub = Iop_Sub8x16;
4309                     op_add = Iop_Add8x16;
4310                     break;
4311                  case 1:
4312                     op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4313                     op_sub = Iop_Sub16x8;
4314                     op_add = Iop_Add16x8;
4315                     break;
4316                  case 2:
4317                     op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4318                     op_sub = Iop_Sub32x4;
4319                     op_add = Iop_Add32x4;
4320                     break;
4321                  case 3:
4322                     return False;
4323                  default:
4324                     vassert(0);
4325               }
4326            } else {
4327               switch (size) {
4328                  case 0:
4329                     op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4330                     op_sub = Iop_Sub8x8;
4331                     op_add = Iop_Add8x8;
4332                     break;
4333                  case 1:
4334                     op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4335                     op_sub = Iop_Sub16x4;
4336                     op_add = Iop_Add16x4;
4337                     break;
4338                  case 2:
4339                     op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4340                     op_sub = Iop_Sub32x2;
4341                     op_add = Iop_Add32x2;
4342                     break;
4343                  case 3:
4344                     return False;
4345                  default:
4346                     vassert(0);
4347               }
4348            }
4349            if (Q) {
4350               cond = newTemp(Ity_V128);
4351               acc = newTemp(Ity_V128);
4352               tmp = newTemp(Ity_V128);
4353               assign(acc, getQReg(dreg));
4354            } else {
4355               cond = newTemp(Ity_I64);
4356               acc = newTemp(Ity_I64);
4357               tmp = newTemp(Ity_I64);
4358               assign(acc, getDRegI64(dreg));
4359            }
4360            assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4361            assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
4362                              binop(Q ? Iop_AndV128 : Iop_And64,
4363                                    binop(op_sub, mkexpr(arg_n),
4364                                                  mkexpr(arg_m)),
4365                                    mkexpr(cond)),
4366                              binop(Q ? Iop_AndV128 : Iop_And64,
4367                                    binop(op_sub, mkexpr(arg_m),
4368                                                  mkexpr(arg_n)),
4369                                    unop(Q ? Iop_NotV128 : Iop_Not64,
4370                                         mkexpr(cond)))));
4371            assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
4372            DIP("vaba.%c%d %c%u, %c%u, %c%u\n",
4373                U ? 'u' : 's', 8 << size,
4374                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4375                mreg);
4376         }
4377         break;
4378      case 8:
4379         if (B == 0) {
4380            IROp op;
4381            if (U == 0) {
4382               /* VADD  */
4383               switch (size) {
4384                  case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
4385                  case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
4386                  case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
4387                  case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
4388                  default: vassert(0);
4389               }
4390               DIP("vadd.i%d %c%u, %c%u, %c%u\n",
4391                   8 << size, Q ? 'q' : 'd',
4392                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4393            } else {
4394               /* VSUB  */
4395               switch (size) {
4396                  case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
4397                  case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
4398                  case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
4399                  case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
4400                  default: vassert(0);
4401               }
4402               DIP("vsub.i%d %c%u, %c%u, %c%u\n",
4403                   8 << size, Q ? 'q' : 'd',
4404                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4405            }
4406            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4407         } else {
4408            IROp op;
4409            switch (size) {
4410               case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
4411               case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
4412               case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
4413               case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
4414               default: vassert(0);
4415            }
4416            if (U == 0) {
4417               /* VTST  */
4418               assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
4419                                          mkexpr(arg_n),
4420                                          mkexpr(arg_m))));
4421               DIP("vtst.%d %c%u, %c%u, %c%u\n",
4422                   8 << size, Q ? 'q' : 'd',
4423                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4424            } else {
4425               /* VCEQ  */
4426               assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
4427                                unop(op,
4428                                     binop(Q ? Iop_XorV128 : Iop_Xor64,
4429                                           mkexpr(arg_n),
4430                                           mkexpr(arg_m)))));
4431               DIP("vceq.i%d %c%u, %c%u, %c%u\n",
4432                   8 << size, Q ? 'q' : 'd',
4433                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4434            }
4435         }
4436         break;
4437      case 9:
4438         if (B == 0) {
4439            /* VMLA, VMLS (integer) */
4440            IROp op, op2;
4441            UInt P = (theInstr >> 24) & 1;
4442            if (P) {
4443               switch (size) {
4444                  case 0:
4445                     op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4446                     op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
4447                     break;
4448                  case 1:
4449                     op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4450                     op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
4451                     break;
4452                  case 2:
4453                     op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4454                     op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
4455                     break;
4456                  case 3:
4457                     return False;
4458                  default:
4459                     vassert(0);
4460               }
4461            } else {
4462               switch (size) {
4463                  case 0:
4464                     op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4465                     op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
4466                     break;
4467                  case 1:
4468                     op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4469                     op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
4470                     break;
4471                  case 2:
4472                     op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4473                     op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
4474                     break;
4475                  case 3:
4476                     return False;
4477                  default:
4478                     vassert(0);
4479               }
4480            }
4481            assign(res, binop(op2,
4482                              Q ? getQReg(dreg) : getDRegI64(dreg),
4483                              binop(op, mkexpr(arg_n), mkexpr(arg_m))));
4484            DIP("vml%c.i%d %c%u, %c%u, %c%u\n",
4485                P ? 's' : 'a', 8 << size,
4486                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4487                mreg);
4488         } else {
4489            /* VMUL */
4490            IROp op;
4491            UInt P = (theInstr >> 24) & 1;
4492            if (P) {
4493               switch (size) {
4494                  case 0:
4495                     op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
4496                     break;
4497                  case 1: case 2: case 3: return False;
4498                  default: vassert(0);
4499               }
4500            } else {
4501               switch (size) {
4502                  case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
4503                  case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
4504                  case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
4505                  case 3: return False;
4506                  default: vassert(0);
4507               }
4508            }
4509            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4510            DIP("vmul.%c%d %c%u, %c%u, %c%u\n",
4511                P ? 'p' : 'i', 8 << size,
4512                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4513                mreg);
4514         }
4515         break;
4516      case 10: {
4517         /* VPMAX, VPMIN  */
4518         UInt P = (theInstr >> 4) & 1;
4519         IROp op;
4520         if (Q)
4521            return False;
4522         if (P) {
4523            switch (size) {
4524               case 0: op = U ? Iop_PwMin8Ux8  : Iop_PwMin8Sx8; break;
4525               case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
4526               case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
4527               case 3: return False;
4528               default: vassert(0);
4529            }
4530         } else {
4531            switch (size) {
4532               case 0: op = U ? Iop_PwMax8Ux8  : Iop_PwMax8Sx8; break;
4533               case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
4534               case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
4535               case 3: return False;
4536               default: vassert(0);
4537            }
4538         }
4539         assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4540         DIP("vp%s.%c%d %c%u, %c%u, %c%u\n",
4541             P ? "min" : "max", U ? 'u' : 's',
4542             8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4543             Q ? 'q' : 'd', mreg);
4544         break;
4545      }
4546      case 11:
4547         if (B == 0) {
4548            if (U == 0) {
4549               /* VQDMULH  */
4550               IROp op ,op2;
4551               ULong imm;
4552               switch (size) {
4553                  case 0: case 3:
4554                     return False;
4555                  case 1:
4556                     op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
4557                     op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4558                     imm = 1LL << 15;
4559                     imm = (imm << 16) | imm;
4560                     imm = (imm << 32) | imm;
4561                     break;
4562                  case 2:
4563                     op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
4564                     op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4565                     imm = 1LL << 31;
4566                     imm = (imm << 32) | imm;
4567                     break;
4568                  default:
4569                     vassert(0);
4570               }
4571               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4572               setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4573                                binop(op2, mkexpr(arg_n),
4574                                           Q ? mkU128(imm) : mkU64(imm)),
4575                                binop(op2, mkexpr(arg_m),
4576                                           Q ? mkU128(imm) : mkU64(imm))),
4577                          Q ? mkU128(0) : mkU64(0),
4578                          Q, condT);
4579               DIP("vqdmulh.s%d %c%u, %c%u, %c%u\n",
4580                   8 << size, Q ? 'q' : 'd',
4581                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4582            } else {
4583               /* VQRDMULH */
4584               IROp op ,op2;
4585               ULong imm;
4586               switch(size) {
4587                  case 0: case 3:
4588                     return False;
4589                  case 1:
4590                     imm = 1LL << 15;
4591                     imm = (imm << 16) | imm;
4592                     imm = (imm << 32) | imm;
4593                     op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
4594                     op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4595                     break;
4596                  case 2:
4597                     imm = 1LL << 31;
4598                     imm = (imm << 32) | imm;
4599                     op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
4600                     op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4601                     break;
4602                  default:
4603                     vassert(0);
4604               }
4605               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4606               setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4607                                binop(op2, mkexpr(arg_n),
4608                                           Q ? mkU128(imm) : mkU64(imm)),
4609                                binop(op2, mkexpr(arg_m),
4610                                           Q ? mkU128(imm) : mkU64(imm))),
4611                          Q ? mkU128(0) : mkU64(0),
4612                          Q, condT);
4613               DIP("vqrdmulh.s%d %c%u, %c%u, %c%u\n",
4614                   8 << size, Q ? 'q' : 'd',
4615                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4616            }
4617         } else {
4618            if (U == 0) {
4619               /* VPADD */
4620               IROp op;
4621               if (Q)
4622                  return False;
4623               switch (size) {
4624                  case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8;  break;
4625                  case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
4626                  case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
4627                  case 3: return False;
4628                  default: vassert(0);
4629               }
4630               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4631               DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
4632                   8 << size, Q ? 'q' : 'd',
4633                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4634            } else {
4635               return False;
4636            }
4637         }
4638         break;
4639      case 12: {
4640         return False;
4641      }
4642      /* Starting from here these are FP SIMD cases */
4643      case 13:
4644         if (B == 0) {
4645            IROp op;
4646            if (U == 0) {
4647               if ((C >> 1) == 0) {
4648                  /* VADD  */
4649                  op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2 ;
4650                  DIP("vadd.f32 %c%u, %c%u, %c%u\n",
4651                      Q ? 'q' : 'd', dreg,
4652                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4653               } else {
4654                  /* VSUB  */
4655                  op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2 ;
4656                  DIP("vsub.f32 %c%u, %c%u, %c%u\n",
4657                      Q ? 'q' : 'd', dreg,
4658                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4659               }
4660            } else {
4661               if ((C >> 1) == 0) {
4662                  /* VPADD */
4663                  if (Q)
4664                     return False;
4665                  op = Iop_PwAdd32Fx2;
4666                  DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4667               } else {
4668                  /* VABD  */
4669                  if (Q) {
4670                     assign(res, unop(Iop_Abs32Fx4,
4671                                      triop(Iop_Sub32Fx4,
4672                                            get_FAKE_roundingmode(),
4673                                            mkexpr(arg_n),
4674                                            mkexpr(arg_m))));
4675                  } else {
4676                     assign(res, unop(Iop_Abs32Fx2,
4677                                      binop(Iop_Sub32Fx2,
4678                                            mkexpr(arg_n),
4679                                            mkexpr(arg_m))));
4680                  }
4681                  DIP("vabd.f32 %c%u, %c%u, %c%u\n",
4682                      Q ? 'q' : 'd', dreg,
4683                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4684                  break;
4685               }
4686            }
4687            assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4688         } else {
4689            if (U == 0) {
4690               /* VMLA, VMLS  */
4691               IROp op, op2;
4692               UInt P = (theInstr >> 21) & 1;
4693               if (P) {
4694                  switch (size & 1) {
4695                     case 0:
4696                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4697                        op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
4698                        break;
4699                     case 1: return False;
4700                     default: vassert(0);
4701                  }
4702               } else {
4703                  switch (size & 1) {
4704                     case 0:
4705                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4706                        op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
4707                        break;
4708                     case 1: return False;
4709                     default: vassert(0);
4710                  }
4711               }
4712               assign(res, binop_w_fake_RM(
4713                              op2,
4714                              Q ? getQReg(dreg) : getDRegI64(dreg),
4715                              binop_w_fake_RM(op, mkexpr(arg_n),
4716                                                  mkexpr(arg_m))));
4717
4718               DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
4719                   P ? 's' : 'a', Q ? 'q' : 'd',
4720                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4721            } else {
4722               /* VMUL  */
4723               IROp op;
4724               if ((C >> 1) != 0)
4725                  return False;
4726               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ;
4727               assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4728               DIP("vmul.f32 %c%u, %c%u, %c%u\n",
4729                   Q ? 'q' : 'd', dreg,
4730                   Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4731            }
4732         }
4733         break;
4734      case 14:
4735         if (B == 0) {
4736            if (U == 0) {
4737               if ((C >> 1) == 0) {
4738                  /* VCEQ  */
4739                  IROp op;
4740                  if ((theInstr >> 20) & 1)
4741                     return False;
4742                  op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2;
4743                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4744                  DIP("vceq.f32 %c%u, %c%u, %c%u\n",
4745                      Q ? 'q' : 'd', dreg,
4746                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4747               } else {
4748                  return False;
4749               }
4750            } else {
4751               if ((C >> 1) == 0) {
4752                  /* VCGE  */
4753                  IROp op;
4754                  if ((theInstr >> 20) & 1)
4755                     return False;
4756                  op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4757                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4758                  DIP("vcge.f32 %c%u, %c%u, %c%u\n",
4759                      Q ? 'q' : 'd', dreg,
4760                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4761               } else {
4762                  /* VCGT  */
4763                  IROp op;
4764                  if ((theInstr >> 20) & 1)
4765                     return False;
4766                  op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4767                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4768                  DIP("vcgt.f32 %c%u, %c%u, %c%u\n",
4769                      Q ? 'q' : 'd', dreg,
4770                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4771               }
4772            }
4773         } else {
4774            if (U == 1) {
4775               /* VACGE, VACGT */
4776               UInt op_bit = (theInstr >> 21) & 1;
4777               IROp op, op2;
4778               op2 = Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2;
4779               if (op_bit) {
4780                  op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4781                  assign(res, binop(op,
4782                                    unop(op2, mkexpr(arg_n)),
4783                                    unop(op2, mkexpr(arg_m))));
4784               } else {
4785                  op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4786                  assign(res, binop(op,
4787                                    unop(op2, mkexpr(arg_n)),
4788                                    unop(op2, mkexpr(arg_m))));
4789               }
4790               DIP("vacg%c.f32 %c%u, %c%u, %c%u\n", op_bit ? 't' : 'e',
4791                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4792                   Q ? 'q' : 'd', mreg);
4793            } else {
4794               return False;
4795            }
4796         }
4797         break;
4798      case 15:
4799         if (B == 0) {
4800            if (U == 0) {
4801               /* VMAX, VMIN  */
4802               IROp op;
4803               if ((theInstr >> 20) & 1)
4804                  return False;
4805               if ((theInstr >> 21) & 1) {
4806                  op = Q ? Iop_Min32Fx4 : Iop_Min32Fx2;
4807                  DIP("vmin.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4808                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4809               } else {
4810                  op = Q ? Iop_Max32Fx4 : Iop_Max32Fx2;
4811                  DIP("vmax.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4812                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4813               }
4814               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4815            } else {
4816               /* VPMAX, VPMIN   */
4817               IROp op;
4818               if (Q)
4819                  return False;
4820               if ((theInstr >> 20) & 1)
4821                  return False;
4822               if ((theInstr >> 21) & 1) {
4823                  op = Iop_PwMin32Fx2;
4824                  DIP("vpmin.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4825               } else {
4826                  op = Iop_PwMax32Fx2;
4827                  DIP("vpmax.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4828               }
4829               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4830            }
4831         } else {
4832            if (U == 0) {
4833               if ((C >> 1) == 0) {
4834                  /* VRECPS */
4835                  if ((theInstr >> 20) & 1)
4836                     return False;
4837                  assign(res, binop(Q ? Iop_RecipStep32Fx4
4838                                      : Iop_RecipStep32Fx2,
4839                                    mkexpr(arg_n),
4840                                    mkexpr(arg_m)));
4841                  DIP("vrecps.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4842                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4843               } else {
4844                  /* VRSQRTS  */
4845                  if ((theInstr >> 20) & 1)
4846                     return False;
4847                  assign(res, binop(Q ? Iop_RSqrtStep32Fx4
4848                                      : Iop_RSqrtStep32Fx2,
4849                                    mkexpr(arg_n),
4850                                    mkexpr(arg_m)));
4851                  DIP("vrsqrts.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4852                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4853               }
4854            } else {
4855               return False;
4856            }
4857         }
4858         break;
4859      default:
4860         /*NOTREACHED*/
4861         vassert(0);
4862   }
4863
4864   if (Q) {
4865      putQReg(dreg, mkexpr(res), condT);
4866   } else {
4867      putDRegI64(dreg, mkexpr(res), condT);
4868   }
4869
4870   return True;
4871}
4872
4873/* A7.4.2 Three registers of different length */
4874static
4875Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
4876{
4877   /* In paths where this returns False, indicating a non-decodable
4878      instruction, there may still be some IR assignments to temporaries
4879      generated.  This is inconvenient but harmless, and the post-front-end
4880      IR optimisation pass will just remove them anyway.  So there's no
4881      effort made here to tidy it up.
4882   */
4883   UInt A = (theInstr >> 8) & 0xf;
4884   UInt B = (theInstr >> 20) & 3;
4885   UInt U = (theInstr >> 24) & 1;
4886   UInt P = (theInstr >> 9) & 1;
4887   UInt mreg = get_neon_m_regno(theInstr);
4888   UInt nreg = get_neon_n_regno(theInstr);
4889   UInt dreg = get_neon_d_regno(theInstr);
4890   UInt size = B;
4891   ULong imm;
4892   IRTemp res, arg_m, arg_n, cond, tmp;
4893   IROp cvt, cvt2, cmp, op, op2, sh, add;
4894   switch (A) {
4895      case 0: case 1: case 2: case 3:
4896         /* VADDL, VADDW, VSUBL, VSUBW */
4897         if (dreg & 1)
4898            return False;
4899         dreg >>= 1;
4900         size = B;
4901         switch (size) {
4902            case 0:
4903               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
4904               op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
4905               break;
4906            case 1:
4907               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
4908               op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
4909               break;
4910            case 2:
4911               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
4912               op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
4913               break;
4914            case 3:
4915               return False;
4916            default:
4917               vassert(0);
4918         }
4919         arg_n = newTemp(Ity_V128);
4920         arg_m = newTemp(Ity_V128);
4921         if (A & 1) {
4922            if (nreg & 1)
4923               return False;
4924            nreg >>= 1;
4925            assign(arg_n, getQReg(nreg));
4926         } else {
4927            assign(arg_n, unop(cvt, getDRegI64(nreg)));
4928         }
4929         assign(arg_m, unop(cvt, getDRegI64(mreg)));
4930         putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
4931                       condT);
4932         DIP("v%s%c.%c%d q%u, %c%u, d%u\n", (A & 2) ? "sub" : "add",
4933             (A & 1) ? 'w' : 'l', U ? 'u' : 's', 8 << size, dreg,
4934             (A & 1) ? 'q' : 'd', nreg, mreg);
4935         return True;
4936      case 4:
4937         /* VADDHN, VRADDHN */
4938         if (mreg & 1)
4939            return False;
4940         mreg >>= 1;
4941         if (nreg & 1)
4942            return False;
4943         nreg >>= 1;
4944         size = B;
4945         switch (size) {
4946            case 0:
4947               op = Iop_Add16x8;
4948               cvt = Iop_NarrowUn16to8x8;
4949               sh = Iop_ShrN16x8;
4950               imm = 1U << 7;
4951               imm = (imm << 16) | imm;
4952               imm = (imm << 32) | imm;
4953               break;
4954            case 1:
4955               op = Iop_Add32x4;
4956               cvt = Iop_NarrowUn32to16x4;
4957               sh = Iop_ShrN32x4;
4958               imm = 1U << 15;
4959               imm = (imm << 32) | imm;
4960               break;
4961            case 2:
4962               op = Iop_Add64x2;
4963               cvt = Iop_NarrowUn64to32x2;
4964               sh = Iop_ShrN64x2;
4965               imm = 1U << 31;
4966               break;
4967            case 3:
4968               return False;
4969            default:
4970               vassert(0);
4971         }
4972         tmp = newTemp(Ity_V128);
4973         res = newTemp(Ity_V128);
4974         assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
4975         if (U) {
4976            /* VRADDHN */
4977            assign(res, binop(op, mkexpr(tmp),
4978                     binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
4979         } else {
4980            assign(res, mkexpr(tmp));
4981         }
4982         putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
4983                    condT);
4984         DIP("v%saddhn.i%d d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
4985             nreg, mreg);
4986         return True;
4987      case 5:
4988         /* VABAL */
4989         if (!((theInstr >> 23) & 1)) {
4990            vpanic("VABA should not be in dis_neon_data_3diff\n");
4991         }
4992         if (dreg & 1)
4993            return False;
4994         dreg >>= 1;
4995         switch (size) {
4996            case 0:
4997               cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4998               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
4999               cvt2 = Iop_Widen8Sto16x8;
5000               op = Iop_Sub16x8;
5001               op2 = Iop_Add16x8;
5002               break;
5003            case 1:
5004               cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
5005               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
5006               cvt2 = Iop_Widen16Sto32x4;
5007               op = Iop_Sub32x4;
5008               op2 = Iop_Add32x4;
5009               break;
5010            case 2:
5011               cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
5012               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
5013               cvt2 = Iop_Widen32Sto64x2;
5014               op = Iop_Sub64x2;
5015               op2 = Iop_Add64x2;
5016               break;
5017            case 3:
5018               return False;
5019            default:
5020               vassert(0);
5021         }
5022         arg_n = newTemp(Ity_V128);
5023         arg_m = newTemp(Ity_V128);
5024         cond = newTemp(Ity_V128);
5025         res = newTemp(Ity_V128);
5026         assign(arg_n, unop(cvt, getDRegI64(nreg)));
5027         assign(arg_m, unop(cvt, getDRegI64(mreg)));
5028         assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
5029                                            getDRegI64(mreg))));
5030         assign(res, binop(op2,
5031                           binop(Iop_OrV128,
5032                                 binop(Iop_AndV128,
5033                                       binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5034                                       mkexpr(cond)),
5035                                 binop(Iop_AndV128,
5036                                       binop(op, mkexpr(arg_m), mkexpr(arg_n)),
5037                                       unop(Iop_NotV128, mkexpr(cond)))),
5038                           getQReg(dreg)));
5039         putQReg(dreg, mkexpr(res), condT);
5040         DIP("vabal.%c%d q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
5041             nreg, mreg);
5042         return True;
5043      case 6:
5044         /* VSUBHN, VRSUBHN */
5045         if (mreg & 1)
5046            return False;
5047         mreg >>= 1;
5048         if (nreg & 1)
5049            return False;
5050         nreg >>= 1;
5051         size = B;
5052         switch (size) {
5053            case 0:
5054               op = Iop_Sub16x8;
5055               op2 = Iop_Add16x8;
5056               cvt = Iop_NarrowUn16to8x8;
5057               sh = Iop_ShrN16x8;
5058               imm = 1U << 7;
5059               imm = (imm << 16) | imm;
5060               imm = (imm << 32) | imm;
5061               break;
5062            case 1:
5063               op = Iop_Sub32x4;
5064               op2 = Iop_Add32x4;
5065               cvt = Iop_NarrowUn32to16x4;
5066               sh = Iop_ShrN32x4;
5067               imm = 1U << 15;
5068               imm = (imm << 32) | imm;
5069               break;
5070            case 2:
5071               op = Iop_Sub64x2;
5072               op2 = Iop_Add64x2;
5073               cvt = Iop_NarrowUn64to32x2;
5074               sh = Iop_ShrN64x2;
5075               imm = 1U << 31;
5076               break;
5077            case 3:
5078               return False;
5079            default:
5080               vassert(0);
5081         }
5082         tmp = newTemp(Ity_V128);
5083         res = newTemp(Ity_V128);
5084         assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
5085         if (U) {
5086            /* VRSUBHN */
5087            assign(res, binop(op2, mkexpr(tmp),
5088                     binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
5089         } else {
5090            assign(res, mkexpr(tmp));
5091         }
5092         putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
5093                    condT);
5094         DIP("v%ssubhn.i%d d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
5095             nreg, mreg);
5096         return True;
5097      case 7:
5098         /* VABDL */
5099         if (!((theInstr >> 23) & 1)) {
5100            vpanic("VABL should not be in dis_neon_data_3diff\n");
5101         }
5102         if (dreg & 1)
5103            return False;
5104         dreg >>= 1;
5105         switch (size) {
5106            case 0:
5107               cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
5108               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
5109               cvt2 = Iop_Widen8Sto16x8;
5110               op = Iop_Sub16x8;
5111               break;
5112            case 1:
5113               cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
5114               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
5115               cvt2 = Iop_Widen16Sto32x4;
5116               op = Iop_Sub32x4;
5117               break;
5118            case 2:
5119               cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
5120               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
5121               cvt2 = Iop_Widen32Sto64x2;
5122               op = Iop_Sub64x2;
5123               break;
5124            case 3:
5125               return False;
5126            default:
5127               vassert(0);
5128         }
5129         arg_n = newTemp(Ity_V128);
5130         arg_m = newTemp(Ity_V128);
5131         cond = newTemp(Ity_V128);
5132         res = newTemp(Ity_V128);
5133         assign(arg_n, unop(cvt, getDRegI64(nreg)));
5134         assign(arg_m, unop(cvt, getDRegI64(mreg)));
5135         assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
5136                                            getDRegI64(mreg))));
5137         assign(res, binop(Iop_OrV128,
5138                           binop(Iop_AndV128,
5139                                 binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5140                                 mkexpr(cond)),
5141                           binop(Iop_AndV128,
5142                                 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
5143                                 unop(Iop_NotV128, mkexpr(cond)))));
5144         putQReg(dreg, mkexpr(res), condT);
5145         DIP("vabdl.%c%d q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
5146             nreg, mreg);
5147         return True;
5148      case 8:
5149      case 10:
5150         /* VMLAL, VMLSL (integer) */
5151         if (dreg & 1)
5152            return False;
5153         dreg >>= 1;
5154         size = B;
5155         switch (size) {
5156            case 0:
5157               op = U ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5158               op2 = P ? Iop_Sub16x8 : Iop_Add16x8;
5159               break;
5160            case 1:
5161               op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5162               op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5163               break;
5164            case 2:
5165               op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5166               op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5167               break;
5168            case 3:
5169               return False;
5170            default:
5171               vassert(0);
5172         }
5173         res = newTemp(Ity_V128);
5174         assign(res, binop(op, getDRegI64(nreg),getDRegI64(mreg)));
5175         putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5176         DIP("vml%cl.%c%d q%u, d%u, d%u\n", P ? 's' : 'a', U ? 'u' : 's',
5177             8 << size, dreg, nreg, mreg);
5178         return True;
5179      case 9:
5180      case 11:
5181         /* VQDMLAL, VQDMLSL */
5182         if (U)
5183            return False;
5184         if (dreg & 1)
5185            return False;
5186         dreg >>= 1;
5187         size = B;
5188         switch (size) {
5189            case 0: case 3:
5190               return False;
5191            case 1:
5192               op = Iop_QDMull16Sx4;
5193               cmp = Iop_CmpEQ16x4;
5194               add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5195               op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5196               imm = 1LL << 15;
5197               imm = (imm << 16) | imm;
5198               imm = (imm << 32) | imm;
5199               break;
5200            case 2:
5201               op = Iop_QDMull32Sx2;
5202               cmp = Iop_CmpEQ32x2;
5203               add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5204               op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5205               imm = 1LL << 31;
5206               imm = (imm << 32) | imm;
5207               break;
5208            default:
5209               vassert(0);
5210         }
5211         res = newTemp(Ity_V128);
5212         tmp = newTemp(Ity_V128);
5213         assign(res, binop(op, getDRegI64(nreg), getDRegI64(mreg)));
5214         assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5215         setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5216                    True, condT);
5217         setFlag_QC(binop(Iop_And64,
5218                          binop(cmp, getDRegI64(nreg), mkU64(imm)),
5219                          binop(cmp, getDRegI64(mreg), mkU64(imm))),
5220                    mkU64(0),
5221                    False, condT);
5222         putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5223         DIP("vqdml%cl.s%d q%u, d%u, d%u\n", P ? 's' : 'a', 8 << size, dreg,
5224             nreg, mreg);
5225         return True;
5226      case 12:
5227      case 14:
5228         /* VMULL (integer or polynomial) */
5229         if (dreg & 1)
5230            return False;
5231         dreg >>= 1;
5232         size = B;
5233         switch (size) {
5234            case 0:
5235               op = (U) ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5236               if (P)
5237                  op = Iop_PolynomialMull8x8;
5238               break;
5239            case 1:
5240               if (P) return False;
5241               op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5242               break;
5243            case 2:
5244               if (P) return False;
5245               op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5246               break;
5247            case 3:
5248               return False;
5249            default:
5250               vassert(0);
5251         }
5252         putQReg(dreg, binop(op, getDRegI64(nreg),
5253                                 getDRegI64(mreg)), condT);
5254         DIP("vmull.%c%d q%u, d%u, d%u\n", P ? 'p' : (U ? 'u' : 's'),
5255               8 << size, dreg, nreg, mreg);
5256         return True;
5257      case 13:
5258         /* VQDMULL */
5259         if (U)
5260            return False;
5261         if (dreg & 1)
5262            return False;
5263         dreg >>= 1;
5264         size = B;
5265         switch (size) {
5266            case 0:
5267            case 3:
5268               return False;
5269            case 1:
5270               op = Iop_QDMull16Sx4;
5271               op2 = Iop_CmpEQ16x4;
5272               imm = 1LL << 15;
5273               imm = (imm << 16) | imm;
5274               imm = (imm << 32) | imm;
5275               break;
5276            case 2:
5277               op = Iop_QDMull32Sx2;
5278               op2 = Iop_CmpEQ32x2;
5279               imm = 1LL << 31;
5280               imm = (imm << 32) | imm;
5281               break;
5282            default:
5283               vassert(0);
5284         }
5285         putQReg(dreg, binop(op, getDRegI64(nreg), getDRegI64(mreg)),
5286               condT);
5287         setFlag_QC(binop(Iop_And64,
5288                          binop(op2, getDRegI64(nreg), mkU64(imm)),
5289                          binop(op2, getDRegI64(mreg), mkU64(imm))),
5290                    mkU64(0),
5291                    False, condT);
5292         DIP("vqdmull.s%d q%u, d%u, d%u\n", 8 << size, dreg, nreg, mreg);
5293         return True;
5294      default:
5295         return False;
5296   }
5297   return False;
5298}
5299
5300/* A7.4.3 Two registers and a scalar */
5301static
5302Bool dis_neon_data_2reg_and_scalar ( UInt theInstr, IRTemp condT )
5303{
5304#  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
5305   UInt U = INSN(24,24);
5306   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
5307   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
5308   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
5309   UInt size = INSN(21,20);
5310   UInt index;
5311   UInt Q = INSN(24,24);
5312
5313   if (INSN(27,25) != 1 || INSN(23,23) != 1
5314       || INSN(6,6) != 1 || INSN(4,4) != 0)
5315      return False;
5316
5317   /* VMLA, VMLS (scalar)  */
5318   if ((INSN(11,8) & BITS4(1,0,1,0)) == BITS4(0,0,0,0)) {
5319      IRTemp res, arg_m, arg_n;
5320      IROp dup, get, op, op2, add, sub;
5321      if (Q) {
5322         if ((dreg & 1) || (nreg & 1))
5323            return False;
5324         dreg >>= 1;
5325         nreg >>= 1;
5326         res = newTemp(Ity_V128);
5327         arg_m = newTemp(Ity_V128);
5328         arg_n = newTemp(Ity_V128);
5329         assign(arg_n, getQReg(nreg));
5330         switch(size) {
5331            case 1:
5332               dup = Iop_Dup16x8;
5333               get = Iop_GetElem16x4;
5334               index = mreg >> 3;
5335               mreg &= 7;
5336               break;
5337            case 2:
5338               dup = Iop_Dup32x4;
5339               get = Iop_GetElem32x2;
5340               index = mreg >> 4;
5341               mreg &= 0xf;
5342               break;
5343            case 0:
5344            case 3:
5345               return False;
5346            default:
5347               vassert(0);
5348         }
5349         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5350      } else {
5351         res = newTemp(Ity_I64);
5352         arg_m = newTemp(Ity_I64);
5353         arg_n = newTemp(Ity_I64);
5354         assign(arg_n, getDRegI64(nreg));
5355         switch(size) {
5356            case 1:
5357               dup = Iop_Dup16x4;
5358               get = Iop_GetElem16x4;
5359               index = mreg >> 3;
5360               mreg &= 7;
5361               break;
5362            case 2:
5363               dup = Iop_Dup32x2;
5364               get = Iop_GetElem32x2;
5365               index = mreg >> 4;
5366               mreg &= 0xf;
5367               break;
5368            case 0:
5369            case 3:
5370               return False;
5371            default:
5372               vassert(0);
5373         }
5374         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5375      }
5376      if (INSN(8,8)) {
5377         switch (size) {
5378            case 2:
5379               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5380               add = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
5381               sub = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
5382               break;
5383            case 0:
5384            case 1:
5385            case 3:
5386               return False;
5387            default:
5388               vassert(0);
5389         }
5390      } else {
5391         switch (size) {
5392            case 1:
5393               op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5394               add = Q ? Iop_Add16x8 : Iop_Add16x4;
5395               sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
5396               break;
5397            case 2:
5398               op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5399               add = Q ? Iop_Add32x4 : Iop_Add32x2;
5400               sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
5401               break;
5402            case 0:
5403            case 3:
5404               return False;
5405            default:
5406               vassert(0);
5407         }
5408      }
5409      op2 = INSN(10,10) ? sub : add;
5410      assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5411      if (Q)
5412         putQReg(dreg, binop_w_fake_RM(op2, getQReg(dreg), mkexpr(res)),
5413                 condT);
5414      else
5415         putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)),
5416                    condT);
5417      DIP("vml%c.%c%d %c%u, %c%u, d%u[%u]\n", INSN(10,10) ? 's' : 'a',
5418            INSN(8,8) ? 'f' : 'i', 8 << size,
5419            Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, mreg, index);
5420      return True;
5421   }
5422
5423   /* VMLAL, VMLSL (scalar)   */
5424   if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,0)) {
5425      IRTemp res, arg_m, arg_n;
5426      IROp dup, get, op, op2, add, sub;
5427      if (dreg & 1)
5428         return False;
5429      dreg >>= 1;
5430      res = newTemp(Ity_V128);
5431      arg_m = newTemp(Ity_I64);
5432      arg_n = newTemp(Ity_I64);
5433      assign(arg_n, getDRegI64(nreg));
5434      switch(size) {
5435         case 1:
5436            dup = Iop_Dup16x4;
5437            get = Iop_GetElem16x4;
5438            index = mreg >> 3;
5439            mreg &= 7;
5440            break;
5441         case 2:
5442            dup = Iop_Dup32x2;
5443            get = Iop_GetElem32x2;
5444            index = mreg >> 4;
5445            mreg &= 0xf;
5446            break;
5447         case 0:
5448         case 3:
5449            return False;
5450         default:
5451            vassert(0);
5452      }
5453      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5454      switch (size) {
5455         case 1:
5456            op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5457            add = Iop_Add32x4;
5458            sub = Iop_Sub32x4;
5459            break;
5460         case 2:
5461            op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5462            add = Iop_Add64x2;
5463            sub = Iop_Sub64x2;
5464            break;
5465         case 0:
5466         case 3:
5467            return False;
5468         default:
5469            vassert(0);
5470      }
5471      op2 = INSN(10,10) ? sub : add;
5472      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5473      putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5474      DIP("vml%cl.%c%d q%u, d%u, d%u[%u]\n",
5475          INSN(10,10) ? 's' : 'a', U ? 'u' : 's',
5476          8 << size, dreg, nreg, mreg, index);
5477      return True;
5478   }
5479
5480   /* VQDMLAL, VQDMLSL (scalar)  */
5481   if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,1) && !U) {
5482      IRTemp res, arg_m, arg_n, tmp;
5483      IROp dup, get, op, op2, add, cmp;
5484      UInt P = INSN(10,10);
5485      ULong imm;
5486      if (dreg & 1)
5487         return False;
5488      dreg >>= 1;
5489      res = newTemp(Ity_V128);
5490      arg_m = newTemp(Ity_I64);
5491      arg_n = newTemp(Ity_I64);
5492      assign(arg_n, getDRegI64(nreg));
5493      switch(size) {
5494         case 1:
5495            dup = Iop_Dup16x4;
5496            get = Iop_GetElem16x4;
5497            index = mreg >> 3;
5498            mreg &= 7;
5499            break;
5500         case 2:
5501            dup = Iop_Dup32x2;
5502            get = Iop_GetElem32x2;
5503            index = mreg >> 4;
5504            mreg &= 0xf;
5505            break;
5506         case 0:
5507         case 3:
5508            return False;
5509         default:
5510            vassert(0);
5511      }
5512      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5513      switch (size) {
5514         case 0:
5515         case 3:
5516            return False;
5517         case 1:
5518            op = Iop_QDMull16Sx4;
5519            cmp = Iop_CmpEQ16x4;
5520            add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5521            op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5522            imm = 1LL << 15;
5523            imm = (imm << 16) | imm;
5524            imm = (imm << 32) | imm;
5525            break;
5526         case 2:
5527            op = Iop_QDMull32Sx2;
5528            cmp = Iop_CmpEQ32x2;
5529            add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5530            op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5531            imm = 1LL << 31;
5532            imm = (imm << 32) | imm;
5533            break;
5534         default:
5535            vassert(0);
5536      }
5537      res = newTemp(Ity_V128);
5538      tmp = newTemp(Ity_V128);
5539      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5540      assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5541      setFlag_QC(binop(Iop_And64,
5542                       binop(cmp, mkexpr(arg_n), mkU64(imm)),
5543                       binop(cmp, mkexpr(arg_m), mkU64(imm))),
5544                 mkU64(0),
5545                 False, condT);
5546      setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5547                 True, condT);
5548      putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5549      DIP("vqdml%cl.s%d q%u, d%u, d%u[%u]\n", P ? 's' : 'a', 8 << size,
5550          dreg, nreg, mreg, index);
5551      return True;
5552   }
5553
5554   /* VMUL (by scalar)  */
5555   if ((INSN(11,8) & BITS4(1,1,1,0)) == BITS4(1,0,0,0)) {
5556      IRTemp res, arg_m, arg_n;
5557      IROp dup, get, op;
5558      if (Q) {
5559         if ((dreg & 1) || (nreg & 1))
5560            return False;
5561         dreg >>= 1;
5562         nreg >>= 1;
5563         res = newTemp(Ity_V128);
5564         arg_m = newTemp(Ity_V128);
5565         arg_n = newTemp(Ity_V128);
5566         assign(arg_n, getQReg(nreg));
5567         switch(size) {
5568            case 1:
5569               dup = Iop_Dup16x8;
5570               get = Iop_GetElem16x4;
5571               index = mreg >> 3;
5572               mreg &= 7;
5573               break;
5574            case 2:
5575               dup = Iop_Dup32x4;
5576               get = Iop_GetElem32x2;
5577               index = mreg >> 4;
5578               mreg &= 0xf;
5579               break;
5580            case 0:
5581            case 3:
5582               return False;
5583            default:
5584               vassert(0);
5585         }
5586         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5587      } else {
5588         res = newTemp(Ity_I64);
5589         arg_m = newTemp(Ity_I64);
5590         arg_n = newTemp(Ity_I64);
5591         assign(arg_n, getDRegI64(nreg));
5592         switch(size) {
5593            case 1:
5594               dup = Iop_Dup16x4;
5595               get = Iop_GetElem16x4;
5596               index = mreg >> 3;
5597               mreg &= 7;
5598               break;
5599            case 2:
5600               dup = Iop_Dup32x2;
5601               get = Iop_GetElem32x2;
5602               index = mreg >> 4;
5603               mreg &= 0xf;
5604               break;
5605            case 0:
5606            case 3:
5607               return False;
5608            default:
5609               vassert(0);
5610         }
5611         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5612      }
5613      if (INSN(8,8)) {
5614         switch (size) {
5615            case 2:
5616               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5617               break;
5618            case 0:
5619            case 1:
5620            case 3:
5621               return False;
5622            default:
5623               vassert(0);
5624         }
5625      } else {
5626         switch (size) {
5627            case 1:
5628               op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5629               break;
5630            case 2:
5631               op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5632               break;
5633            case 0:
5634            case 3:
5635               return False;
5636            default:
5637               vassert(0);
5638         }
5639      }
5640      assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5641      if (Q)
5642         putQReg(dreg, mkexpr(res), condT);
5643      else
5644         putDRegI64(dreg, mkexpr(res), condT);
5645      DIP("vmul.%c%d %c%u, %c%u, d%u[%u]\n", INSN(8,8) ? 'f' : 'i',
5646          8 << size, Q ? 'q' : 'd', dreg,
5647          Q ? 'q' : 'd', nreg, mreg, index);
5648      return True;
5649   }
5650
5651   /* VMULL (scalar) */
5652   if (INSN(11,8) == BITS4(1,0,1,0)) {
5653      IRTemp res, arg_m, arg_n;
5654      IROp dup, get, op;
5655      if (dreg & 1)
5656         return False;
5657      dreg >>= 1;
5658      res = newTemp(Ity_V128);
5659      arg_m = newTemp(Ity_I64);
5660      arg_n = newTemp(Ity_I64);
5661      assign(arg_n, getDRegI64(nreg));
5662      switch(size) {
5663         case 1:
5664            dup = Iop_Dup16x4;
5665            get = Iop_GetElem16x4;
5666            index = mreg >> 3;
5667            mreg &= 7;
5668            break;
5669         case 2:
5670            dup = Iop_Dup32x2;
5671            get = Iop_GetElem32x2;
5672            index = mreg >> 4;
5673            mreg &= 0xf;
5674            break;
5675         case 0:
5676         case 3:
5677            return False;
5678         default:
5679            vassert(0);
5680      }
5681      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5682      switch (size) {
5683         case 1: op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4; break;
5684         case 2: op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2; break;
5685         case 0: case 3: return False;
5686         default: vassert(0);
5687      }
5688      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5689      putQReg(dreg, mkexpr(res), condT);
5690      DIP("vmull.%c%d q%u, d%u, d%u[%u]\n", U ? 'u' : 's', 8 << size, dreg,
5691          nreg, mreg, index);
5692      return True;
5693   }
5694
5695   /* VQDMULL */
5696   if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
5697      IROp op ,op2, dup, get;
5698      ULong imm;
5699      IRTemp arg_m, arg_n;
5700      if (dreg & 1)
5701         return False;
5702      dreg >>= 1;
5703      arg_m = newTemp(Ity_I64);
5704      arg_n = newTemp(Ity_I64);
5705      assign(arg_n, getDRegI64(nreg));
5706      switch(size) {
5707         case 1:
5708            dup = Iop_Dup16x4;
5709            get = Iop_GetElem16x4;
5710            index = mreg >> 3;
5711            mreg &= 7;
5712            break;
5713         case 2:
5714            dup = Iop_Dup32x2;
5715            get = Iop_GetElem32x2;
5716            index = mreg >> 4;
5717            mreg &= 0xf;
5718            break;
5719         case 0:
5720         case 3:
5721            return False;
5722         default:
5723            vassert(0);
5724      }
5725      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5726      switch (size) {
5727         case 0:
5728         case 3:
5729            return False;
5730         case 1:
5731            op = Iop_QDMull16Sx4;
5732            op2 = Iop_CmpEQ16x4;
5733            imm = 1LL << 15;
5734            imm = (imm << 16) | imm;
5735            imm = (imm << 32) | imm;
5736            break;
5737         case 2:
5738            op = Iop_QDMull32Sx2;
5739            op2 = Iop_CmpEQ32x2;
5740            imm = 1LL << 31;
5741            imm = (imm << 32) | imm;
5742            break;
5743         default:
5744            vassert(0);
5745      }
5746      putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5747            condT);
5748      setFlag_QC(binop(Iop_And64,
5749                       binop(op2, mkexpr(arg_n), mkU64(imm)),
5750                       binop(op2, mkexpr(arg_m), mkU64(imm))),
5751                 mkU64(0),
5752                 False, condT);
5753      DIP("vqdmull.s%d q%u, d%u, d%u[%u]\n", 8 << size, dreg, nreg, mreg,
5754          index);
5755      return True;
5756   }
5757
5758   /* VQDMULH */
5759   if (INSN(11,8) == BITS4(1,1,0,0)) {
5760      IROp op ,op2, dup, get;
5761      ULong imm;
5762      IRTemp res, arg_m, arg_n;
5763      if (Q) {
5764         if ((dreg & 1) || (nreg & 1))
5765            return False;
5766         dreg >>= 1;
5767         nreg >>= 1;
5768         res = newTemp(Ity_V128);
5769         arg_m = newTemp(Ity_V128);
5770         arg_n = newTemp(Ity_V128);
5771         assign(arg_n, getQReg(nreg));
5772         switch(size) {
5773            case 1:
5774               dup = Iop_Dup16x8;
5775               get = Iop_GetElem16x4;
5776               index = mreg >> 3;
5777               mreg &= 7;
5778               break;
5779            case 2:
5780               dup = Iop_Dup32x4;
5781               get = Iop_GetElem32x2;
5782               index = mreg >> 4;
5783               mreg &= 0xf;
5784               break;
5785            case 0:
5786            case 3:
5787               return False;
5788            default:
5789               vassert(0);
5790         }
5791         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5792      } else {
5793         res = newTemp(Ity_I64);
5794         arg_m = newTemp(Ity_I64);
5795         arg_n = newTemp(Ity_I64);
5796         assign(arg_n, getDRegI64(nreg));
5797         switch(size) {
5798            case 1:
5799               dup = Iop_Dup16x4;
5800               get = Iop_GetElem16x4;
5801               index = mreg >> 3;
5802               mreg &= 7;
5803               break;
5804            case 2:
5805               dup = Iop_Dup32x2;
5806               get = Iop_GetElem32x2;
5807               index = mreg >> 4;
5808               mreg &= 0xf;
5809               break;
5810            case 0:
5811            case 3:
5812               return False;
5813            default:
5814               vassert(0);
5815         }
5816         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5817      }
5818      switch (size) {
5819         case 0:
5820         case 3:
5821            return False;
5822         case 1:
5823            op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
5824            op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5825            imm = 1LL << 15;
5826            imm = (imm << 16) | imm;
5827            imm = (imm << 32) | imm;
5828            break;
5829         case 2:
5830            op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
5831            op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5832            imm = 1LL << 31;
5833            imm = (imm << 32) | imm;
5834            break;
5835         default:
5836            vassert(0);
5837      }
5838      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5839      setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5840                       binop(op2, mkexpr(arg_n),
5841                                  Q ? mkU128(imm) : mkU64(imm)),
5842                       binop(op2, mkexpr(arg_m),
5843                             Q ? mkU128(imm) : mkU64(imm))),
5844                 Q ? mkU128(0) : mkU64(0),
5845                 Q, condT);
5846      if (Q)
5847         putQReg(dreg, mkexpr(res), condT);
5848      else
5849         putDRegI64(dreg, mkexpr(res), condT);
5850      DIP("vqdmulh.s%d %c%u, %c%u, d%u[%u]\n",
5851          8 << size, Q ? 'q' : 'd', dreg,
5852          Q ? 'q' : 'd', nreg, mreg, index);
5853      return True;
5854   }
5855
5856   /* VQRDMULH (scalar) */
5857   if (INSN(11,8) == BITS4(1,1,0,1)) {
5858      IROp op ,op2, dup, get;
5859      ULong imm;
5860      IRTemp res, arg_m, arg_n;
5861      if (Q) {
5862         if ((dreg & 1) || (nreg & 1))
5863            return False;
5864         dreg >>= 1;
5865         nreg >>= 1;
5866         res = newTemp(Ity_V128);
5867         arg_m = newTemp(Ity_V128);
5868         arg_n = newTemp(Ity_V128);
5869         assign(arg_n, getQReg(nreg));
5870         switch(size) {
5871            case 1:
5872               dup = Iop_Dup16x8;
5873               get = Iop_GetElem16x4;
5874               index = mreg >> 3;
5875               mreg &= 7;
5876               break;
5877            case 2:
5878               dup = Iop_Dup32x4;
5879               get = Iop_GetElem32x2;
5880               index = mreg >> 4;
5881               mreg &= 0xf;
5882               break;
5883            case 0:
5884            case 3:
5885               return False;
5886            default:
5887               vassert(0);
5888         }
5889         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5890      } else {
5891         res = newTemp(Ity_I64);
5892         arg_m = newTemp(Ity_I64);
5893         arg_n = newTemp(Ity_I64);
5894         assign(arg_n, getDRegI64(nreg));
5895         switch(size) {
5896            case 1:
5897               dup = Iop_Dup16x4;
5898               get = Iop_GetElem16x4;
5899               index = mreg >> 3;
5900               mreg &= 7;
5901               break;
5902            case 2:
5903               dup = Iop_Dup32x2;
5904               get = Iop_GetElem32x2;
5905               index = mreg >> 4;
5906               mreg &= 0xf;
5907               break;
5908            case 0:
5909            case 3:
5910               return False;
5911            default:
5912               vassert(0);
5913         }
5914         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5915      }
5916      switch (size) {
5917         case 0:
5918         case 3:
5919            return False;
5920         case 1:
5921            op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
5922            op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5923            imm = 1LL << 15;
5924            imm = (imm << 16) | imm;
5925            imm = (imm << 32) | imm;
5926            break;
5927         case 2:
5928            op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
5929            op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5930            imm = 1LL << 31;
5931            imm = (imm << 32) | imm;
5932            break;
5933         default:
5934            vassert(0);
5935      }
5936      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5937      setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5938                       binop(op2, mkexpr(arg_n),
5939                                  Q ? mkU128(imm) : mkU64(imm)),
5940                       binop(op2, mkexpr(arg_m),
5941                                  Q ? mkU128(imm) : mkU64(imm))),
5942                 Q ? mkU128(0) : mkU64(0),
5943                 Q, condT);
5944      if (Q)
5945         putQReg(dreg, mkexpr(res), condT);
5946      else
5947         putDRegI64(dreg, mkexpr(res), condT);
5948      DIP("vqrdmulh.s%d %c%u, %c%u, d%u[%u]\n",
5949          8 << size, Q ? 'q' : 'd', dreg,
5950          Q ? 'q' : 'd', nreg, mreg, index);
5951      return True;
5952   }
5953
5954   return False;
5955#  undef INSN
5956}
5957
5958/* A7.4.4 Two registers and a shift amount */
5959static
5960Bool dis_neon_data_2reg_and_shift ( UInt theInstr, IRTemp condT )
5961{
5962   UInt A = (theInstr >> 8) & 0xf;
5963   UInt B = (theInstr >> 6) & 1;
5964   UInt L = (theInstr >> 7) & 1;
5965   UInt U = (theInstr >> 24) & 1;
5966   UInt Q = B;
5967   UInt imm6 = (theInstr >> 16) & 0x3f;
5968   UInt shift_imm;
5969   UInt size = 4;
5970   UInt tmp;
5971   UInt mreg = get_neon_m_regno(theInstr);
5972   UInt dreg = get_neon_d_regno(theInstr);
5973   ULong imm = 0;
5974   IROp op, cvt, add = Iop_INVALID, cvt2, op_rev;
5975   IRTemp reg_m, res, mask;
5976
5977   if (L == 0 && ((theInstr >> 19) & 7) == 0)
5978      /* It is one reg and immediate */
5979      return False;
5980
5981   tmp = (L << 6) | imm6;
5982   if (tmp & 0x40) {
5983      size = 3;
5984      shift_imm = 64 - imm6;
5985   } else if (tmp & 0x20) {
5986      size = 2;
5987      shift_imm = 64 - imm6;
5988   } else if (tmp & 0x10) {
5989      size = 1;
5990      shift_imm = 32 - imm6;
5991   } else if (tmp & 0x8) {
5992      size = 0;
5993      shift_imm = 16 - imm6;
5994   } else {
5995      return False;
5996   }
5997
5998   switch (A) {
5999      case 3:
6000      case 2:
6001         /* VRSHR, VRSRA */
6002         if (shift_imm > 0) {
6003            IRExpr *imm_val;
6004            imm = 1L;
6005            switch (size) {
6006               case 0:
6007                  imm = (imm << 8) | imm;
6008                  /* fall through */
6009               case 1:
6010                  imm = (imm << 16) | imm;
6011                  /* fall through */
6012               case 2:
6013                  imm = (imm << 32) | imm;
6014                  /* fall through */
6015               case 3:
6016                  break;
6017               default:
6018                  vassert(0);
6019            }
6020            if (Q) {
6021               reg_m = newTemp(Ity_V128);
6022               res = newTemp(Ity_V128);
6023               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
6024               assign(reg_m, getQReg(mreg));
6025               switch (size) {
6026                  case 0:
6027                     add = Iop_Add8x16;
6028                     op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
6029                     break;
6030                  case 1:
6031                     add = Iop_Add16x8;
6032                     op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6033                     break;
6034                  case 2:
6035                     add = Iop_Add32x4;
6036                     op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6037                     break;
6038                  case 3:
6039                     add = Iop_Add64x2;
6040                     op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6041                     break;
6042                  default:
6043                     vassert(0);
6044               }
6045            } else {
6046               reg_m = newTemp(Ity_I64);
6047               res = newTemp(Ity_I64);
6048               imm_val = mkU64(imm);
6049               assign(reg_m, getDRegI64(mreg));
6050               switch (size) {
6051                  case 0:
6052                     add = Iop_Add8x8;
6053                     op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
6054                     break;
6055                  case 1:
6056                     add = Iop_Add16x4;
6057                     op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
6058                     break;
6059                  case 2:
6060                     add = Iop_Add32x2;
6061                     op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
6062                     break;
6063                  case 3:
6064                     add = Iop_Add64;
6065                     op = U ? Iop_Shr64 : Iop_Sar64;
6066                     break;
6067                  default:
6068                     vassert(0);
6069               }
6070            }
6071            assign(res,
6072                   binop(add,
6073                         binop(op,
6074                               mkexpr(reg_m),
6075                               mkU8(shift_imm)),
6076                         binop(Q ? Iop_AndV128 : Iop_And64,
6077                               binop(op,
6078                                     mkexpr(reg_m),
6079                                     mkU8(shift_imm - 1)),
6080                               imm_val)));
6081         } else {
6082            if (Q) {
6083               res = newTemp(Ity_V128);
6084               assign(res, getQReg(mreg));
6085            } else {
6086               res = newTemp(Ity_I64);
6087               assign(res, getDRegI64(mreg));
6088            }
6089         }
6090         if (A == 3) {
6091            if (Q) {
6092               putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6093                             condT);
6094            } else {
6095               putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6096                                condT);
6097            }
6098            DIP("vrsra.%c%d %c%u, %c%u, #%u\n",
6099                U ? 'u' : 's', 8 << size,
6100                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6101         } else {
6102            if (Q) {
6103               putQReg(dreg, mkexpr(res), condT);
6104            } else {
6105               putDRegI64(dreg, mkexpr(res), condT);
6106            }
6107            DIP("vrshr.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6108                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6109         }
6110         return True;
6111      case 1:
6112      case 0:
6113         /* VSHR, VSRA */
6114         if (Q) {
6115            reg_m = newTemp(Ity_V128);
6116            assign(reg_m, getQReg(mreg));
6117            res = newTemp(Ity_V128);
6118         } else {
6119            reg_m = newTemp(Ity_I64);
6120            assign(reg_m, getDRegI64(mreg));
6121            res = newTemp(Ity_I64);
6122         }
6123         if (Q) {
6124            switch (size) {
6125               case 0:
6126                  op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
6127                  add = Iop_Add8x16;
6128                  break;
6129               case 1:
6130                  op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6131                  add = Iop_Add16x8;
6132                  break;
6133               case 2:
6134                  op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6135                  add = Iop_Add32x4;
6136                  break;
6137               case 3:
6138                  op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6139                  add = Iop_Add64x2;
6140                  break;
6141               default:
6142                  vassert(0);
6143            }
6144         } else {
6145            switch (size) {
6146               case 0:
6147                  op =  U ? Iop_ShrN8x8 : Iop_SarN8x8;
6148                  add = Iop_Add8x8;
6149                  break;
6150               case 1:
6151                  op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
6152                  add = Iop_Add16x4;
6153                  break;
6154               case 2:
6155                  op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
6156                  add = Iop_Add32x2;
6157                  break;
6158               case 3:
6159                  op = U ? Iop_Shr64 : Iop_Sar64;
6160                  add = Iop_Add64;
6161                  break;
6162               default:
6163                  vassert(0);
6164            }
6165         }
6166         assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6167         if (A == 1) {
6168            if (Q) {
6169               putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6170                             condT);
6171            } else {
6172               putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6173                                condT);
6174            }
6175            DIP("vsra.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6176                  Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6177         } else {
6178            if (Q) {
6179               putQReg(dreg, mkexpr(res), condT);
6180            } else {
6181               putDRegI64(dreg, mkexpr(res), condT);
6182            }
6183            DIP("vshr.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6184                  Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6185         }
6186         return True;
6187      case 4:
6188         /* VSRI */
6189         if (!U)
6190            return False;
6191         if (Q) {
6192            res = newTemp(Ity_V128);
6193            mask = newTemp(Ity_V128);
6194         } else {
6195            res = newTemp(Ity_I64);
6196            mask = newTemp(Ity_I64);
6197         }
6198         switch (size) {
6199            case 0: op = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; break;
6200            case 1: op = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; break;
6201            case 2: op = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; break;
6202            case 3: op = Q ? Iop_ShrN64x2 : Iop_Shr64; break;
6203            default: vassert(0);
6204         }
6205         if (Q) {
6206            assign(mask, binop(op, binop(Iop_64HLtoV128,
6207                                         mkU64(0xFFFFFFFFFFFFFFFFLL),
6208                                         mkU64(0xFFFFFFFFFFFFFFFFLL)),
6209                               mkU8(shift_imm)));
6210            assign(res, binop(Iop_OrV128,
6211                              binop(Iop_AndV128,
6212                                    getQReg(dreg),
6213                                    unop(Iop_NotV128,
6214                                         mkexpr(mask))),
6215                              binop(op,
6216                                    getQReg(mreg),
6217                                    mkU8(shift_imm))));
6218            putQReg(dreg, mkexpr(res), condT);
6219         } else {
6220            assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6221                               mkU8(shift_imm)));
6222            assign(res, binop(Iop_Or64,
6223                              binop(Iop_And64,
6224                                    getDRegI64(dreg),
6225                                    unop(Iop_Not64,
6226                                         mkexpr(mask))),
6227                              binop(op,
6228                                    getDRegI64(mreg),
6229                                    mkU8(shift_imm))));
6230            putDRegI64(dreg, mkexpr(res), condT);
6231         }
6232         DIP("vsri.%d %c%u, %c%u, #%u\n",
6233             8 << size, Q ? 'q' : 'd', dreg,
6234             Q ? 'q' : 'd', mreg, shift_imm);
6235         return True;
6236      case 5:
6237         if (U) {
6238            /* VSLI */
6239            shift_imm = 8 * (1 << size) - shift_imm;
6240            if (Q) {
6241               res = newTemp(Ity_V128);
6242               mask = newTemp(Ity_V128);
6243            } else {
6244               res = newTemp(Ity_I64);
6245               mask = newTemp(Ity_I64);
6246            }
6247            switch (size) {
6248               case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6249               case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6250               case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6251               case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6252               default: vassert(0);
6253            }
6254            if (Q) {
6255               assign(mask, binop(op, binop(Iop_64HLtoV128,
6256                                            mkU64(0xFFFFFFFFFFFFFFFFLL),
6257                                            mkU64(0xFFFFFFFFFFFFFFFFLL)),
6258                                  mkU8(shift_imm)));
6259               assign(res, binop(Iop_OrV128,
6260                                 binop(Iop_AndV128,
6261                                       getQReg(dreg),
6262                                       unop(Iop_NotV128,
6263                                            mkexpr(mask))),
6264                                 binop(op,
6265                                       getQReg(mreg),
6266                                       mkU8(shift_imm))));
6267               putQReg(dreg, mkexpr(res), condT);
6268            } else {
6269               assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6270                                  mkU8(shift_imm)));
6271               assign(res, binop(Iop_Or64,
6272                                 binop(Iop_And64,
6273                                       getDRegI64(dreg),
6274                                       unop(Iop_Not64,
6275                                            mkexpr(mask))),
6276                                 binop(op,
6277                                       getDRegI64(mreg),
6278                                       mkU8(shift_imm))));
6279               putDRegI64(dreg, mkexpr(res), condT);
6280            }
6281            DIP("vsli.%d %c%u, %c%u, #%u\n",
6282                8 << size, Q ? 'q' : 'd', dreg,
6283                Q ? 'q' : 'd', mreg, shift_imm);
6284            return True;
6285         } else {
6286            /* VSHL #imm */
6287            shift_imm = 8 * (1 << size) - shift_imm;
6288            if (Q) {
6289               res = newTemp(Ity_V128);
6290            } else {
6291               res = newTemp(Ity_I64);
6292            }
6293            switch (size) {
6294               case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6295               case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6296               case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6297               case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6298               default: vassert(0);
6299            }
6300            assign(res, binop(op, Q ? getQReg(mreg) : getDRegI64(mreg),
6301                     mkU8(shift_imm)));
6302            if (Q) {
6303               putQReg(dreg, mkexpr(res), condT);
6304            } else {
6305               putDRegI64(dreg, mkexpr(res), condT);
6306            }
6307            DIP("vshl.i%d %c%u, %c%u, #%u\n",
6308                8 << size, Q ? 'q' : 'd', dreg,
6309                Q ? 'q' : 'd', mreg, shift_imm);
6310            return True;
6311         }
6312         break;
6313      case 6:
6314      case 7:
6315         /* VQSHL, VQSHLU */
6316         shift_imm = 8 * (1 << size) - shift_imm;
6317         if (U) {
6318            if (A & 1) {
6319               switch (size) {
6320                  case 0:
6321                     op = Q ? Iop_QShlNsatUU8x16 : Iop_QShlNsatUU8x8;
6322                     op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6323                     break;
6324                  case 1:
6325                     op = Q ? Iop_QShlNsatUU16x8 : Iop_QShlNsatUU16x4;
6326                     op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6327                     break;
6328                  case 2:
6329                     op = Q ? Iop_QShlNsatUU32x4 : Iop_QShlNsatUU32x2;
6330                     op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6331                     break;
6332                  case 3:
6333                     op = Q ? Iop_QShlNsatUU64x2 : Iop_QShlNsatUU64x1;
6334                     op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6335                     break;
6336                  default:
6337                     vassert(0);
6338               }
6339               DIP("vqshl.u%d %c%u, %c%u, #%u\n",
6340                   8 << size,
6341                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6342            } else {
6343               switch (size) {
6344                  case 0:
6345                     op = Q ? Iop_QShlNsatSU8x16 : Iop_QShlNsatSU8x8;
6346                     op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6347                     break;
6348                  case 1:
6349                     op = Q ? Iop_QShlNsatSU16x8 : Iop_QShlNsatSU16x4;
6350                     op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6351                     break;
6352                  case 2:
6353                     op = Q ? Iop_QShlNsatSU32x4 : Iop_QShlNsatSU32x2;
6354                     op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6355                     break;
6356                  case 3:
6357                     op = Q ? Iop_QShlNsatSU64x2 : Iop_QShlNsatSU64x1;
6358                     op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6359                     break;
6360                  default:
6361                     vassert(0);
6362               }
6363               DIP("vqshlu.s%d %c%u, %c%u, #%u\n",
6364                   8 << size,
6365                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6366            }
6367         } else {
6368            if (!(A & 1))
6369               return False;
6370            switch (size) {
6371               case 0:
6372                  op = Q ? Iop_QShlNsatSS8x16 : Iop_QShlNsatSS8x8;
6373                  op_rev = Q ? Iop_SarN8x16 : Iop_SarN8x8;
6374                  break;
6375               case 1:
6376                  op = Q ? Iop_QShlNsatSS16x8 : Iop_QShlNsatSS16x4;
6377                  op_rev = Q ? Iop_SarN16x8 : Iop_SarN16x4;
6378                  break;
6379               case 2:
6380                  op = Q ? Iop_QShlNsatSS32x4 : Iop_QShlNsatSS32x2;
6381                  op_rev = Q ? Iop_SarN32x4 : Iop_SarN32x2;
6382                  break;
6383               case 3:
6384                  op = Q ? Iop_QShlNsatSS64x2 : Iop_QShlNsatSS64x1;
6385                  op_rev = Q ? Iop_SarN64x2 : Iop_Sar64;
6386                  break;
6387               default:
6388                  vassert(0);
6389            }
6390            DIP("vqshl.s%d %c%u, %c%u, #%u\n",
6391                8 << size,
6392                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6393         }
6394         if (Q) {
6395            tmp = newTemp(Ity_V128);
6396            res = newTemp(Ity_V128);
6397            reg_m = newTemp(Ity_V128);
6398            assign(reg_m, getQReg(mreg));
6399         } else {
6400            tmp = newTemp(Ity_I64);
6401            res = newTemp(Ity_I64);
6402            reg_m = newTemp(Ity_I64);
6403            assign(reg_m, getDRegI64(mreg));
6404         }
6405         assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6406         assign(tmp, binop(op_rev, mkexpr(res), mkU8(shift_imm)));
6407         setFlag_QC(mkexpr(tmp), mkexpr(reg_m), Q, condT);
6408         if (Q)
6409            putQReg(dreg, mkexpr(res), condT);
6410         else
6411            putDRegI64(dreg, mkexpr(res), condT);
6412         return True;
6413      case 8:
6414         if (!U) {
6415            if (L == 1)
6416               return False;
6417            size++;
6418            dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6419            mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
6420            if (mreg & 1)
6421               return False;
6422            mreg >>= 1;
6423            if (!B) {
6424               /* VSHRN*/
6425               IROp narOp;
6426               reg_m = newTemp(Ity_V128);
6427               assign(reg_m, getQReg(mreg));
6428               res = newTemp(Ity_I64);
6429               switch (size) {
6430                  case 1:
6431                     op = Iop_ShrN16x8;
6432                     narOp = Iop_NarrowUn16to8x8;
6433                     break;
6434                  case 2:
6435                     op = Iop_ShrN32x4;
6436                     narOp = Iop_NarrowUn32to16x4;
6437                     break;
6438                  case 3:
6439                     op = Iop_ShrN64x2;
6440                     narOp = Iop_NarrowUn64to32x2;
6441                     break;
6442                  default:
6443                     vassert(0);
6444               }
6445               assign(res, unop(narOp,
6446                                binop(op,
6447                                      mkexpr(reg_m),
6448                                      mkU8(shift_imm))));
6449               putDRegI64(dreg, mkexpr(res), condT);
6450               DIP("vshrn.i%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6451                   shift_imm);
6452               return True;
6453            } else {
6454               /* VRSHRN   */
6455               IROp addOp, shOp, narOp;
6456               IRExpr *imm_val;
6457               reg_m = newTemp(Ity_V128);
6458               assign(reg_m, getQReg(mreg));
6459               res = newTemp(Ity_I64);
6460               imm = 1L;
6461               switch (size) {
6462                  case 0: imm = (imm <<  8) | imm; /* fall through */
6463                  case 1: imm = (imm << 16) | imm; /* fall through */
6464                  case 2: imm = (imm << 32) | imm; /* fall through */
6465                  case 3: break;
6466                  default: vassert(0);
6467               }
6468               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
6469               switch (size) {
6470                  case 1:
6471                     addOp = Iop_Add16x8;
6472                     shOp = Iop_ShrN16x8;
6473                     narOp = Iop_NarrowUn16to8x8;
6474                     break;
6475                  case 2:
6476                     addOp = Iop_Add32x4;
6477                     shOp = Iop_ShrN32x4;
6478                     narOp = Iop_NarrowUn32to16x4;
6479                     break;
6480                  case 3:
6481                     addOp = Iop_Add64x2;
6482                     shOp = Iop_ShrN64x2;
6483                     narOp = Iop_NarrowUn64to32x2;
6484                     break;
6485                  default:
6486                     vassert(0);
6487               }
6488               assign(res, unop(narOp,
6489                                binop(addOp,
6490                                      binop(shOp,
6491                                            mkexpr(reg_m),
6492                                            mkU8(shift_imm)),
6493                                      binop(Iop_AndV128,
6494                                            binop(shOp,
6495                                                  mkexpr(reg_m),
6496                                                  mkU8(shift_imm - 1)),
6497                                            imm_val))));
6498               putDRegI64(dreg, mkexpr(res), condT);
6499               if (shift_imm == 0) {
6500                  DIP("vmov%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6501                      shift_imm);
6502               } else {
6503                  DIP("vrshrn.i%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6504                      shift_imm);
6505               }
6506               return True;
6507            }
6508         } else {
6509            /* fall through */
6510         }
6511      case 9:
6512         dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6513         mreg = ((theInstr >>  1) & 0x10) | (theInstr & 0xF);
6514         if (mreg & 1)
6515            return False;
6516         mreg >>= 1;
6517         size++;
6518         if ((theInstr >> 8) & 1) {
6519            switch (size) {
6520               case 1:
6521                  op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6522                  cvt = U ? Iop_QNarrowUn16Uto8Ux8 : Iop_QNarrowUn16Sto8Sx8;
6523                  cvt2 = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6524                  break;
6525               case 2:
6526                  op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6527                  cvt = U ? Iop_QNarrowUn32Uto16Ux4 : Iop_QNarrowUn32Sto16Sx4;
6528                  cvt2 = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6529                  break;
6530               case 3:
6531                  op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6532                  cvt = U ? Iop_QNarrowUn64Uto32Ux2 : Iop_QNarrowUn64Sto32Sx2;
6533                  cvt2 = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6534                  break;
6535               default:
6536                  vassert(0);
6537            }
6538            DIP("vq%sshrn.%c%d d%u, q%u, #%u\n", B ? "r" : "",
6539                U ? 'u' : 's', 8 << size, dreg, mreg, shift_imm);
6540         } else {
6541            vassert(U);
6542            switch (size) {
6543               case 1:
6544                  op = Iop_SarN16x8;
6545                  cvt = Iop_QNarrowUn16Sto8Ux8;
6546                  cvt2 = Iop_Widen8Uto16x8;
6547                  break;
6548               case 2:
6549                  op = Iop_SarN32x4;
6550                  cvt = Iop_QNarrowUn32Sto16Ux4;
6551                  cvt2 = Iop_Widen16Uto32x4;
6552                  break;
6553               case 3:
6554                  op = Iop_SarN64x2;
6555                  cvt = Iop_QNarrowUn64Sto32Ux2;
6556                  cvt2 = Iop_Widen32Uto64x2;
6557                  break;
6558               default:
6559                  vassert(0);
6560            }
6561            DIP("vq%sshrun.s%d d%u, q%u, #%u\n", B ? "r" : "",
6562                8 << size, dreg, mreg, shift_imm);
6563         }
6564         if (B) {
6565            if (shift_imm > 0) {
6566               imm = 1;
6567               switch (size) {
6568                  case 1: imm = (imm << 16) | imm; /* fall through */
6569                  case 2: imm = (imm << 32) | imm; /* fall through */
6570                  case 3: break;
6571                  case 0: default: vassert(0);
6572               }
6573               switch (size) {
6574                  case 1: add = Iop_Add16x8; break;
6575                  case 2: add = Iop_Add32x4; break;
6576                  case 3: add = Iop_Add64x2; break;
6577                  case 0: default: vassert(0);
6578               }
6579            }
6580         }
6581         reg_m = newTemp(Ity_V128);
6582         res = newTemp(Ity_V128);
6583         assign(reg_m, getQReg(mreg));
6584         if (B) {
6585            /* VQRSHRN, VQRSHRUN */
6586            assign(res, binop(add,
6587                              binop(op, mkexpr(reg_m), mkU8(shift_imm)),
6588                              binop(Iop_AndV128,
6589                                    binop(op,
6590                                          mkexpr(reg_m),
6591                                          mkU8(shift_imm - 1)),
6592                                    mkU128(imm))));
6593         } else {
6594            /* VQSHRN, VQSHRUN */
6595            assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6596         }
6597         setFlag_QC(unop(cvt2, unop(cvt, mkexpr(res))), mkexpr(res),
6598                    True, condT);
6599         putDRegI64(dreg, unop(cvt, mkexpr(res)), condT);
6600         return True;
6601      case 10:
6602         /* VSHLL
6603            VMOVL ::= VSHLL #0 */
6604         if (B)
6605            return False;
6606         if (dreg & 1)
6607            return False;
6608         dreg >>= 1;
6609         shift_imm = (8 << size) - shift_imm;
6610         res = newTemp(Ity_V128);
6611         switch (size) {
6612            case 0:
6613               op = Iop_ShlN16x8;
6614               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6615               break;
6616            case 1:
6617               op = Iop_ShlN32x4;
6618               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6619               break;
6620            case 2:
6621               op = Iop_ShlN64x2;
6622               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6623               break;
6624            case 3:
6625               return False;
6626            default:
6627               vassert(0);
6628         }
6629         assign(res, binop(op, unop(cvt, getDRegI64(mreg)), mkU8(shift_imm)));
6630         putQReg(dreg, mkexpr(res), condT);
6631         if (shift_imm == 0) {
6632            DIP("vmovl.%c%d q%u, d%u\n", U ? 'u' : 's', 8 << size,
6633                dreg, mreg);
6634         } else {
6635            DIP("vshll.%c%d q%u, d%u, #%u\n", U ? 'u' : 's', 8 << size,
6636                dreg, mreg, shift_imm);
6637         }
6638         return True;
6639      case 14:
6640      case 15:
6641         /* VCVT floating-point <-> fixed-point */
6642         if ((theInstr >> 8) & 1) {
6643            if (U) {
6644               op = Q ? Iop_F32ToFixed32Ux4_RZ : Iop_F32ToFixed32Ux2_RZ;
6645            } else {
6646               op = Q ? Iop_F32ToFixed32Sx4_RZ : Iop_F32ToFixed32Sx2_RZ;
6647            }
6648            DIP("vcvt.%c32.f32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6649                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6650                64 - ((theInstr >> 16) & 0x3f));
6651         } else {
6652            if (U) {
6653               op = Q ? Iop_Fixed32UToF32x4_RN : Iop_Fixed32UToF32x2_RN;
6654            } else {
6655               op = Q ? Iop_Fixed32SToF32x4_RN : Iop_Fixed32SToF32x2_RN;
6656            }
6657            DIP("vcvt.f32.%c32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6658                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6659                64 - ((theInstr >> 16) & 0x3f));
6660         }
6661         if (((theInstr >> 21) & 1) == 0)
6662            return False;
6663         if (Q) {
6664            putQReg(dreg, binop(op, getQReg(mreg),
6665                     mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6666         } else {
6667            putDRegI64(dreg, binop(op, getDRegI64(mreg),
6668                       mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6669         }
6670         return True;
6671      default:
6672         return False;
6673
6674   }
6675   return False;
6676}
6677
6678/* A7.4.5 Two registers, miscellaneous */
6679static
6680Bool dis_neon_data_2reg_misc ( UInt theInstr, IRTemp condT )
6681{
6682   UInt A = (theInstr >> 16) & 3;
6683   UInt B = (theInstr >> 6) & 0x1f;
6684   UInt Q = (theInstr >> 6) & 1;
6685   UInt U = (theInstr >> 24) & 1;
6686   UInt size = (theInstr >> 18) & 3;
6687   UInt dreg = get_neon_d_regno(theInstr);
6688   UInt mreg = get_neon_m_regno(theInstr);
6689   UInt F = (theInstr >> 10) & 1;
6690   IRTemp arg_d = IRTemp_INVALID;
6691   IRTemp arg_m = IRTemp_INVALID;
6692   IRTemp res = IRTemp_INVALID;
6693   switch (A) {
6694      case 0:
6695         if (Q) {
6696            arg_m = newTemp(Ity_V128);
6697            res = newTemp(Ity_V128);
6698            assign(arg_m, getQReg(mreg));
6699         } else {
6700            arg_m = newTemp(Ity_I64);
6701            res = newTemp(Ity_I64);
6702            assign(arg_m, getDRegI64(mreg));
6703         }
6704         switch (B >> 1) {
6705            case 0: {
6706               /* VREV64 */
6707               IROp op;
6708               switch (size) {
6709                  case 0:
6710                     op = Q ? Iop_Reverse8sIn64_x2 : Iop_Reverse8sIn64_x1;
6711                     break;
6712                  case 1:
6713                     op = Q ? Iop_Reverse16sIn64_x2 : Iop_Reverse16sIn64_x1;
6714                     break;
6715                  case 2:
6716                     op = Q ? Iop_Reverse32sIn64_x2 : Iop_Reverse32sIn64_x1;
6717                     break;
6718                  case 3:
6719                     return False;
6720                  default:
6721                     vassert(0);
6722               }
6723               assign(res, unop(op, mkexpr(arg_m)));
6724               DIP("vrev64.%d %c%u, %c%u\n", 8 << size,
6725                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6726               break;
6727            }
6728            case 1: {
6729               /* VREV32 */
6730               IROp op;
6731               switch (size) {
6732                  case 0:
6733                     op = Q ? Iop_Reverse8sIn32_x4 : Iop_Reverse8sIn32_x2;
6734                     break;
6735                  case 1:
6736                     op = Q ? Iop_Reverse16sIn32_x4 : Iop_Reverse16sIn32_x2;
6737                     break;
6738                  case 2:
6739                  case 3:
6740                     return False;
6741                  default:
6742                     vassert(0);
6743               }
6744               assign(res, unop(op, mkexpr(arg_m)));
6745               DIP("vrev32.%d %c%u, %c%u\n", 8 << size,
6746                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6747               break;
6748            }
6749            case 2: {
6750               /* VREV16 */
6751               IROp op;
6752               switch (size) {
6753                  case 0:
6754                     op = Q ? Iop_Reverse8sIn16_x8 : Iop_Reverse8sIn16_x4;
6755                     break;
6756                  case 1:
6757                  case 2:
6758                  case 3:
6759                     return False;
6760                  default:
6761                     vassert(0);
6762               }
6763               assign(res, unop(op, mkexpr(arg_m)));
6764               DIP("vrev16.%d %c%u, %c%u\n", 8 << size,
6765                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6766               break;
6767            }
6768            case 3:
6769               return False;
6770            case 4:
6771            case 5: {
6772               /* VPADDL */
6773               IROp op;
6774               U = (theInstr >> 7) & 1;
6775               if (Q) {
6776                  switch (size) {
6777                     case 0: op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16; break;
6778                     case 1: op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8; break;
6779                     case 2: op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4; break;
6780                     case 3: return False;
6781                     default: vassert(0);
6782                  }
6783               } else {
6784                  switch (size) {
6785                     case 0: op = U ? Iop_PwAddL8Ux8  : Iop_PwAddL8Sx8;  break;
6786                     case 1: op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4; break;
6787                     case 2: op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2; break;
6788                     case 3: return False;
6789                     default: vassert(0);
6790                  }
6791               }
6792               assign(res, unop(op, mkexpr(arg_m)));
6793               DIP("vpaddl.%c%d %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6794                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6795               break;
6796            }
6797            case 6:
6798            case 7:
6799               return False;
6800            case 8: {
6801               /* VCLS */
6802               IROp op;
6803               switch (size) {
6804                  case 0: op = Q ? Iop_Cls8x16 : Iop_Cls8x8; break;
6805                  case 1: op = Q ? Iop_Cls16x8 : Iop_Cls16x4; break;
6806                  case 2: op = Q ? Iop_Cls32x4 : Iop_Cls32x2; break;
6807                  case 3: return False;
6808                  default: vassert(0);
6809               }
6810               assign(res, unop(op, mkexpr(arg_m)));
6811               DIP("vcls.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6812                   Q ? 'q' : 'd', mreg);
6813               break;
6814            }
6815            case 9: {
6816               /* VCLZ */
6817               IROp op;
6818               switch (size) {
6819                  case 0: op = Q ? Iop_Clz8x16 : Iop_Clz8x8; break;
6820                  case 1: op = Q ? Iop_Clz16x8 : Iop_Clz16x4; break;
6821                  case 2: op = Q ? Iop_Clz32x4 : Iop_Clz32x2; break;
6822                  case 3: return False;
6823                  default: vassert(0);
6824               }
6825               assign(res, unop(op, mkexpr(arg_m)));
6826               DIP("vclz.i%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6827                   Q ? 'q' : 'd', mreg);
6828               break;
6829            }
6830            case 10:
6831               /* VCNT */
6832               assign(res, unop(Q ? Iop_Cnt8x16 : Iop_Cnt8x8, mkexpr(arg_m)));
6833               DIP("vcnt.8 %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6834                   mreg);
6835               break;
6836            case 11:
6837               /* VMVN */
6838               if (Q)
6839                  assign(res, unop(Iop_NotV128, mkexpr(arg_m)));
6840               else
6841                  assign(res, unop(Iop_Not64, mkexpr(arg_m)));
6842               DIP("vmvn %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6843                   mreg);
6844               break;
6845            case 12:
6846            case 13: {
6847               /* VPADAL */
6848               IROp op, add_op;
6849               U = (theInstr >> 7) & 1;
6850               if (Q) {
6851                  switch (size) {
6852                     case 0:
6853                        op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16;
6854                        add_op = Iop_Add16x8;
6855                        break;
6856                     case 1:
6857                        op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8;
6858                        add_op = Iop_Add32x4;
6859                        break;
6860                     case 2:
6861                        op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4;
6862                        add_op = Iop_Add64x2;
6863                        break;
6864                     case 3:
6865                        return False;
6866                     default:
6867                        vassert(0);
6868                  }
6869               } else {
6870                  switch (size) {
6871                     case 0:
6872                        op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8;
6873                        add_op = Iop_Add16x4;
6874                        break;
6875                     case 1:
6876                        op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4;
6877                        add_op = Iop_Add32x2;
6878                        break;
6879                     case 2:
6880                        op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2;
6881                        add_op = Iop_Add64;
6882                        break;
6883                     case 3:
6884                        return False;
6885                     default:
6886                        vassert(0);
6887                  }
6888               }
6889               if (Q) {
6890                  arg_d = newTemp(Ity_V128);
6891                  assign(arg_d, getQReg(dreg));
6892               } else {
6893                  arg_d = newTemp(Ity_I64);
6894                  assign(arg_d, getDRegI64(dreg));
6895               }
6896               assign(res, binop(add_op, unop(op, mkexpr(arg_m)),
6897                                         mkexpr(arg_d)));
6898               DIP("vpadal.%c%d %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6899                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6900               break;
6901            }
6902            case 14: {
6903               /* VQABS */
6904               IROp op_sub, op_qsub, op_cmp;
6905               IRTemp mask, tmp;
6906               IRExpr *zero1, *zero2;
6907               IRExpr *neg, *neg2;
6908               if (Q) {
6909                  zero1 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6910                  zero2 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6911                  mask = newTemp(Ity_V128);
6912                  tmp = newTemp(Ity_V128);
6913               } else {
6914                  zero1 = mkU64(0);
6915                  zero2 = mkU64(0);
6916                  mask = newTemp(Ity_I64);
6917                  tmp = newTemp(Ity_I64);
6918               }
6919               switch (size) {
6920                  case 0:
6921                     op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6922                     op_qsub = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6923                     op_cmp = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
6924                     break;
6925                  case 1:
6926                     op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6927                     op_qsub = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6928                     op_cmp = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4;
6929                     break;
6930                  case 2:
6931                     op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6932                     op_qsub = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6933                     op_cmp = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2;
6934                     break;
6935                  case 3:
6936                     return False;
6937                  default:
6938                     vassert(0);
6939               }
6940               assign(mask, binop(op_cmp, mkexpr(arg_m), zero1));
6941               neg = binop(op_qsub, zero2, mkexpr(arg_m));
6942               neg2 = binop(op_sub, zero2, mkexpr(arg_m));
6943               assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
6944                                 binop(Q ? Iop_AndV128 : Iop_And64,
6945                                       mkexpr(mask),
6946                                       mkexpr(arg_m)),
6947                                 binop(Q ? Iop_AndV128 : Iop_And64,
6948                                       unop(Q ? Iop_NotV128 : Iop_Not64,
6949                                            mkexpr(mask)),
6950                                       neg)));
6951               assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
6952                                 binop(Q ? Iop_AndV128 : Iop_And64,
6953                                       mkexpr(mask),
6954                                       mkexpr(arg_m)),
6955                                 binop(Q ? Iop_AndV128 : Iop_And64,
6956                                       unop(Q ? Iop_NotV128 : Iop_Not64,
6957                                            mkexpr(mask)),
6958                                       neg2)));
6959               setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
6960               DIP("vqabs.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6961                   Q ? 'q' : 'd', mreg);
6962               break;
6963            }
6964            case 15: {
6965               /* VQNEG */
6966               IROp op, op2;
6967               IRExpr *zero;
6968               if (Q) {
6969                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6970               } else {
6971                  zero = mkU64(0);
6972               }
6973               switch (size) {
6974                  case 0:
6975                     op = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6976                     op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6977                     break;
6978                  case 1:
6979                     op = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6980                     op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6981                     break;
6982                  case 2:
6983                     op = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6984                     op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6985                     break;
6986                  case 3:
6987                     return False;
6988                  default:
6989                     vassert(0);
6990               }
6991               assign(res, binop(op, zero, mkexpr(arg_m)));
6992               setFlag_QC(mkexpr(res), binop(op2, zero, mkexpr(arg_m)),
6993                          Q, condT);
6994               DIP("vqneg.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6995                   Q ? 'q' : 'd', mreg);
6996               break;
6997            }
6998            default:
6999               vassert(0);
7000         }
7001         if (Q) {
7002            putQReg(dreg, mkexpr(res), condT);
7003         } else {
7004            putDRegI64(dreg, mkexpr(res), condT);
7005         }
7006         return True;
7007      case 1:
7008         if (Q) {
7009            arg_m = newTemp(Ity_V128);
7010            res = newTemp(Ity_V128);
7011            assign(arg_m, getQReg(mreg));
7012         } else {
7013            arg_m = newTemp(Ity_I64);
7014            res = newTemp(Ity_I64);
7015            assign(arg_m, getDRegI64(mreg));
7016         }
7017         switch ((B >> 1) & 0x7) {
7018            case 0: {
7019               /* VCGT #0 */
7020               IRExpr *zero;
7021               IROp op;
7022               if (Q) {
7023                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7024               } else {
7025                  zero = mkU64(0);
7026               }
7027               if (F) {
7028                  switch (size) {
7029                     case 0: case 1: case 3: return False;
7030                     case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
7031                     default: vassert(0);
7032                  }
7033               } else {
7034                  switch (size) {
7035                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7036                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7037                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7038                     case 3: return False;
7039                     default: vassert(0);
7040                  }
7041               }
7042               assign(res, binop(op, mkexpr(arg_m), zero));
7043               DIP("vcgt.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7044                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7045               break;
7046            }
7047            case 1: {
7048               /* VCGE #0 */
7049               IROp op;
7050               IRExpr *zero;
7051               if (Q) {
7052                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7053               } else {
7054                  zero = mkU64(0);
7055               }
7056               if (F) {
7057                  switch (size) {
7058                     case 0: case 1: case 3: return False;
7059                     case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
7060                     default: vassert(0);
7061                  }
7062                  assign(res, binop(op, mkexpr(arg_m), zero));
7063               } else {
7064                  switch (size) {
7065                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7066                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7067                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7068                     case 3: return False;
7069                     default: vassert(0);
7070                  }
7071                  assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7072                                   binop(op, zero, mkexpr(arg_m))));
7073               }
7074               DIP("vcge.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7075                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7076               break;
7077            }
7078            case 2: {
7079               /* VCEQ #0 */
7080               IROp op;
7081               IRExpr *zero;
7082               if (F) {
7083                  if (Q) {
7084                     zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7085                  } else {
7086                     zero = mkU64(0);
7087                  }
7088                  switch (size) {
7089                     case 0: case 1: case 3: return False;
7090                     case 2: op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2; break;
7091                     default: vassert(0);
7092                  }
7093                  assign(res, binop(op, zero, mkexpr(arg_m)));
7094               } else {
7095                  switch (size) {
7096                     case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
7097                     case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
7098                     case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
7099                     case 3: return False;
7100                     default: vassert(0);
7101                  }
7102                  assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7103                                   unop(op, mkexpr(arg_m))));
7104               }
7105               DIP("vceq.%c%d %c%u, %c%u, #0\n", F ? 'f' : 'i', 8 << size,
7106                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7107               break;
7108            }
7109            case 3: {
7110               /* VCLE #0 */
7111               IRExpr *zero;
7112               IROp op;
7113               if (Q) {
7114                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7115               } else {
7116                  zero = mkU64(0);
7117               }
7118               if (F) {
7119                  switch (size) {
7120                     case 0: case 1: case 3: return False;
7121                     case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
7122                     default: vassert(0);
7123                  }
7124                  assign(res, binop(op, zero, mkexpr(arg_m)));
7125               } else {
7126                  switch (size) {
7127                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7128                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7129                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7130                     case 3: return False;
7131                     default: vassert(0);
7132                  }
7133                  assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7134                                   binop(op, mkexpr(arg_m), zero)));
7135               }
7136               DIP("vcle.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7137                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7138               break;
7139            }
7140            case 4: {
7141               /* VCLT #0 */
7142               IROp op;
7143               IRExpr *zero;
7144               if (Q) {
7145                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7146               } else {
7147                  zero = mkU64(0);
7148               }
7149               if (F) {
7150                  switch (size) {
7151                     case 0: case 1: case 3: return False;
7152                     case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
7153                     default: vassert(0);
7154                  }
7155                  assign(res, binop(op, zero, mkexpr(arg_m)));
7156               } else {
7157                  switch (size) {
7158                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7159                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7160                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7161                     case 3: return False;
7162                     default: vassert(0);
7163                  }
7164                  assign(res, binop(op, zero, mkexpr(arg_m)));
7165               }
7166               DIP("vclt.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7167                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7168               break;
7169            }
7170            case 5:
7171               return False;
7172            case 6: {
7173               /* VABS */
7174               if (!F) {
7175                  IROp op;
7176                  switch(size) {
7177                     case 0: op = Q ? Iop_Abs8x16 : Iop_Abs8x8; break;
7178                     case 1: op = Q ? Iop_Abs16x8 : Iop_Abs16x4; break;
7179                     case 2: op = Q ? Iop_Abs32x4 : Iop_Abs32x2; break;
7180                     case 3: return False;
7181                     default: vassert(0);
7182                  }
7183                  assign(res, unop(op, mkexpr(arg_m)));
7184               } else {
7185                  assign(res, unop(Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2,
7186                                   mkexpr(arg_m)));
7187               }
7188               DIP("vabs.%c%d %c%u, %c%u\n",
7189                   F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7190                   Q ? 'q' : 'd', mreg);
7191               break;
7192            }
7193            case 7: {
7194               /* VNEG */
7195               IROp op;
7196               IRExpr *zero;
7197               if (F) {
7198                  switch (size) {
7199                     case 0: case 1: case 3: return False;
7200                     case 2: op = Q ? Iop_Neg32Fx4 : Iop_Neg32Fx2; break;
7201                     default: vassert(0);
7202                  }
7203                  assign(res, unop(op, mkexpr(arg_m)));
7204               } else {
7205                  if (Q) {
7206                     zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7207                  } else {
7208                     zero = mkU64(0);
7209                  }
7210                  switch (size) {
7211                     case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
7212                     case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
7213                     case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
7214                     case 3: return False;
7215                     default: vassert(0);
7216                  }
7217                  assign(res, binop(op, zero, mkexpr(arg_m)));
7218               }
7219               DIP("vneg.%c%d %c%u, %c%u\n",
7220                   F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7221                   Q ? 'q' : 'd', mreg);
7222               break;
7223            }
7224            default:
7225               vassert(0);
7226         }
7227         if (Q) {
7228            putQReg(dreg, mkexpr(res), condT);
7229         } else {
7230            putDRegI64(dreg, mkexpr(res), condT);
7231         }
7232         return True;
7233      case 2:
7234         if ((B >> 1) == 0) {
7235            /* VSWP */
7236            if (Q) {
7237               arg_m = newTemp(Ity_V128);
7238               assign(arg_m, getQReg(mreg));
7239               putQReg(mreg, getQReg(dreg), condT);
7240               putQReg(dreg, mkexpr(arg_m), condT);
7241            } else {
7242               arg_m = newTemp(Ity_I64);
7243               assign(arg_m, getDRegI64(mreg));
7244               putDRegI64(mreg, getDRegI64(dreg), condT);
7245               putDRegI64(dreg, mkexpr(arg_m), condT);
7246            }
7247            DIP("vswp %c%u, %c%u\n",
7248                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7249            return True;
7250         } else if ((B >> 1) == 1) {
7251            /* VTRN */
7252            IROp op_odd = Iop_INVALID, op_even = Iop_INVALID;
7253            IRTemp old_m, old_d, new_d, new_m;
7254            if (Q) {
7255               old_m = newTemp(Ity_V128);
7256               old_d = newTemp(Ity_V128);
7257               new_m = newTemp(Ity_V128);
7258               new_d = newTemp(Ity_V128);
7259               assign(old_m, getQReg(mreg));
7260               assign(old_d, getQReg(dreg));
7261            } else {
7262               old_m = newTemp(Ity_I64);
7263               old_d = newTemp(Ity_I64);
7264               new_m = newTemp(Ity_I64);
7265               new_d = newTemp(Ity_I64);
7266               assign(old_m, getDRegI64(mreg));
7267               assign(old_d, getDRegI64(dreg));
7268            }
7269            if (Q) {
7270               switch (size) {
7271                  case 0:
7272                     op_odd  = Iop_InterleaveOddLanes8x16;
7273                     op_even = Iop_InterleaveEvenLanes8x16;
7274                     break;
7275                  case 1:
7276                     op_odd  = Iop_InterleaveOddLanes16x8;
7277                     op_even = Iop_InterleaveEvenLanes16x8;
7278                     break;
7279                  case 2:
7280                     op_odd  = Iop_InterleaveOddLanes32x4;
7281                     op_even = Iop_InterleaveEvenLanes32x4;
7282                     break;
7283                  case 3:
7284                     return False;
7285                  default:
7286                     vassert(0);
7287               }
7288            } else {
7289               switch (size) {
7290                  case 0:
7291                     op_odd  = Iop_InterleaveOddLanes8x8;
7292                     op_even = Iop_InterleaveEvenLanes8x8;
7293                     break;
7294                  case 1:
7295                     op_odd  = Iop_InterleaveOddLanes16x4;
7296                     op_even = Iop_InterleaveEvenLanes16x4;
7297                     break;
7298                  case 2:
7299                     op_odd  = Iop_InterleaveHI32x2;
7300                     op_even = Iop_InterleaveLO32x2;
7301                     break;
7302                  case 3:
7303                     return False;
7304                  default:
7305                     vassert(0);
7306               }
7307            }
7308            assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7309            assign(new_m, binop(op_odd, mkexpr(old_m), mkexpr(old_d)));
7310            if (Q) {
7311               putQReg(dreg, mkexpr(new_d), condT);
7312               putQReg(mreg, mkexpr(new_m), condT);
7313            } else {
7314               putDRegI64(dreg, mkexpr(new_d), condT);
7315               putDRegI64(mreg, mkexpr(new_m), condT);
7316            }
7317            DIP("vtrn.%d %c%u, %c%u\n",
7318                8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7319            return True;
7320         } else if ((B >> 1) == 2) {
7321            /* VUZP */
7322            IROp op_even, op_odd;
7323            IRTemp old_m, old_d, new_m, new_d;
7324            if (!Q && size == 2)
7325               return False;
7326            if (Q) {
7327               old_m = newTemp(Ity_V128);
7328               old_d = newTemp(Ity_V128);
7329               new_m = newTemp(Ity_V128);
7330               new_d = newTemp(Ity_V128);
7331               assign(old_m, getQReg(mreg));
7332               assign(old_d, getQReg(dreg));
7333            } else {
7334               old_m = newTemp(Ity_I64);
7335               old_d = newTemp(Ity_I64);
7336               new_m = newTemp(Ity_I64);
7337               new_d = newTemp(Ity_I64);
7338               assign(old_m, getDRegI64(mreg));
7339               assign(old_d, getDRegI64(dreg));
7340            }
7341            switch (size) {
7342               case 0:
7343                  op_odd  = Q ? Iop_CatOddLanes8x16 : Iop_CatOddLanes8x8;
7344                  op_even = Q ? Iop_CatEvenLanes8x16 : Iop_CatEvenLanes8x8;
7345                  break;
7346               case 1:
7347                  op_odd  = Q ? Iop_CatOddLanes16x8 : Iop_CatOddLanes16x4;
7348                  op_even = Q ? Iop_CatEvenLanes16x8 : Iop_CatEvenLanes16x4;
7349                  break;
7350               case 2:
7351                  op_odd  = Iop_CatOddLanes32x4;
7352                  op_even = Iop_CatEvenLanes32x4;
7353                  break;
7354               case 3:
7355                  return False;
7356               default:
7357                  vassert(0);
7358            }
7359            assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7360            assign(new_m, binop(op_odd,  mkexpr(old_m), mkexpr(old_d)));
7361            if (Q) {
7362               putQReg(dreg, mkexpr(new_d), condT);
7363               putQReg(mreg, mkexpr(new_m), condT);
7364            } else {
7365               putDRegI64(dreg, mkexpr(new_d), condT);
7366               putDRegI64(mreg, mkexpr(new_m), condT);
7367            }
7368            DIP("vuzp.%d %c%u, %c%u\n",
7369                8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7370            return True;
7371         } else if ((B >> 1) == 3) {
7372            /* VZIP */
7373            IROp op_lo, op_hi;
7374            IRTemp old_m, old_d, new_m, new_d;
7375            if (!Q && size == 2)
7376               return False;
7377            if (Q) {
7378               old_m = newTemp(Ity_V128);
7379               old_d = newTemp(Ity_V128);
7380               new_m = newTemp(Ity_V128);
7381               new_d = newTemp(Ity_V128);
7382               assign(old_m, getQReg(mreg));
7383               assign(old_d, getQReg(dreg));
7384            } else {
7385               old_m = newTemp(Ity_I64);
7386               old_d = newTemp(Ity_I64);
7387               new_m = newTemp(Ity_I64);
7388               new_d = newTemp(Ity_I64);
7389               assign(old_m, getDRegI64(mreg));
7390               assign(old_d, getDRegI64(dreg));
7391            }
7392            switch (size) {
7393               case 0:
7394                  op_hi = Q ? Iop_InterleaveHI8x16 : Iop_InterleaveHI8x8;
7395                  op_lo = Q ? Iop_InterleaveLO8x16 : Iop_InterleaveLO8x8;
7396                  break;
7397               case 1:
7398                  op_hi = Q ? Iop_InterleaveHI16x8 : Iop_InterleaveHI16x4;
7399                  op_lo = Q ? Iop_InterleaveLO16x8 : Iop_InterleaveLO16x4;
7400                  break;
7401               case 2:
7402                  op_hi = Iop_InterleaveHI32x4;
7403                  op_lo = Iop_InterleaveLO32x4;
7404                  break;
7405               case 3:
7406                  return False;
7407               default:
7408                  vassert(0);
7409            }
7410            assign(new_d, binop(op_lo, mkexpr(old_m), mkexpr(old_d)));
7411            assign(new_m, binop(op_hi, mkexpr(old_m), mkexpr(old_d)));
7412            if (Q) {
7413               putQReg(dreg, mkexpr(new_d), condT);
7414               putQReg(mreg, mkexpr(new_m), condT);
7415            } else {
7416               putDRegI64(dreg, mkexpr(new_d), condT);
7417               putDRegI64(mreg, mkexpr(new_m), condT);
7418            }
7419            DIP("vzip.%d %c%u, %c%u\n",
7420                8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7421            return True;
7422         } else if (B == 8) {
7423            /* VMOVN */
7424            IROp op;
7425            mreg >>= 1;
7426            switch (size) {
7427               case 0: op = Iop_NarrowUn16to8x8;  break;
7428               case 1: op = Iop_NarrowUn32to16x4; break;
7429               case 2: op = Iop_NarrowUn64to32x2; break;
7430               case 3: return False;
7431               default: vassert(0);
7432            }
7433            putDRegI64(dreg, unop(op, getQReg(mreg)), condT);
7434            DIP("vmovn.i%d d%u, q%u\n", 16 << size, dreg, mreg);
7435            return True;
7436         } else if (B == 9 || (B >> 1) == 5) {
7437            /* VQMOVN, VQMOVUN */
7438            IROp op, op2;
7439            IRTemp tmp;
7440            dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
7441            mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
7442            if (mreg & 1)
7443               return False;
7444            mreg >>= 1;
7445            switch (size) {
7446               case 0: op2 = Iop_NarrowUn16to8x8;  break;
7447               case 1: op2 = Iop_NarrowUn32to16x4; break;
7448               case 2: op2 = Iop_NarrowUn64to32x2; break;
7449               case 3: return False;
7450               default: vassert(0);
7451            }
7452            switch (B & 3) {
7453               case 0:
7454                  vassert(0);
7455               case 1:
7456                  switch (size) {
7457                     case 0: op = Iop_QNarrowUn16Sto8Ux8;  break;
7458                     case 1: op = Iop_QNarrowUn32Sto16Ux4; break;
7459                     case 2: op = Iop_QNarrowUn64Sto32Ux2; break;
7460                     case 3: return False;
7461                     default: vassert(0);
7462                  }
7463                  DIP("vqmovun.s%d d%u, q%u\n", 16 << size, dreg, mreg);
7464                  break;
7465               case 2:
7466                  switch (size) {
7467                     case 0: op = Iop_QNarrowUn16Sto8Sx8;  break;
7468                     case 1: op = Iop_QNarrowUn32Sto16Sx4; break;
7469                     case 2: op = Iop_QNarrowUn64Sto32Sx2; break;
7470                     case 3: return False;
7471                     default: vassert(0);
7472                  }
7473                  DIP("vqmovn.s%d d%u, q%u\n", 16 << size, dreg, mreg);
7474                  break;
7475               case 3:
7476                  switch (size) {
7477                     case 0: op = Iop_QNarrowUn16Uto8Ux8;  break;
7478                     case 1: op = Iop_QNarrowUn32Uto16Ux4; break;
7479                     case 2: op = Iop_QNarrowUn64Uto32Ux2; break;
7480                     case 3: return False;
7481                     default: vassert(0);
7482                  }
7483                  DIP("vqmovn.u%d d%u, q%u\n", 16 << size, dreg, mreg);
7484                  break;
7485               default:
7486                  vassert(0);
7487            }
7488            res = newTemp(Ity_I64);
7489            tmp = newTemp(Ity_I64);
7490            assign(res, unop(op, getQReg(mreg)));
7491            assign(tmp, unop(op2, getQReg(mreg)));
7492            setFlag_QC(mkexpr(res), mkexpr(tmp), False, condT);
7493            putDRegI64(dreg, mkexpr(res), condT);
7494            return True;
7495         } else if (B == 12) {
7496            /* VSHLL (maximum shift) */
7497            IROp op, cvt;
7498            UInt shift_imm;
7499            if (Q)
7500               return False;
7501            if (dreg & 1)
7502               return False;
7503            dreg >>= 1;
7504            shift_imm = 8 << size;
7505            res = newTemp(Ity_V128);
7506            switch (size) {
7507               case 0: op = Iop_ShlN16x8; cvt = Iop_Widen8Uto16x8;  break;
7508               case 1: op = Iop_ShlN32x4; cvt = Iop_Widen16Uto32x4; break;
7509               case 2: op = Iop_ShlN64x2; cvt = Iop_Widen32Uto64x2; break;
7510               case 3: return False;
7511               default: vassert(0);
7512            }
7513            assign(res, binop(op, unop(cvt, getDRegI64(mreg)),
7514                                  mkU8(shift_imm)));
7515            putQReg(dreg, mkexpr(res), condT);
7516            DIP("vshll.i%d q%u, d%u, #%d\n", 8 << size, dreg, mreg, 8 << size);
7517            return True;
7518         } else if ((B >> 3) == 3 && (B & 3) == 0) {
7519            /* VCVT (half<->single) */
7520            /* Half-precision extensions are needed to run this */
7521            vassert(0); // ATC
7522            if (((theInstr >> 18) & 3) != 1)
7523               return False;
7524            if ((theInstr >> 8) & 1) {
7525               if (dreg & 1)
7526                  return False;
7527               dreg >>= 1;
7528               putQReg(dreg, unop(Iop_F16toF32x4, getDRegI64(mreg)),
7529                     condT);
7530               DIP("vcvt.f32.f16 q%u, d%u\n", dreg, mreg);
7531            } else {
7532               if (mreg & 1)
7533                  return False;
7534               mreg >>= 1;
7535               putDRegI64(dreg, unop(Iop_F32toF16x4, getQReg(mreg)),
7536                                condT);
7537               DIP("vcvt.f16.f32 d%u, q%u\n", dreg, mreg);
7538            }
7539            return True;
7540         } else {
7541            return False;
7542         }
7543         vassert(0);
7544         return True;
7545      case 3:
7546         if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,0)) {
7547            /* VRECPE */
7548            IROp op;
7549            F = (theInstr >> 8) & 1;
7550            if (size != 2)
7551               return False;
7552            if (Q) {
7553               op = F ? Iop_RecipEst32Fx4 : Iop_RecipEst32Ux4;
7554               putQReg(dreg, unop(op, getQReg(mreg)), condT);
7555               DIP("vrecpe.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7556            } else {
7557               op = F ? Iop_RecipEst32Fx2 : Iop_RecipEst32Ux2;
7558               putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7559               DIP("vrecpe.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7560            }
7561            return True;
7562         } else if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,1)) {
7563            /* VRSQRTE */
7564            IROp op;
7565            F = (B >> 2) & 1;
7566            if (size != 2)
7567               return False;
7568            if (F) {
7569               /* fp */
7570               op = Q ? Iop_RSqrtEst32Fx4 : Iop_RSqrtEst32Fx2;
7571            } else {
7572               /* unsigned int */
7573               op = Q ? Iop_RSqrtEst32Ux4 : Iop_RSqrtEst32Ux2;
7574            }
7575            if (Q) {
7576               putQReg(dreg, unop(op, getQReg(mreg)), condT);
7577               DIP("vrsqrte.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7578            } else {
7579               putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7580               DIP("vrsqrte.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7581            }
7582            return True;
7583         } else if ((B >> 3) == 3) {
7584            /* VCVT (fp<->integer) */
7585            IROp op;
7586            if (size != 2)
7587               return False;
7588            switch ((B >> 1) & 3) {
7589               case 0:
7590                  op = Q ? Iop_I32StoFx4 : Iop_I32StoFx2;
7591                  DIP("vcvt.f32.s32 %c%u, %c%u\n",
7592                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7593                  break;
7594               case 1:
7595                  op = Q ? Iop_I32UtoFx4 : Iop_I32UtoFx2;
7596                  DIP("vcvt.f32.u32 %c%u, %c%u\n",
7597                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7598                  break;
7599               case 2:
7600                  op = Q ? Iop_FtoI32Sx4_RZ : Iop_FtoI32Sx2_RZ;
7601                  DIP("vcvt.s32.f32 %c%u, %c%u\n",
7602                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7603                  break;
7604               case 3:
7605                  op = Q ? Iop_FtoI32Ux4_RZ : Iop_FtoI32Ux2_RZ;
7606                  DIP("vcvt.u32.f32 %c%u, %c%u\n",
7607                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7608                  break;
7609               default:
7610                  vassert(0);
7611            }
7612            if (Q) {
7613               putQReg(dreg, unop(op, getQReg(mreg)), condT);
7614            } else {
7615               putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7616            }
7617            return True;
7618         } else {
7619            return False;
7620         }
7621         vassert(0);
7622         return True;
7623      default:
7624         vassert(0);
7625   }
7626   return False;
7627}
7628
7629/* A7.4.6 One register and a modified immediate value */
7630static
7631void ppNeonImm(UInt imm, UInt cmode, UInt op)
7632{
7633   int i;
7634   switch (cmode) {
7635      case 0: case 1: case 8: case 9:
7636         vex_printf("0x%x", imm);
7637         break;
7638      case 2: case 3: case 10: case 11:
7639         vex_printf("0x%x00", imm);
7640         break;
7641      case 4: case 5:
7642         vex_printf("0x%x0000", imm);
7643         break;
7644      case 6: case 7:
7645         vex_printf("0x%x000000", imm);
7646         break;
7647      case 12:
7648         vex_printf("0x%xff", imm);
7649         break;
7650      case 13:
7651         vex_printf("0x%xffff", imm);
7652         break;
7653      case 14:
7654         if (op) {
7655            vex_printf("0x");
7656            for (i = 7; i >= 0; i--)
7657               vex_printf("%s", (imm & (1 << i)) ? "ff" : "00");
7658         } else {
7659            vex_printf("0x%x", imm);
7660         }
7661         break;
7662      case 15:
7663         vex_printf("0x%x", imm);
7664         break;
7665   }
7666}
7667
7668static
7669const char *ppNeonImmType(UInt cmode, UInt op)
7670{
7671   switch (cmode) {
7672      case 0 ... 7:
7673      case 12: case 13:
7674         return "i32";
7675      case 8 ... 11:
7676         return "i16";
7677      case 14:
7678         if (op)
7679            return "i64";
7680         else
7681            return "i8";
7682      case 15:
7683         if (op)
7684            vassert(0);
7685         else
7686            return "f32";
7687      default:
7688         vassert(0);
7689   }
7690}
7691
7692static
7693void DIPimm(UInt imm, UInt cmode, UInt op,
7694            const char *instr, UInt Q, UInt dreg)
7695{
7696   if (vex_traceflags & VEX_TRACE_FE) {
7697      vex_printf("%s.%s %c%u, #", instr,
7698                 ppNeonImmType(cmode, op), Q ? 'q' : 'd', dreg);
7699      ppNeonImm(imm, cmode, op);
7700      vex_printf("\n");
7701   }
7702}
7703
7704static
7705Bool dis_neon_data_1reg_and_imm ( UInt theInstr, IRTemp condT )
7706{
7707   UInt dreg = get_neon_d_regno(theInstr);
7708   ULong imm_raw = ((theInstr >> 17) & 0x80) | ((theInstr >> 12) & 0x70) |
7709                  (theInstr & 0xf);
7710   ULong imm_raw_pp = imm_raw;
7711   UInt cmode = (theInstr >> 8) & 0xf;
7712   UInt op_bit = (theInstr >> 5) & 1;
7713   ULong imm = 0;
7714   UInt Q = (theInstr >> 6) & 1;
7715   int i, j;
7716   UInt tmp;
7717   IRExpr *imm_val;
7718   IRExpr *expr;
7719   IRTemp tmp_var;
7720   switch(cmode) {
7721      case 7: case 6:
7722         imm_raw = imm_raw << 8;
7723         /* fallthrough */
7724      case 5: case 4:
7725         imm_raw = imm_raw << 8;
7726         /* fallthrough */
7727      case 3: case 2:
7728         imm_raw = imm_raw << 8;
7729         /* fallthrough */
7730      case 0: case 1:
7731         imm = (imm_raw << 32) | imm_raw;
7732         break;
7733      case 11: case 10:
7734         imm_raw = imm_raw << 8;
7735         /* fallthrough */
7736      case 9: case 8:
7737         imm_raw = (imm_raw << 16) | imm_raw;
7738         imm = (imm_raw << 32) | imm_raw;
7739         break;
7740      case 13:
7741         imm_raw = (imm_raw << 8) | 0xff;
7742         /* fallthrough */
7743      case 12:
7744         imm_raw = (imm_raw << 8) | 0xff;
7745         imm = (imm_raw << 32) | imm_raw;
7746         break;
7747      case 14:
7748         if (! op_bit) {
7749            for(i = 0; i < 8; i++) {
7750               imm = (imm << 8) | imm_raw;
7751            }
7752         } else {
7753            for(i = 7; i >= 0; i--) {
7754               tmp = 0;
7755               for(j = 0; j < 8; j++) {
7756                  tmp = (tmp << 1) | ((imm_raw >> i) & 1);
7757               }
7758               imm = (imm << 8) | tmp;
7759            }
7760         }
7761         break;
7762      case 15:
7763         imm = (imm_raw & 0x80) << 5;
7764         imm |= ((~imm_raw & 0x40) << 5);
7765         for(i = 1; i <= 4; i++)
7766            imm |= (imm_raw & 0x40) << i;
7767         imm |= (imm_raw & 0x7f);
7768         imm = imm << 19;
7769         imm = (imm << 32) | imm;
7770         break;
7771      default:
7772         return False;
7773   }
7774   if (Q) {
7775      imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
7776   } else {
7777      imm_val = mkU64(imm);
7778   }
7779   if (((op_bit == 0) &&
7780      (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 12) == 12))) ||
7781      ((op_bit == 1) && (cmode == 14))) {
7782      /* VMOV (immediate) */
7783      if (Q) {
7784         putQReg(dreg, imm_val, condT);
7785      } else {
7786         putDRegI64(dreg, imm_val, condT);
7787      }
7788      DIPimm(imm_raw_pp, cmode, op_bit, "vmov", Q, dreg);
7789      return True;
7790   }
7791   if ((op_bit == 1) &&
7792      (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 14) == 12))) {
7793      /* VMVN (immediate) */
7794      if (Q) {
7795         putQReg(dreg, unop(Iop_NotV128, imm_val), condT);
7796      } else {
7797         putDRegI64(dreg, unop(Iop_Not64, imm_val), condT);
7798      }
7799      DIPimm(imm_raw_pp, cmode, op_bit, "vmvn", Q, dreg);
7800      return True;
7801   }
7802   if (Q) {
7803      tmp_var = newTemp(Ity_V128);
7804      assign(tmp_var, getQReg(dreg));
7805   } else {
7806      tmp_var = newTemp(Ity_I64);
7807      assign(tmp_var, getDRegI64(dreg));
7808   }
7809   if ((op_bit == 0) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7810      /* VORR (immediate) */
7811      if (Q)
7812         expr = binop(Iop_OrV128, mkexpr(tmp_var), imm_val);
7813      else
7814         expr = binop(Iop_Or64, mkexpr(tmp_var), imm_val);
7815      DIPimm(imm_raw_pp, cmode, op_bit, "vorr", Q, dreg);
7816   } else if ((op_bit == 1) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7817      /* VBIC (immediate) */
7818      if (Q)
7819         expr = binop(Iop_AndV128, mkexpr(tmp_var),
7820                                   unop(Iop_NotV128, imm_val));
7821      else
7822         expr = binop(Iop_And64, mkexpr(tmp_var), unop(Iop_Not64, imm_val));
7823      DIPimm(imm_raw_pp, cmode, op_bit, "vbic", Q, dreg);
7824   } else {
7825      return False;
7826   }
7827   if (Q)
7828      putQReg(dreg, expr, condT);
7829   else
7830      putDRegI64(dreg, expr, condT);
7831   return True;
7832}
7833
7834/* A7.4 Advanced SIMD data-processing instructions */
7835static
7836Bool dis_neon_data_processing ( UInt theInstr, IRTemp condT )
7837{
7838   UInt A = (theInstr >> 19) & 0x1F;
7839   UInt B = (theInstr >>  8) & 0xF;
7840   UInt C = (theInstr >>  4) & 0xF;
7841   UInt U = (theInstr >> 24) & 0x1;
7842
7843   if (! (A & 0x10)) {
7844      return dis_neon_data_3same(theInstr, condT);
7845   }
7846   if (((A & 0x17) == 0x10) && ((C & 0x9) == 0x1)) {
7847      return dis_neon_data_1reg_and_imm(theInstr, condT);
7848   }
7849   if ((C & 1) == 1) {
7850      return dis_neon_data_2reg_and_shift(theInstr, condT);
7851   }
7852   if (((C & 5) == 0) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7853      return dis_neon_data_3diff(theInstr, condT);
7854   }
7855   if (((C & 5) == 4) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7856      return dis_neon_data_2reg_and_scalar(theInstr, condT);
7857   }
7858   if ((A & 0x16) == 0x16) {
7859      if ((U == 0) && ((C & 1) == 0)) {
7860         return dis_neon_vext(theInstr, condT);
7861      }
7862      if ((U != 1) || ((C & 1) == 1))
7863         return False;
7864      if ((B & 8) == 0) {
7865         return dis_neon_data_2reg_misc(theInstr, condT);
7866      }
7867      if ((B & 12) == 8) {
7868         return dis_neon_vtb(theInstr, condT);
7869      }
7870      if ((B == 12) && ((C & 9) == 0)) {
7871         return dis_neon_vdup(theInstr, condT);
7872      }
7873   }
7874   return False;
7875}
7876
7877
7878/*------------------------------------------------------------*/
7879/*--- NEON loads and stores                                ---*/
7880/*------------------------------------------------------------*/
7881
7882/* For NEON memory operations, we use the standard scheme to handle
7883   conditionalisation: generate a jump around the instruction if the
7884   condition is false.  That's only necessary in Thumb mode, however,
7885   since in ARM mode NEON instructions are unconditional. */
7886
7887/* A helper function for what follows.  It assumes we already went
7888   uncond as per comments at the top of this section. */
7889static
7890void mk_neon_elem_load_to_one_lane( UInt rD, UInt inc, UInt index,
7891                                    UInt N, UInt size, IRTemp addr )
7892{
7893   UInt i;
7894   switch (size) {
7895      case 0:
7896         putDRegI64(rD, triop(Iop_SetElem8x8, getDRegI64(rD), mkU8(index),
7897                    loadLE(Ity_I8, mkexpr(addr))), IRTemp_INVALID);
7898         break;
7899      case 1:
7900         putDRegI64(rD, triop(Iop_SetElem16x4, getDRegI64(rD), mkU8(index),
7901                    loadLE(Ity_I16, mkexpr(addr))), IRTemp_INVALID);
7902         break;
7903      case 2:
7904         putDRegI64(rD, triop(Iop_SetElem32x2, getDRegI64(rD), mkU8(index),
7905                    loadLE(Ity_I32, mkexpr(addr))), IRTemp_INVALID);
7906         break;
7907      default:
7908         vassert(0);
7909   }
7910   for (i = 1; i <= N; i++) {
7911      switch (size) {
7912         case 0:
7913            putDRegI64(rD + i * inc,
7914                       triop(Iop_SetElem8x8,
7915                             getDRegI64(rD + i * inc),
7916                             mkU8(index),
7917                             loadLE(Ity_I8, binop(Iop_Add32,
7918                                                  mkexpr(addr),
7919                                                  mkU32(i * 1)))),
7920                       IRTemp_INVALID);
7921            break;
7922         case 1:
7923            putDRegI64(rD + i * inc,
7924                       triop(Iop_SetElem16x4,
7925                             getDRegI64(rD + i * inc),
7926                             mkU8(index),
7927                             loadLE(Ity_I16, binop(Iop_Add32,
7928                                                   mkexpr(addr),
7929                                                   mkU32(i * 2)))),
7930                       IRTemp_INVALID);
7931            break;
7932         case 2:
7933            putDRegI64(rD + i * inc,
7934                       triop(Iop_SetElem32x2,
7935                             getDRegI64(rD + i * inc),
7936                             mkU8(index),
7937                             loadLE(Ity_I32, binop(Iop_Add32,
7938                                                   mkexpr(addr),
7939                                                   mkU32(i * 4)))),
7940                       IRTemp_INVALID);
7941            break;
7942         default:
7943            vassert(0);
7944      }
7945   }
7946}
7947
7948/* A(nother) helper function for what follows.  It assumes we already
7949   went uncond as per comments at the top of this section. */
7950static
7951void mk_neon_elem_store_from_one_lane( UInt rD, UInt inc, UInt index,
7952                                       UInt N, UInt size, IRTemp addr )
7953{
7954   UInt i;
7955   switch (size) {
7956      case 0:
7957         storeLE(mkexpr(addr),
7958                 binop(Iop_GetElem8x8, getDRegI64(rD), mkU8(index)));
7959         break;
7960      case 1:
7961         storeLE(mkexpr(addr),
7962                 binop(Iop_GetElem16x4, getDRegI64(rD), mkU8(index)));
7963         break;
7964      case 2:
7965         storeLE(mkexpr(addr),
7966                 binop(Iop_GetElem32x2, getDRegI64(rD), mkU8(index)));
7967         break;
7968      default:
7969         vassert(0);
7970   }
7971   for (i = 1; i <= N; i++) {
7972      switch (size) {
7973         case 0:
7974            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 1)),
7975                    binop(Iop_GetElem8x8, getDRegI64(rD + i * inc),
7976                                          mkU8(index)));
7977            break;
7978         case 1:
7979            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 2)),
7980                    binop(Iop_GetElem16x4, getDRegI64(rD + i * inc),
7981                                           mkU8(index)));
7982            break;
7983         case 2:
7984            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 4)),
7985                    binop(Iop_GetElem32x2, getDRegI64(rD + i * inc),
7986                                           mkU8(index)));
7987            break;
7988         default:
7989            vassert(0);
7990      }
7991   }
7992}
7993
7994/* Generate 2x64 -> 2x64 deinterleave code, for VLD2.  Caller must
7995   make *u0 and *u1 be valid IRTemps before the call. */
7996static void math_DEINTERLEAVE_2 (/*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
7997                                 IRTemp i0, IRTemp i1, Int laneszB)
7998{
7999   /* The following assumes that the guest is little endian, and hence
8000      that the memory-side (interleaved) data is stored
8001      little-endianly. */
8002   vassert(u0 && u1);
8003   /* This is pretty easy, since we have primitives directly to
8004      hand. */
8005   if (laneszB == 4) {
8006      // memLE(128 bits) == A0 B0 A1 B1
8007      // i0 == B0 A0, i1 == B1 A1
8008      // u0 == A1 A0, u1 == B1 B0
8009      assign(*u0, binop(Iop_InterleaveLO32x2, mkexpr(i1), mkexpr(i0)));
8010      assign(*u1, binop(Iop_InterleaveHI32x2, mkexpr(i1), mkexpr(i0)));
8011   } else if (laneszB == 2) {
8012      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
8013      // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
8014      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
8015      assign(*u0, binop(Iop_CatEvenLanes16x4, mkexpr(i1), mkexpr(i0)));
8016      assign(*u1, binop(Iop_CatOddLanes16x4,  mkexpr(i1), mkexpr(i0)));
8017   } else if (laneszB == 1) {
8018      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
8019      // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
8020      // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
8021      assign(*u0, binop(Iop_CatEvenLanes8x8, mkexpr(i1), mkexpr(i0)));
8022      assign(*u1, binop(Iop_CatOddLanes8x8,  mkexpr(i1), mkexpr(i0)));
8023   } else {
8024      // Can never happen, since VLD2 only has valid lane widths of 32,
8025      // 16 or 8 bits.
8026      vpanic("math_DEINTERLEAVE_2");
8027   }
8028}
8029
8030/* Generate 2x64 -> 2x64 interleave code, for VST2.  Caller must make
8031   *u0 and *u1 be valid IRTemps before the call. */
8032static void math_INTERLEAVE_2 (/*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
8033                               IRTemp u0, IRTemp u1, Int laneszB)
8034{
8035   /* The following assumes that the guest is little endian, and hence
8036      that the memory-side (interleaved) data is stored
8037      little-endianly. */
8038   vassert(i0 && i1);
8039   /* This is pretty easy, since we have primitives directly to
8040      hand. */
8041   if (laneszB == 4) {
8042      // memLE(128 bits) == A0 B0 A1 B1
8043      // i0 == B0 A0, i1 == B1 A1
8044      // u0 == A1 A0, u1 == B1 B0
8045      assign(*i0, binop(Iop_InterleaveLO32x2, mkexpr(u1), mkexpr(u0)));
8046      assign(*i1, binop(Iop_InterleaveHI32x2, mkexpr(u1), mkexpr(u0)));
8047   } else if (laneszB == 2) {
8048      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
8049      // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
8050      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
8051      assign(*i0, binop(Iop_InterleaveLO16x4, mkexpr(u1), mkexpr(u0)));
8052      assign(*i1, binop(Iop_InterleaveHI16x4, mkexpr(u1), mkexpr(u0)));
8053   } else if (laneszB == 1) {
8054      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
8055      // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
8056      // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
8057      assign(*i0, binop(Iop_InterleaveLO8x8, mkexpr(u1), mkexpr(u0)));
8058      assign(*i1, binop(Iop_InterleaveHI8x8, mkexpr(u1), mkexpr(u0)));
8059   } else {
8060      // Can never happen, since VST2 only has valid lane widths of 32,
8061      // 16 or 8 bits.
8062      vpanic("math_INTERLEAVE_2");
8063   }
8064}
8065
8066// Helper function for generating arbitrary slicing 'n' dicing of
8067// 3 8x8 vectors, as needed for VLD3.8 and VST3.8.
8068static IRExpr* math_PERM_8x8x3(const UChar* desc,
8069                               IRTemp s0, IRTemp s1, IRTemp s2)
8070{
8071   // desc is an array of 8 pairs, encoded as 16 bytes,
8072   // that describe how to assemble the result lanes, starting with
8073   // lane 7.  Each pair is: first component (0..2) says which of
8074   // s0/s1/s2 to use.  Second component (0..7) is the lane number
8075   // in the source to use.
8076   UInt si;
8077   for (si = 0; si < 7; si++) {
8078      vassert(desc[2 * si + 0] <= 2);
8079      vassert(desc[2 * si + 1] <= 7);
8080   }
8081   IRTemp h3 = newTemp(Ity_I64);
8082   IRTemp h2 = newTemp(Ity_I64);
8083   IRTemp h1 = newTemp(Ity_I64);
8084   IRTemp h0 = newTemp(Ity_I64);
8085   IRTemp srcs[3] = {s0, s1, s2};
8086#  define SRC_VEC(_lane)   mkexpr(srcs[desc[2 * (7-(_lane)) + 0]])
8087#  define SRC_SHIFT(_lane) mkU8(56-8*(desc[2 * (7-(_lane)) + 1]))
8088   assign(h3, binop(Iop_InterleaveHI8x8,
8089                    binop(Iop_Shl64, SRC_VEC(7), SRC_SHIFT(7)),
8090                    binop(Iop_Shl64, SRC_VEC(6), SRC_SHIFT(6))));
8091   assign(h2, binop(Iop_InterleaveHI8x8,
8092                    binop(Iop_Shl64, SRC_VEC(5), SRC_SHIFT(5)),
8093                    binop(Iop_Shl64, SRC_VEC(4), SRC_SHIFT(4))));
8094   assign(h1, binop(Iop_InterleaveHI8x8,
8095                    binop(Iop_Shl64, SRC_VEC(3), SRC_SHIFT(3)),
8096                    binop(Iop_Shl64, SRC_VEC(2), SRC_SHIFT(2))));
8097   assign(h0, binop(Iop_InterleaveHI8x8,
8098                    binop(Iop_Shl64, SRC_VEC(1), SRC_SHIFT(1)),
8099                    binop(Iop_Shl64, SRC_VEC(0), SRC_SHIFT(0))));
8100#  undef SRC_VEC
8101#  undef SRC_SHIFT
8102   // Now h3..h0 are 64 bit vectors with useful information only
8103   // in the top 16 bits.  We now concatentate those four 16-bit
8104   // groups so as to produce the final result.
8105   IRTemp w1 = newTemp(Ity_I64);
8106   IRTemp w0 = newTemp(Ity_I64);
8107   assign(w1, binop(Iop_InterleaveHI16x4, mkexpr(h3), mkexpr(h2)));
8108   assign(w0, binop(Iop_InterleaveHI16x4, mkexpr(h1), mkexpr(h0)));
8109   return binop(Iop_InterleaveHI32x2, mkexpr(w1), mkexpr(w0));
8110}
8111
8112/* Generate 3x64 -> 3x64 deinterleave code, for VLD3.  Caller must
8113   make *u0, *u1 and *u2 be valid IRTemps before the call. */
8114static void math_DEINTERLEAVE_3 (
8115               /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1, /*OUT*/IRTemp* u2,
8116               IRTemp i0, IRTemp i1, IRTemp i2, Int laneszB
8117            )
8118{
8119#  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
8120#  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
8121#  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8122   /* The following assumes that the guest is little endian, and hence
8123      that the memory-side (interleaved) data is stored
8124      little-endianly. */
8125   vassert(u0 && u1 && u2);
8126   if (laneszB == 4) {
8127      // memLE(192 bits) == A0 B0 C0 A1 B1 C1
8128      // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
8129      // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
8130      assign(*u0, IHI32x2(SHL64(i1,  0), SHL64(i0, 32)));
8131      assign(*u1, IHI32x2(SHL64(i2, 32), SHL64(i0,  0)));
8132      assign(*u2, IHI32x2(SHL64(i2,  0), SHL64(i1, 32)));
8133   } else if (laneszB == 2) {
8134      // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
8135      // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
8136      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
8137#     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
8138                IHI32x2(                                      \
8139                   IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
8140                           SHL64((_tmp2),48-16*(_la2))),      \
8141                   IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
8142                           SHL64((_tmp0),48-16*(_la0))))
8143      assign(*u0, XXX(i2,1, i1,2, i0,3, i0,0));
8144      assign(*u1, XXX(i2,2, i1,3, i1,0, i0,1));
8145      assign(*u2, XXX(i2,3, i2,0, i1,1, i0,2));
8146#     undef XXX
8147   } else if (laneszB == 1) {
8148      // These describe how the result vectors [7..0] are
8149      // assembled from the source vectors.  Each pair is
8150      // (source vector number, lane number).
8151      static const UChar de0[16] = {2,5, 2,2, 1,7, 1,4, 1,1, 0,6, 0,3, 0,0};
8152      static const UChar de1[16] = {2,6, 2,3, 2,0, 1,5, 1,2, 0,7, 0,4, 0,1};
8153      static const UChar de2[16] = {2,7, 2,4, 2,1, 1,6, 1,3, 1,0, 0,5, 0,2};
8154      assign(*u0, math_PERM_8x8x3(de0, i0, i1, i2));
8155      assign(*u1, math_PERM_8x8x3(de1, i0, i1, i2));
8156      assign(*u2, math_PERM_8x8x3(de2, i0, i1, i2));
8157   } else {
8158      // Can never happen, since VLD3 only has valid lane widths of 32,
8159      // 16 or 8 bits.
8160      vpanic("math_DEINTERLEAVE_3");
8161   }
8162#  undef SHL64
8163#  undef IHI16x4
8164#  undef IHI32x2
8165}
8166
8167/* Generate 3x64 -> 3x64 interleave code, for VST3.  Caller must
8168   make *i0, *i1 and *i2 be valid IRTemps before the call. */
8169static void math_INTERLEAVE_3 (
8170               /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1, /*OUT*/IRTemp* i2,
8171               IRTemp u0, IRTemp u1, IRTemp u2, Int laneszB
8172            )
8173{
8174#  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
8175#  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
8176#  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8177   /* The following assumes that the guest is little endian, and hence
8178      that the memory-side (interleaved) data is stored
8179      little-endianly. */
8180   vassert(i0 && i1 && i2);
8181   if (laneszB == 4) {
8182      // memLE(192 bits) == A0 B0 C0 A1 B1 C1
8183      // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
8184      // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
8185      assign(*i0, IHI32x2(SHL64(u1, 32), SHL64(u0, 32)));
8186      assign(*i1, IHI32x2(SHL64(u0,  0), SHL64(u2, 32)));
8187      assign(*i2, IHI32x2(SHL64(u2,  0), SHL64(u1,  0)));
8188   } else if (laneszB == 2) {
8189      // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
8190      // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
8191      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
8192#     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
8193                IHI32x2(                                      \
8194                   IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
8195                           SHL64((_tmp2),48-16*(_la2))),      \
8196                   IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
8197                           SHL64((_tmp0),48-16*(_la0))))
8198      assign(*i0, XXX(u0,1, u2,0, u1,0, u0,0));
8199      assign(*i1, XXX(u1,2, u0,2, u2,1, u1,1));
8200      assign(*i2, XXX(u2,3, u1,3, u0,3, u2,2));
8201#     undef XXX
8202   } else if (laneszB == 1) {
8203      // These describe how the result vectors [7..0] are
8204      // assembled from the source vectors.  Each pair is
8205      // (source vector number, lane number).
8206      static const UChar in0[16] = {1,2, 0,2, 2,1, 1,1, 0,1, 2,0, 1,0, 0,0};
8207      static const UChar in1[16] = {0,5, 2,4, 1,4, 0,4, 2,3, 1,3, 0,3, 2,2};
8208      static const UChar in2[16] = {2,7, 1,7, 0,7, 2,6, 1,6, 0,6, 2,5, 1,5};
8209      assign(*i0, math_PERM_8x8x3(in0, u0, u1, u2));
8210      assign(*i1, math_PERM_8x8x3(in1, u0, u1, u2));
8211      assign(*i2, math_PERM_8x8x3(in2, u0, u1, u2));
8212   } else {
8213      // Can never happen, since VST3 only has valid lane widths of 32,
8214      // 16 or 8 bits.
8215      vpanic("math_INTERLEAVE_3");
8216   }
8217#  undef SHL64
8218#  undef IHI16x4
8219#  undef IHI32x2
8220}
8221
8222/* Generate 4x64 -> 4x64 deinterleave code, for VLD4.  Caller must
8223   make *u0, *u1, *u2 and *u3 be valid IRTemps before the call. */
8224static void math_DEINTERLEAVE_4 (
8225               /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
8226               /*OUT*/IRTemp* u2, /*OUT*/IRTemp* u3,
8227               IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3, Int laneszB
8228            )
8229{
8230#  define IHI32x2(_t1, _t2) \
8231             binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
8232#  define ILO32x2(_t1, _t2) \
8233             binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
8234#  define IHI16x4(_t1, _t2) \
8235             binop(Iop_InterleaveHI16x4, mkexpr(_t1), mkexpr(_t2))
8236#  define ILO16x4(_t1, _t2) \
8237             binop(Iop_InterleaveLO16x4, mkexpr(_t1), mkexpr(_t2))
8238#  define IHI8x8(_t1, _e2) \
8239             binop(Iop_InterleaveHI8x8, mkexpr(_t1), _e2)
8240#  define SHL64(_tmp, _amt) \
8241             binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8242   /* The following assumes that the guest is little endian, and hence
8243      that the memory-side (interleaved) data is stored
8244      little-endianly. */
8245   vassert(u0 && u1 && u2 && u3);
8246   if (laneszB == 4) {
8247      assign(*u0, ILO32x2(i2, i0));
8248      assign(*u1, IHI32x2(i2, i0));
8249      assign(*u2, ILO32x2(i3, i1));
8250      assign(*u3, IHI32x2(i3, i1));
8251   } else if (laneszB == 2) {
8252      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8253      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8254      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8255      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8256      assign(b1b0a1a0, ILO16x4(i1, i0));
8257      assign(b3b2a3a2, ILO16x4(i3, i2));
8258      assign(d1d0c1c0, IHI16x4(i1, i0));
8259      assign(d3d2c3c2, IHI16x4(i3, i2));
8260      // And now do what we did for the 32-bit case.
8261      assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
8262      assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
8263      assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
8264      assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
8265   } else if (laneszB == 1) {
8266      // Deinterleave into 16-bit chunks, then do as the 16-bit case.
8267      IRTemp i0x = newTemp(Ity_I64);
8268      IRTemp i1x = newTemp(Ity_I64);
8269      IRTemp i2x = newTemp(Ity_I64);
8270      IRTemp i3x = newTemp(Ity_I64);
8271      assign(i0x, IHI8x8(i0, SHL64(i0, 32)));
8272      assign(i1x, IHI8x8(i1, SHL64(i1, 32)));
8273      assign(i2x, IHI8x8(i2, SHL64(i2, 32)));
8274      assign(i3x, IHI8x8(i3, SHL64(i3, 32)));
8275      // From here on is like the 16 bit case.
8276      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8277      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8278      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8279      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8280      assign(b1b0a1a0, ILO16x4(i1x, i0x));
8281      assign(b3b2a3a2, ILO16x4(i3x, i2x));
8282      assign(d1d0c1c0, IHI16x4(i1x, i0x));
8283      assign(d3d2c3c2, IHI16x4(i3x, i2x));
8284      // And now do what we did for the 32-bit case.
8285      assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
8286      assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
8287      assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
8288      assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
8289   } else {
8290      // Can never happen, since VLD4 only has valid lane widths of 32,
8291      // 16 or 8 bits.
8292      vpanic("math_DEINTERLEAVE_4");
8293   }
8294#  undef SHL64
8295#  undef IHI8x8
8296#  undef ILO16x4
8297#  undef IHI16x4
8298#  undef ILO32x2
8299#  undef IHI32x2
8300}
8301
8302/* Generate 4x64 -> 4x64 interleave code, for VST4.  Caller must
8303   make *i0, *i1, *i2 and *i3 be valid IRTemps before the call. */
8304static void math_INTERLEAVE_4 (
8305               /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
8306               /*OUT*/IRTemp* i2, /*OUT*/IRTemp* i3,
8307               IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3, Int laneszB
8308            )
8309{
8310#  define IHI32x2(_t1, _t2) \
8311             binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
8312#  define ILO32x2(_t1, _t2) \
8313             binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
8314#  define CEV16x4(_t1, _t2) \
8315             binop(Iop_CatEvenLanes16x4, mkexpr(_t1), mkexpr(_t2))
8316#  define COD16x4(_t1, _t2) \
8317             binop(Iop_CatOddLanes16x4, mkexpr(_t1), mkexpr(_t2))
8318#  define COD8x8(_t1, _e2) \
8319             binop(Iop_CatOddLanes8x8, mkexpr(_t1), _e2)
8320#  define SHL64(_tmp, _amt) \
8321             binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8322   /* The following assumes that the guest is little endian, and hence
8323      that the memory-side (interleaved) data is stored
8324      little-endianly. */
8325   vassert(u0 && u1 && u2 && u3);
8326   if (laneszB == 4) {
8327      assign(*i0, ILO32x2(u1, u0));
8328      assign(*i1, ILO32x2(u3, u2));
8329      assign(*i2, IHI32x2(u1, u0));
8330      assign(*i3, IHI32x2(u3, u2));
8331   } else if (laneszB == 2) {
8332      // First, interleave at the 32-bit lane size.
8333      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8334      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8335      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8336      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8337      assign(b1b0a1a0, ILO32x2(u1, u0));
8338      assign(b3b2a3a2, IHI32x2(u1, u0));
8339      assign(d1d0c1c0, ILO32x2(u3, u2));
8340      assign(d3d2c3c2, IHI32x2(u3, u2));
8341      // And interleave (cat) at the 16 bit size.
8342      assign(*i0, CEV16x4(d1d0c1c0, b1b0a1a0));
8343      assign(*i1, COD16x4(d1d0c1c0, b1b0a1a0));
8344      assign(*i2, CEV16x4(d3d2c3c2, b3b2a3a2));
8345      assign(*i3, COD16x4(d3d2c3c2, b3b2a3a2));
8346   } else if (laneszB == 1) {
8347      // First, interleave at the 32-bit lane size.
8348      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8349      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8350      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8351      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8352      assign(b1b0a1a0, ILO32x2(u1, u0));
8353      assign(b3b2a3a2, IHI32x2(u1, u0));
8354      assign(d1d0c1c0, ILO32x2(u3, u2));
8355      assign(d3d2c3c2, IHI32x2(u3, u2));
8356      // And interleave (cat) at the 16 bit size.
8357      IRTemp i0x = newTemp(Ity_I64);
8358      IRTemp i1x = newTemp(Ity_I64);
8359      IRTemp i2x = newTemp(Ity_I64);
8360      IRTemp i3x = newTemp(Ity_I64);
8361      assign(i0x, CEV16x4(d1d0c1c0, b1b0a1a0));
8362      assign(i1x, COD16x4(d1d0c1c0, b1b0a1a0));
8363      assign(i2x, CEV16x4(d3d2c3c2, b3b2a3a2));
8364      assign(i3x, COD16x4(d3d2c3c2, b3b2a3a2));
8365      // And rearrange within each word, to get the right 8 bit lanes.
8366      assign(*i0, COD8x8(i0x, SHL64(i0x, 8)));
8367      assign(*i1, COD8x8(i1x, SHL64(i1x, 8)));
8368      assign(*i2, COD8x8(i2x, SHL64(i2x, 8)));
8369      assign(*i3, COD8x8(i3x, SHL64(i3x, 8)));
8370   } else {
8371      // Can never happen, since VLD4 only has valid lane widths of 32,
8372      // 16 or 8 bits.
8373      vpanic("math_DEINTERLEAVE_4");
8374   }
8375#  undef SHL64
8376#  undef COD8x8
8377#  undef COD16x4
8378#  undef CEV16x4
8379#  undef ILO32x2
8380#  undef IHI32x2
8381}
8382
8383/* A7.7 Advanced SIMD element or structure load/store instructions */
8384static
8385Bool dis_neon_load_or_store ( UInt theInstr,
8386                              Bool isT, IRTemp condT )
8387{
8388#  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
8389   UInt bA = INSN(23,23);
8390   UInt fB = INSN(11,8);
8391   UInt bL = INSN(21,21);
8392   UInt rD = (INSN(22,22) << 4) | INSN(15,12);
8393   UInt rN = INSN(19,16);
8394   UInt rM = INSN(3,0);
8395   UInt N, size, i, j;
8396   UInt inc;
8397   UInt regs = 1;
8398
8399   if (isT) {
8400      vassert(condT != IRTemp_INVALID);
8401   } else {
8402      vassert(condT == IRTemp_INVALID);
8403   }
8404   /* So now, if condT is not IRTemp_INVALID, we know we're
8405      dealing with Thumb code. */
8406
8407   if (INSN(20,20) != 0)
8408      return False;
8409
8410   IRTemp initialRn = newTemp(Ity_I32);
8411   assign(initialRn, isT ? getIRegT(rN) : getIRegA(rN));
8412
8413   IRTemp initialRm = newTemp(Ity_I32);
8414   assign(initialRm, isT ? getIRegT(rM) : getIRegA(rM));
8415
8416   /* There are 3 cases:
8417      (1) VSTn / VLDn (n-element structure from/to one lane)
8418      (2) VLDn (single element to all lanes)
8419      (3) VSTn / VLDn (multiple n-element structures)
8420   */
8421   if (bA) {
8422      N = fB & 3;
8423      if ((fB >> 2) < 3) {
8424         /* ------------ Case (1) ------------
8425            VSTn / VLDn (n-element structure from/to one lane) */
8426
8427         size = fB >> 2;
8428
8429         switch (size) {
8430            case 0: i = INSN(7,5); inc = 1; break;
8431            case 1: i = INSN(7,6); inc = INSN(5,5) ? 2 : 1; break;
8432            case 2: i = INSN(7,7); inc = INSN(6,6) ? 2 : 1; break;
8433            case 3: return False;
8434            default: vassert(0);
8435         }
8436
8437         IRTemp addr = newTemp(Ity_I32);
8438         assign(addr, mkexpr(initialRn));
8439
8440         // go uncond
8441         if (condT != IRTemp_INVALID)
8442            mk_skip_over_T32_if_cond_is_false(condT);
8443         // now uncond
8444
8445         if (bL)
8446            mk_neon_elem_load_to_one_lane(rD, inc, i, N, size, addr);
8447         else
8448            mk_neon_elem_store_from_one_lane(rD, inc, i, N, size, addr);
8449         DIP("v%s%u.%d {", bL ? "ld" : "st", N + 1, 8 << size);
8450         for (j = 0; j <= N; j++) {
8451            if (j)
8452               DIP(", ");
8453            DIP("d%u[%u]", rD + j * inc, i);
8454         }
8455         DIP("}, [r%u]", rN);
8456         if (rM != 13 && rM != 15) {
8457            DIP(", r%u\n", rM);
8458         } else {
8459            DIP("%s\n", (rM != 15) ? "!" : "");
8460         }
8461      } else {
8462         /* ------------ Case (2) ------------
8463            VLDn (single element to all lanes) */
8464         UInt r;
8465         if (bL == 0)
8466            return False;
8467
8468         inc = INSN(5,5) + 1;
8469         size = INSN(7,6);
8470
8471         /* size == 3 and size == 2 cases differ in alignment constraints */
8472         if (size == 3 && N == 3 && INSN(4,4) == 1)
8473            size = 2;
8474
8475         if (size == 0 && N == 0 && INSN(4,4) == 1)
8476            return False;
8477         if (N == 2 && INSN(4,4) == 1)
8478            return False;
8479         if (size == 3)
8480            return False;
8481
8482         // go uncond
8483         if (condT != IRTemp_INVALID)
8484            mk_skip_over_T32_if_cond_is_false(condT);
8485         // now uncond
8486
8487         IRTemp addr = newTemp(Ity_I32);
8488         assign(addr, mkexpr(initialRn));
8489
8490         if (N == 0 && INSN(5,5))
8491            regs = 2;
8492
8493         for (r = 0; r < regs; r++) {
8494            switch (size) {
8495               case 0:
8496                  putDRegI64(rD + r, unop(Iop_Dup8x8,
8497                                          loadLE(Ity_I8, mkexpr(addr))),
8498                             IRTemp_INVALID);
8499                  break;
8500               case 1:
8501                  putDRegI64(rD + r, unop(Iop_Dup16x4,
8502                                          loadLE(Ity_I16, mkexpr(addr))),
8503                             IRTemp_INVALID);
8504                  break;
8505               case 2:
8506                  putDRegI64(rD + r, unop(Iop_Dup32x2,
8507                                          loadLE(Ity_I32, mkexpr(addr))),
8508                             IRTemp_INVALID);
8509                  break;
8510               default:
8511                  vassert(0);
8512            }
8513            for (i = 1; i <= N; i++) {
8514               switch (size) {
8515                  case 0:
8516                     putDRegI64(rD + r + i * inc,
8517                                unop(Iop_Dup8x8,
8518                                     loadLE(Ity_I8, binop(Iop_Add32,
8519                                                          mkexpr(addr),
8520                                                          mkU32(i * 1)))),
8521                                IRTemp_INVALID);
8522                     break;
8523                  case 1:
8524                     putDRegI64(rD + r + i * inc,
8525                                unop(Iop_Dup16x4,
8526                                     loadLE(Ity_I16, binop(Iop_Add32,
8527                                                           mkexpr(addr),
8528                                                           mkU32(i * 2)))),
8529                                IRTemp_INVALID);
8530                     break;
8531                  case 2:
8532                     putDRegI64(rD + r + i * inc,
8533                                unop(Iop_Dup32x2,
8534                                     loadLE(Ity_I32, binop(Iop_Add32,
8535                                                           mkexpr(addr),
8536                                                           mkU32(i * 4)))),
8537                                IRTemp_INVALID);
8538                     break;
8539                  default:
8540                     vassert(0);
8541               }
8542            }
8543         }
8544         DIP("vld%u.%d {", N + 1, 8 << size);
8545         for (r = 0; r < regs; r++) {
8546            for (i = 0; i <= N; i++) {
8547               if (i || r)
8548                  DIP(", ");
8549               DIP("d%u[]", rD + r + i * inc);
8550            }
8551         }
8552         DIP("}, [r%u]", rN);
8553         if (rM != 13 && rM != 15) {
8554            DIP(", r%u\n", rM);
8555         } else {
8556            DIP("%s\n", (rM != 15) ? "!" : "");
8557         }
8558      }
8559      /* Writeback.  We're uncond here, so no condT-ing. */
8560      if (rM != 15) {
8561         if (rM == 13) {
8562            IRExpr* e = binop(Iop_Add32,
8563                              mkexpr(initialRn),
8564                              mkU32((1 << size) * (N + 1)));
8565            if (isT)
8566               putIRegT(rN, e, IRTemp_INVALID);
8567            else
8568               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8569         } else {
8570            IRExpr* e = binop(Iop_Add32,
8571                              mkexpr(initialRn),
8572                              mkexpr(initialRm));
8573            if (isT)
8574               putIRegT(rN, e, IRTemp_INVALID);
8575            else
8576               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8577         }
8578      }
8579      return True;
8580   } else {
8581      /* ------------ Case (3) ------------
8582         VSTn / VLDn (multiple n-element structures) */
8583      inc = (fB & 1) + 1;
8584
8585      if (fB == BITS4(0,0,1,0)       // Dd, Dd+1, Dd+2, Dd+3  inc = 1  regs = 4
8586          || fB == BITS4(0,1,1,0)    // Dd, Dd+1, Dd+2        inc = 1  regs = 3
8587          || fB == BITS4(0,1,1,1)    // Dd                    inc = 2  regs = 1
8588          || fB == BITS4(1,0,1,0)) { // Dd, Dd+1              inc = 1  regs = 2
8589         N = 0; // VLD1/VST1.  'inc' does not appear to have any
8590                // meaning for the VLD1/VST1 cases.  'regs' is the number of
8591                // registers involved.
8592         if (rD + regs > 32) return False;
8593      }
8594      else
8595      if (fB == BITS4(0,0,1,1)       // Dd, Dd+1, Dd+2, Dd+3  inc=2  regs = 2
8596          || fB == BITS4(1,0,0,0)    // Dd, Dd+1              inc=1  regs = 1
8597          || fB == BITS4(1,0,0,1)) { // Dd, Dd+2              inc=2  regs = 1
8598         N = 1; // VLD2/VST2.  'regs' is the number of register-pairs involved
8599         if (regs == 1 && inc == 1 && rD + 1 >= 32) return False;
8600         if (regs == 1 && inc == 2 && rD + 2 >= 32) return False;
8601         if (regs == 2 && inc == 2 && rD + 3 >= 32) return False;
8602      } else if (fB == BITS4(0,1,0,0) || fB == BITS4(0,1,0,1)) {
8603         N = 2; // VLD3/VST3
8604         if (inc == 1 && rD + 2 >= 32) return False;
8605         if (inc == 2 && rD + 4 >= 32) return False;
8606      } else if (fB == BITS4(0,0,0,0) || fB == BITS4(0,0,0,1)) {
8607         N = 3; // VLD4/VST4
8608         if (inc == 1 && rD + 3 >= 32) return False;
8609         if (inc == 2 && rD + 6 >= 32) return False;
8610      } else {
8611         return False;
8612      }
8613
8614      if (N == 1 && fB == BITS4(0,0,1,1)) {
8615         regs = 2;
8616      } else if (N == 0) {
8617         if (fB == BITS4(1,0,1,0)) {
8618            regs = 2;
8619         } else if (fB == BITS4(0,1,1,0)) {
8620            regs = 3;
8621         } else if (fB == BITS4(0,0,1,0)) {
8622            regs = 4;
8623         }
8624      }
8625
8626      size = INSN(7,6);
8627      if (N == 0 && size == 3)
8628         size = 2;
8629      if (size == 3)
8630         return False;
8631
8632      // go uncond
8633      if (condT != IRTemp_INVALID)
8634         mk_skip_over_T32_if_cond_is_false(condT);
8635      // now uncond
8636
8637      IRTemp addr = newTemp(Ity_I32);
8638      assign(addr, mkexpr(initialRn));
8639
8640      if (N == 0 /* No interleaving -- VLD1/VST1 */) {
8641         UInt r;
8642         vassert(regs == 1 || regs == 2 || regs == 3 || regs == 4);
8643         /* inc has no relevance here */
8644         for (r = 0; r < regs; r++) {
8645            if (bL)
8646               putDRegI64(rD+r, loadLE(Ity_I64, mkexpr(addr)), IRTemp_INVALID);
8647            else
8648               storeLE(mkexpr(addr), getDRegI64(rD+r));
8649            IRTemp tmp = newTemp(Ity_I32);
8650            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(8)));
8651            addr = tmp;
8652         }
8653      }
8654      else
8655      if (N == 1 /* 2-interleaving -- VLD2/VST2 */) {
8656         vassert( (regs == 1 && (inc == 1 || inc == 2))
8657                   || (regs == 2 && inc == 2) );
8658         // Make 'nregs' be the number of registers and 'regstep'
8659         // equal the actual register-step.  The ARM encoding, using 'regs'
8660         // and 'inc', is bizarre.  After this, we have:
8661         // Dd, Dd+1              regs = 1, inc = 1,   nregs = 2, regstep = 1
8662         // Dd, Dd+2              regs = 1, inc = 2,   nregs = 2, regstep = 2
8663         // Dd, Dd+1, Dd+2, Dd+3  regs = 2, inc = 2,   nregs = 4, regstep = 1
8664         UInt nregs   = 2;
8665         UInt regstep = 1;
8666         if (regs == 1 && inc == 1) {
8667            /* nothing */
8668         } else if (regs == 1 && inc == 2) {
8669            regstep = 2;
8670         } else if (regs == 2 && inc == 2) {
8671            nregs = 4;
8672         } else {
8673            vassert(0);
8674         }
8675         // 'a' is address,
8676         // 'di' is interleaved data, 'du' is uninterleaved data
8677         if (nregs == 2) {
8678            IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8679            IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8680            IRTemp  di0 = newTemp(Ity_I64);
8681            IRTemp  di1 = newTemp(Ity_I64);
8682            IRTemp  du0 = newTemp(Ity_I64);
8683            IRTemp  du1 = newTemp(Ity_I64);
8684            if (bL) {
8685               assign(di0, loadLE(Ity_I64, a0));
8686               assign(di1, loadLE(Ity_I64, a1));
8687               math_DEINTERLEAVE_2(&du0, &du1, di0, di1, 1 << size);
8688               putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
8689               putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
8690            } else {
8691               assign(du0, getDRegI64(rD + 0 * regstep));
8692               assign(du1, getDRegI64(rD + 1 * regstep));
8693               math_INTERLEAVE_2(&di0, &di1, du0, du1, 1 << size);
8694               storeLE(a0, mkexpr(di0));
8695               storeLE(a1, mkexpr(di1));
8696            }
8697            IRTemp tmp = newTemp(Ity_I32);
8698            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(16)));
8699            addr = tmp;
8700         } else {
8701            vassert(nregs == 4);
8702            vassert(regstep == 1);
8703            IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8704            IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8705            IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8706            IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
8707            IRTemp  di0 = newTemp(Ity_I64);
8708            IRTemp  di1 = newTemp(Ity_I64);
8709            IRTemp  di2 = newTemp(Ity_I64);
8710            IRTemp  di3 = newTemp(Ity_I64);
8711            IRTemp  du0 = newTemp(Ity_I64);
8712            IRTemp  du1 = newTemp(Ity_I64);
8713            IRTemp  du2 = newTemp(Ity_I64);
8714            IRTemp  du3 = newTemp(Ity_I64);
8715            if (bL) {
8716               assign(di0, loadLE(Ity_I64, a0));
8717               assign(di1, loadLE(Ity_I64, a1));
8718               assign(di2, loadLE(Ity_I64, a2));
8719               assign(di3, loadLE(Ity_I64, a3));
8720               // Note spooky interleaving: du0, du2, di0, di1 etc
8721               math_DEINTERLEAVE_2(&du0, &du2, di0, di1, 1 << size);
8722               math_DEINTERLEAVE_2(&du1, &du3, di2, di3, 1 << size);
8723               putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
8724               putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
8725               putDRegI64(rD + 2 * regstep, mkexpr(du2), IRTemp_INVALID);
8726               putDRegI64(rD + 3 * regstep, mkexpr(du3), IRTemp_INVALID);
8727            } else {
8728               assign(du0, getDRegI64(rD + 0 * regstep));
8729               assign(du1, getDRegI64(rD + 1 * regstep));
8730               assign(du2, getDRegI64(rD + 2 * regstep));
8731               assign(du3, getDRegI64(rD + 3 * regstep));
8732               // Note spooky interleaving: du0, du2, di0, di1 etc
8733               math_INTERLEAVE_2(&di0, &di1, du0, du2, 1 << size);
8734               math_INTERLEAVE_2(&di2, &di3, du1, du3, 1 << size);
8735               storeLE(a0, mkexpr(di0));
8736               storeLE(a1, mkexpr(di1));
8737               storeLE(a2, mkexpr(di2));
8738               storeLE(a3, mkexpr(di3));
8739            }
8740
8741            IRTemp tmp = newTemp(Ity_I32);
8742            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
8743            addr = tmp;
8744         }
8745      }
8746      else
8747      if (N == 2 /* 3-interleaving -- VLD3/VST3 */) {
8748         // Dd, Dd+1, Dd+2   regs = 1, inc = 1
8749         // Dd, Dd+2, Dd+4   regs = 1, inc = 2
8750         vassert(regs == 1 && (inc == 1 || inc == 2));
8751         IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8752         IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8753         IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8754         IRTemp  di0 = newTemp(Ity_I64);
8755         IRTemp  di1 = newTemp(Ity_I64);
8756         IRTemp  di2 = newTemp(Ity_I64);
8757         IRTemp  du0 = newTemp(Ity_I64);
8758         IRTemp  du1 = newTemp(Ity_I64);
8759         IRTemp  du2 = newTemp(Ity_I64);
8760         if (bL) {
8761            assign(di0, loadLE(Ity_I64, a0));
8762            assign(di1, loadLE(Ity_I64, a1));
8763            assign(di2, loadLE(Ity_I64, a2));
8764            math_DEINTERLEAVE_3(&du0, &du1, &du2, di0, di1, di2, 1 << size);
8765            putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
8766            putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
8767            putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
8768         } else {
8769            assign(du0, getDRegI64(rD + 0 * inc));
8770            assign(du1, getDRegI64(rD + 1 * inc));
8771            assign(du2, getDRegI64(rD + 2 * inc));
8772            math_INTERLEAVE_3(&di0, &di1, &di2, du0, du1, du2, 1 << size);
8773            storeLE(a0, mkexpr(di0));
8774            storeLE(a1, mkexpr(di1));
8775            storeLE(a2, mkexpr(di2));
8776         }
8777         IRTemp tmp = newTemp(Ity_I32);
8778         assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(24)));
8779         addr = tmp;
8780      }
8781      else
8782      if (N == 3 /* 4-interleaving -- VLD4/VST4 */) {
8783         // Dd, Dd+1, Dd+2, Dd+3   regs = 1, inc = 1
8784         // Dd, Dd+2, Dd+4, Dd+6   regs = 1, inc = 2
8785         vassert(regs == 1 && (inc == 1 || inc == 2));
8786         IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8787         IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8788         IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8789         IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
8790         IRTemp  di0 = newTemp(Ity_I64);
8791         IRTemp  di1 = newTemp(Ity_I64);
8792         IRTemp  di2 = newTemp(Ity_I64);
8793         IRTemp  di3 = newTemp(Ity_I64);
8794         IRTemp  du0 = newTemp(Ity_I64);
8795         IRTemp  du1 = newTemp(Ity_I64);
8796         IRTemp  du2 = newTemp(Ity_I64);
8797         IRTemp  du3 = newTemp(Ity_I64);
8798         if (bL) {
8799            assign(di0, loadLE(Ity_I64, a0));
8800            assign(di1, loadLE(Ity_I64, a1));
8801            assign(di2, loadLE(Ity_I64, a2));
8802            assign(di3, loadLE(Ity_I64, a3));
8803            math_DEINTERLEAVE_4(&du0, &du1, &du2, &du3,
8804                                di0, di1, di2, di3, 1 << size);
8805            putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
8806            putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
8807            putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
8808            putDRegI64(rD + 3 * inc, mkexpr(du3), IRTemp_INVALID);
8809         } else {
8810            assign(du0, getDRegI64(rD + 0 * inc));
8811            assign(du1, getDRegI64(rD + 1 * inc));
8812            assign(du2, getDRegI64(rD + 2 * inc));
8813            assign(du3, getDRegI64(rD + 3 * inc));
8814            math_INTERLEAVE_4(&di0, &di1, &di2, &di3,
8815                              du0, du1, du2, du3, 1 << size);
8816            storeLE(a0, mkexpr(di0));
8817            storeLE(a1, mkexpr(di1));
8818            storeLE(a2, mkexpr(di2));
8819            storeLE(a3, mkexpr(di3));
8820         }
8821         IRTemp tmp = newTemp(Ity_I32);
8822         assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
8823         addr = tmp;
8824      }
8825      else {
8826         vassert(0);
8827      }
8828
8829      /* Writeback */
8830      if (rM != 15) {
8831         IRExpr* e;
8832         if (rM == 13) {
8833            e = binop(Iop_Add32, mkexpr(initialRn),
8834                                 mkU32(8 * (N + 1) * regs));
8835         } else {
8836            e = binop(Iop_Add32, mkexpr(initialRn),
8837                                 mkexpr(initialRm));
8838         }
8839         if (isT)
8840            putIRegT(rN, e, IRTemp_INVALID);
8841         else
8842            putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8843      }
8844
8845      DIP("v%s%u.%d {", bL ? "ld" : "st", N + 1, 8 << INSN(7,6));
8846      if ((inc == 1 && regs * (N + 1) > 1)
8847          || (inc == 2 && regs > 1 && N > 0)) {
8848         DIP("d%u-d%u", rD, rD + regs * (N + 1) - 1);
8849      } else {
8850         UInt r;
8851         for (r = 0; r < regs; r++) {
8852            for (i = 0; i <= N; i++) {
8853               if (i || r)
8854                  DIP(", ");
8855               DIP("d%u", rD + r + i * inc);
8856            }
8857         }
8858      }
8859      DIP("}, [r%u]", rN);
8860      if (rM != 13 && rM != 15) {
8861         DIP(", r%u\n", rM);
8862      } else {
8863         DIP("%s\n", (rM != 15) ? "!" : "");
8864      }
8865      return True;
8866   }
8867#  undef INSN
8868}
8869
8870
8871/*------------------------------------------------------------*/
8872/*--- NEON, top level control                              ---*/
8873/*------------------------------------------------------------*/
8874
8875/* Both ARM and Thumb */
8876
8877/* Translate a NEON instruction.    If successful, returns
8878   True and *dres may or may not be updated.  If failure, returns
8879   False and doesn't change *dres nor create any IR.
8880
8881   The Thumb and ARM encodings are similar for the 24 bottom bits, but
8882   the top 8 bits are slightly different.  In both cases, the caller
8883   must pass the entire 32 bits.  Callers may pass any instruction;
8884   this ignores non-NEON ones.
8885
8886   Caller must supply an IRTemp 'condT' holding the gating condition,
8887   or IRTemp_INVALID indicating the insn is always executed.  In ARM
8888   code, this must always be IRTemp_INVALID because NEON insns are
8889   unconditional for ARM.
8890
8891   Finally, the caller must indicate whether this occurs in ARM or in
8892   Thumb code.
8893
8894   This only handles NEON for ARMv7 and below.  The NEON extensions
8895   for v8 are handled by decode_V8_instruction.
8896*/
8897static Bool decode_NEON_instruction_ARMv7_and_below (
8898               /*MOD*/DisResult* dres,
8899               UInt              insn32,
8900               IRTemp            condT,
8901               Bool              isT
8902            )
8903{
8904#  define INSN(_bMax,_bMin)  SLICE_UInt(insn32, (_bMax), (_bMin))
8905
8906   /* There are two kinds of instruction to deal with: load/store and
8907      data processing.  In each case, in ARM mode we merely identify
8908      the kind, and pass it on to the relevant sub-handler.  In Thumb
8909      mode we identify the kind, swizzle the bits around to make it
8910      have the same encoding as in ARM, and hand it on to the
8911      sub-handler.
8912   */
8913
8914   /* In ARM mode, NEON instructions can't be conditional. */
8915   if (!isT)
8916      vassert(condT == IRTemp_INVALID);
8917
8918   /* Data processing:
8919      Thumb: 111U 1111 AAAA Axxx xxxx BBBB CCCC xxxx
8920      ARM:   1111 001U AAAA Axxx xxxx BBBB CCCC xxxx
8921   */
8922   if (!isT && INSN(31,25) == BITS7(1,1,1,1,0,0,1)) {
8923      // ARM, DP
8924      return dis_neon_data_processing(INSN(31,0), condT);
8925   }
8926   if (isT && INSN(31,29) == BITS3(1,1,1)
8927       && INSN(27,24) == BITS4(1,1,1,1)) {
8928      // Thumb, DP
8929      UInt reformatted = INSN(23,0);
8930      reformatted |= (((UInt)INSN(28,28)) << 24); // U bit
8931      reformatted |= (((UInt)BITS7(1,1,1,1,0,0,1)) << 25);
8932      return dis_neon_data_processing(reformatted, condT);
8933   }
8934
8935   /* Load/store:
8936      Thumb: 1111 1001 AxL0 xxxx xxxx BBBB xxxx xxxx
8937      ARM:   1111 0100 AxL0 xxxx xxxx BBBB xxxx xxxx
8938   */
8939   if (!isT && INSN(31,24) == BITS8(1,1,1,1,0,1,0,0)) {
8940      // ARM, memory
8941      return dis_neon_load_or_store(INSN(31,0), isT, condT);
8942   }
8943   if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
8944      UInt reformatted = INSN(23,0);
8945      reformatted |= (((UInt)BITS8(1,1,1,1,0,1,0,0)) << 24);
8946      return dis_neon_load_or_store(reformatted, isT, condT);
8947   }
8948
8949   /* Doesn't match. */
8950   return False;
8951
8952#  undef INSN
8953}
8954
8955
8956/*------------------------------------------------------------*/
8957/*--- V6 MEDIA instructions                                ---*/
8958/*------------------------------------------------------------*/
8959
8960/* Both ARM and Thumb */
8961
8962/* Translate a V6 media instruction.    If successful, returns
8963   True and *dres may or may not be updated.  If failure, returns
8964   False and doesn't change *dres nor create any IR.
8965
8966   The Thumb and ARM encodings are completely different.  In Thumb
8967   mode, the caller must pass the entire 32 bits.  In ARM mode it must
8968   pass the lower 28 bits.  Apart from that, callers may pass any
8969   instruction; this function ignores anything it doesn't recognise.
8970
8971   Caller must supply an IRTemp 'condT' holding the gating condition,
8972   or IRTemp_INVALID indicating the insn is always executed.
8973
8974   Caller must also supply an ARMCondcode 'conq'.  This is only used
8975   for debug printing, no other purpose.  For ARM, this is simply the
8976   top 4 bits of the original instruction.  For Thumb, the condition
8977   is not (really) known until run time, and so ARMCondAL should be
8978   passed, only so that printing of these instructions does not show
8979   any condition.
8980
8981   Finally, the caller must indicate whether this occurs in ARM or in
8982   Thumb code.
8983*/
8984static Bool decode_V6MEDIA_instruction (
8985               /*MOD*/DisResult* dres,
8986               UInt              insnv6m,
8987               IRTemp            condT,
8988               ARMCondcode       conq,
8989               Bool              isT
8990            )
8991{
8992#  define INSNA(_bMax,_bMin)   SLICE_UInt(insnv6m, (_bMax), (_bMin))
8993#  define INSNT0(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 16) & 0xFFFF), \
8994                                           (_bMax), (_bMin) )
8995#  define INSNT1(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 0)  & 0xFFFF), \
8996                                           (_bMax), (_bMin) )
8997   HChar dis_buf[128];
8998   dis_buf[0] = 0;
8999
9000   if (isT) {
9001      vassert(conq == ARMCondAL);
9002   } else {
9003      vassert(INSNA(31,28) == BITS4(0,0,0,0)); // caller's obligation
9004      vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
9005   }
9006
9007   /* ----------- smulbb, smulbt, smultb, smultt ----------- */
9008   {
9009     UInt regD = 99, regM = 99, regN = 99, bitM = 0, bitN = 0;
9010     Bool gate = False;
9011
9012     if (isT) {
9013        if (INSNT0(15,4) == 0xFB1 && INSNT1(15,12) == BITS4(1,1,1,1)
9014            && INSNT1(7,6) == BITS2(0,0)) {
9015           regD = INSNT1(11,8);
9016           regM = INSNT1(3,0);
9017           regN = INSNT0(3,0);
9018           bitM = INSNT1(4,4);
9019           bitN = INSNT1(5,5);
9020           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9021              gate = True;
9022        }
9023     } else {
9024        if (BITS8(0,0,0,1,0,1,1,0) == INSNA(27,20) &&
9025            BITS4(0,0,0,0)         == INSNA(15,12) &&
9026            BITS4(1,0,0,0)         == (INSNA(7,4) & BITS4(1,0,0,1)) ) {
9027           regD = INSNA(19,16);
9028           regM = INSNA(11,8);
9029           regN = INSNA(3,0);
9030           bitM = INSNA(6,6);
9031           bitN = INSNA(5,5);
9032           if (regD != 15 && regN != 15 && regM != 15)
9033              gate = True;
9034        }
9035     }
9036
9037     if (gate) {
9038        IRTemp srcN = newTemp(Ity_I32);
9039        IRTemp srcM = newTemp(Ity_I32);
9040        IRTemp res  = newTemp(Ity_I32);
9041
9042        assign( srcN, binop(Iop_Sar32,
9043                            binop(Iop_Shl32,
9044                                  isT ? getIRegT(regN) : getIRegA(regN),
9045                                  mkU8(bitN ? 0 : 16)), mkU8(16)) );
9046        assign( srcM, binop(Iop_Sar32,
9047                            binop(Iop_Shl32,
9048                                  isT ? getIRegT(regM) : getIRegA(regM),
9049                                  mkU8(bitM ? 0 : 16)), mkU8(16)) );
9050        assign( res, binop(Iop_Mul32, mkexpr(srcN), mkexpr(srcM)) );
9051
9052        if (isT)
9053           putIRegT( regD, mkexpr(res), condT );
9054        else
9055           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9056
9057        DIP( "smul%c%c%s r%u, r%u, r%u\n", bitN ? 't' : 'b', bitM ? 't' : 'b',
9058             nCC(conq), regD, regN, regM );
9059        return True;
9060     }
9061     /* fall through */
9062   }
9063
9064   /* ------------ smulwb<y><c> <Rd>,<Rn>,<Rm> ------------- */
9065   /* ------------ smulwt<y><c> <Rd>,<Rn>,<Rm> ------------- */
9066   {
9067     UInt regD = 99, regN = 99, regM = 99, bitM = 0;
9068     Bool gate = False;
9069
9070     if (isT) {
9071        if (INSNT0(15,4) == 0xFB3 && INSNT1(15,12) == BITS4(1,1,1,1)
9072            && INSNT1(7,5) == BITS3(0,0,0)) {
9073          regN = INSNT0(3,0);
9074          regD = INSNT1(11,8);
9075          regM = INSNT1(3,0);
9076          bitM = INSNT1(4,4);
9077          if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9078             gate = True;
9079        }
9080     } else {
9081        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
9082            INSNA(15,12) == BITS4(0,0,0,0)         &&
9083            (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,1,0)) {
9084           regD = INSNA(19,16);
9085           regN = INSNA(3,0);
9086           regM = INSNA(11,8);
9087           bitM = INSNA(6,6);
9088           if (regD != 15 && regN != 15 && regM != 15)
9089              gate = True;
9090        }
9091     }
9092
9093     if (gate) {
9094        IRTemp irt_prod = newTemp(Ity_I64);
9095
9096        assign( irt_prod,
9097                binop(Iop_MullS32,
9098                      isT ? getIRegT(regN) : getIRegA(regN),
9099                      binop(Iop_Sar32,
9100                            binop(Iop_Shl32,
9101                                  isT ? getIRegT(regM) : getIRegA(regM),
9102                                  mkU8(bitM ? 0 : 16)),
9103                            mkU8(16))) );
9104
9105        IRExpr* ire_result = binop(Iop_Or32,
9106                                   binop( Iop_Shl32,
9107                                          unop(Iop_64HIto32, mkexpr(irt_prod)),
9108                                          mkU8(16) ),
9109                                   binop( Iop_Shr32,
9110                                          unop(Iop_64to32, mkexpr(irt_prod)),
9111                                          mkU8(16) ) );
9112
9113        if (isT)
9114           putIRegT( regD, ire_result, condT );
9115        else
9116           putIRegA( regD, ire_result, condT, Ijk_Boring );
9117
9118        DIP("smulw%c%s r%u, r%u, r%u\n",
9119            bitM ? 't' : 'b', nCC(conq),regD,regN,regM);
9120        return True;
9121     }
9122     /* fall through */
9123   }
9124
9125   /* ------------ pkhbt<c> Rd, Rn, Rm {,LSL #imm} ------------- */
9126   /* ------------ pkhtb<c> Rd, Rn, Rm {,ASR #imm} ------------- */
9127   {
9128     UInt regD = 99, regN = 99, regM = 99, imm5 = 99, shift_type = 99;
9129     Bool tbform = False;
9130     Bool gate = False;
9131
9132     if (isT) {
9133        if (INSNT0(15,4) == 0xEAC
9134            && INSNT1(15,15) == 0 && INSNT1(4,4) == 0) {
9135           regN = INSNT0(3,0);
9136           regD = INSNT1(11,8);
9137           regM = INSNT1(3,0);
9138           imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
9139           shift_type = (INSNT1(5,5) << 1) | 0;
9140           tbform = (INSNT1(5,5) == 0) ? False : True;
9141           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9142              gate = True;
9143        }
9144     } else {
9145        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
9146            INSNA(5,4)   == BITS2(0,1)             &&
9147            (INSNA(6,6)  == 0 || INSNA(6,6) == 1) ) {
9148           regD = INSNA(15,12);
9149           regN = INSNA(19,16);
9150           regM = INSNA(3,0);
9151           imm5 = INSNA(11,7);
9152           shift_type = (INSNA(6,6) << 1) | 0;
9153           tbform = (INSNA(6,6) == 0) ? False : True;
9154           if (regD != 15 && regN != 15 && regM != 15)
9155              gate = True;
9156        }
9157     }
9158
9159     if (gate) {
9160        IRTemp irt_regM       = newTemp(Ity_I32);
9161        IRTemp irt_regM_shift = newTemp(Ity_I32);
9162        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
9163        compute_result_and_C_after_shift_by_imm5(
9164           dis_buf, &irt_regM_shift, NULL, irt_regM, shift_type, imm5, regM );
9165
9166        UInt mask = (tbform == True) ? 0x0000FFFF : 0xFFFF0000;
9167        IRExpr* ire_result
9168          = binop( Iop_Or32,
9169                   binop(Iop_And32, mkexpr(irt_regM_shift), mkU32(mask)),
9170                   binop(Iop_And32, isT ? getIRegT(regN) : getIRegA(regN),
9171                                    unop(Iop_Not32, mkU32(mask))) );
9172
9173        if (isT)
9174           putIRegT( regD, ire_result, condT );
9175        else
9176           putIRegA( regD, ire_result, condT, Ijk_Boring );
9177
9178        DIP( "pkh%s%s r%u, r%u, r%u %s\n", tbform ? "tb" : "bt",
9179             nCC(conq), regD, regN, regM, dis_buf );
9180
9181        return True;
9182     }
9183     /* fall through */
9184   }
9185
9186   /* ---------- usat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9187   {
9188     UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9189     Bool gate = False;
9190
9191     if (isT) {
9192        if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,1,0)
9193            && INSNT0(4,4) == 0
9194            && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9195           regD       = INSNT1(11,8);
9196           regN       = INSNT0(3,0);
9197           shift_type = (INSNT0(5,5) << 1) | 0;
9198           imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
9199           sat_imm    = INSNT1(4,0);
9200           if (!isBadRegT(regD) && !isBadRegT(regN))
9201              gate = True;
9202           if (shift_type == BITS2(1,0) && imm5 == 0)
9203              gate = False;
9204        }
9205     } else {
9206        if (INSNA(27,21) == BITS7(0,1,1,0,1,1,1) &&
9207            INSNA(5,4)   == BITS2(0,1)) {
9208           regD       = INSNA(15,12);
9209           regN       = INSNA(3,0);
9210           shift_type = (INSNA(6,6) << 1) | 0;
9211           imm5       = INSNA(11,7);
9212           sat_imm    = INSNA(20,16);
9213           if (regD != 15 && regN != 15)
9214              gate = True;
9215        }
9216     }
9217
9218     if (gate) {
9219        IRTemp irt_regN       = newTemp(Ity_I32);
9220        IRTemp irt_regN_shift = newTemp(Ity_I32);
9221        IRTemp irt_sat_Q      = newTemp(Ity_I32);
9222        IRTemp irt_result     = newTemp(Ity_I32);
9223
9224        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9225        compute_result_and_C_after_shift_by_imm5(
9226                dis_buf, &irt_regN_shift, NULL,
9227                irt_regN, shift_type, imm5, regN );
9228
9229        armUnsignedSatQ( &irt_result, &irt_sat_Q, irt_regN_shift, sat_imm );
9230        or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9231
9232        if (isT)
9233           putIRegT( regD, mkexpr(irt_result), condT );
9234        else
9235           putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9236
9237        DIP("usat%s r%u, #0x%04x, %s\n",
9238            nCC(conq), regD, imm5, dis_buf);
9239        return True;
9240     }
9241     /* fall through */
9242   }
9243
9244  /* ----------- ssat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9245   {
9246     UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9247     Bool gate = False;
9248
9249     if (isT) {
9250        if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9251            && INSNT0(4,4) == 0
9252            && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9253           regD       = INSNT1(11,8);
9254           regN       = INSNT0(3,0);
9255           shift_type = (INSNT0(5,5) << 1) | 0;
9256           imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
9257           sat_imm    = INSNT1(4,0) + 1;
9258           if (!isBadRegT(regD) && !isBadRegT(regN))
9259              gate = True;
9260           if (shift_type == BITS2(1,0) && imm5 == 0)
9261              gate = False;
9262        }
9263     } else {
9264        if (INSNA(27,21) == BITS7(0,1,1,0,1,0,1) &&
9265            INSNA(5,4)   == BITS2(0,1)) {
9266           regD       = INSNA(15,12);
9267           regN       = INSNA(3,0);
9268           shift_type = (INSNA(6,6) << 1) | 0;
9269           imm5       = INSNA(11,7);
9270           sat_imm    = INSNA(20,16) + 1;
9271           if (regD != 15 && regN != 15)
9272              gate = True;
9273        }
9274     }
9275
9276     if (gate) {
9277        IRTemp irt_regN       = newTemp(Ity_I32);
9278        IRTemp irt_regN_shift = newTemp(Ity_I32);
9279        IRTemp irt_sat_Q      = newTemp(Ity_I32);
9280        IRTemp irt_result     = newTemp(Ity_I32);
9281
9282        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9283        compute_result_and_C_after_shift_by_imm5(
9284                dis_buf, &irt_regN_shift, NULL,
9285                irt_regN, shift_type, imm5, regN );
9286
9287        armSignedSatQ( irt_regN_shift, sat_imm, &irt_result, &irt_sat_Q );
9288        or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9289
9290        if (isT)
9291           putIRegT( regD, mkexpr(irt_result), condT );
9292        else
9293           putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9294
9295        DIP( "ssat%s r%u, #0x%04x, %s\n",
9296             nCC(conq), regD, imm5, dis_buf);
9297        return True;
9298    }
9299    /* fall through */
9300  }
9301
9302   /* ----------- ssat16<c> <Rd>,#<imm>,<Rn> ----------- */
9303   {
9304     UInt regD = 99, regN = 99, sat_imm = 99;
9305     Bool gate = False;
9306
9307     if (isT) {
9308        if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9309            && INSNT0(5,4) == BITS2(1,0)
9310            && INSNT1(15,12) == BITS4(0,0,0,0)
9311            && INSNT1(7,4) == BITS4(0,0,0,0)) {
9312           regD       = INSNT1(11,8);
9313           regN       = INSNT0(3,0);
9314           sat_imm    = INSNT1(3,0) + 1;
9315           if (!isBadRegT(regD) && !isBadRegT(regN))
9316              gate = True;
9317        }
9318     } else {
9319        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,1,0) &&
9320            INSNA(11,4)   == BITS8(1,1,1,1,0,0,1,1)) {
9321           regD       = INSNA(15,12);
9322           regN       = INSNA(3,0);
9323           sat_imm    = INSNA(19,16) + 1;
9324           if (regD != 15 && regN != 15)
9325              gate = True;
9326        }
9327     }
9328
9329     if (gate) {
9330        IRTemp irt_regN    = newTemp(Ity_I32);
9331        IRTemp irt_regN_lo = newTemp(Ity_I32);
9332        IRTemp irt_regN_hi = newTemp(Ity_I32);
9333        IRTemp irt_Q_lo    = newTemp(Ity_I32);
9334        IRTemp irt_Q_hi    = newTemp(Ity_I32);
9335        IRTemp irt_res_lo  = newTemp(Ity_I32);
9336        IRTemp irt_res_hi  = newTemp(Ity_I32);
9337
9338        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9339        assign( irt_regN_lo,
9340                binop( Iop_Sar32,
9341                       binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9342                       mkU8(16)) );
9343        assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9344
9345        armSignedSatQ( irt_regN_lo, sat_imm, &irt_res_lo, &irt_Q_lo );
9346        or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9347
9348        armSignedSatQ( irt_regN_hi, sat_imm, &irt_res_hi, &irt_Q_hi );
9349        or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9350
9351        IRExpr* ire_result
9352           = binop(Iop_Or32,
9353                   binop(Iop_And32, mkexpr(irt_res_lo), mkU32(0xFFFF)),
9354                   binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)));
9355        if (isT)
9356           putIRegT( regD, ire_result, condT );
9357        else
9358           putIRegA( regD, ire_result, condT, Ijk_Boring );
9359
9360        DIP( "ssat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9361        return True;
9362     }
9363     /* fall through */
9364   }
9365
9366   /* -------------- usat16<c> <Rd>,#<imm4>,<Rn> --------------- */
9367   {
9368     UInt regD = 99, regN = 99, sat_imm = 99;
9369     Bool gate = False;
9370
9371     if (isT) {
9372        if (INSNT0(15,4) == 0xF3A && (INSNT1(15,0) & 0xF0F0) == 0x0000) {
9373           regN = INSNT0(3,0);
9374           regD = INSNT1(11,8);
9375           sat_imm = INSNT1(3,0);
9376           if (!isBadRegT(regD) && !isBadRegT(regN))
9377              gate = True;
9378       }
9379     } else {
9380        if (INSNA(27,20) == BITS8(0,1,1,0,1,1,1,0) &&
9381            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9382            INSNA(7,4)   == BITS4(0,0,1,1)) {
9383           regD    = INSNA(15,12);
9384           regN    = INSNA(3,0);
9385           sat_imm = INSNA(19,16);
9386           if (regD != 15 && regN != 15)
9387              gate = True;
9388        }
9389     }
9390
9391     if (gate) {
9392        IRTemp irt_regN    = newTemp(Ity_I32);
9393        IRTemp irt_regN_lo = newTemp(Ity_I32);
9394        IRTemp irt_regN_hi = newTemp(Ity_I32);
9395        IRTemp irt_Q_lo    = newTemp(Ity_I32);
9396        IRTemp irt_Q_hi    = newTemp(Ity_I32);
9397        IRTemp irt_res_lo  = newTemp(Ity_I32);
9398        IRTemp irt_res_hi  = newTemp(Ity_I32);
9399
9400        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9401        assign( irt_regN_lo, binop( Iop_Sar32,
9402                                    binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9403                                    mkU8(16)) );
9404        assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9405
9406        armUnsignedSatQ( &irt_res_lo, &irt_Q_lo, irt_regN_lo, sat_imm );
9407        or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9408
9409        armUnsignedSatQ( &irt_res_hi, &irt_Q_hi, irt_regN_hi, sat_imm );
9410        or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9411
9412        IRExpr* ire_result = binop( Iop_Or32,
9413                                    binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)),
9414                                    mkexpr(irt_res_lo) );
9415
9416        if (isT)
9417           putIRegT( regD, ire_result, condT );
9418        else
9419           putIRegA( regD, ire_result, condT, Ijk_Boring );
9420
9421        DIP( "usat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9422        return True;
9423     }
9424     /* fall through */
9425   }
9426
9427   /* -------------- uadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9428   {
9429     UInt regD = 99, regN = 99, regM = 99;
9430     Bool gate = False;
9431
9432     if (isT) {
9433        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9434           regN = INSNT0(3,0);
9435           regD = INSNT1(11,8);
9436           regM = INSNT1(3,0);
9437           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9438              gate = True;
9439        }
9440     } else {
9441        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9442            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9443            INSNA(7,4)   == BITS4(0,0,0,1)) {
9444           regD = INSNA(15,12);
9445           regN = INSNA(19,16);
9446           regM = INSNA(3,0);
9447           if (regD != 15 && regN != 15 && regM != 15)
9448              gate = True;
9449        }
9450     }
9451
9452     if (gate) {
9453        IRTemp rNt  = newTemp(Ity_I32);
9454        IRTemp rMt  = newTemp(Ity_I32);
9455        IRTemp res  = newTemp(Ity_I32);
9456        IRTemp reso = newTemp(Ity_I32);
9457
9458        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9459        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9460
9461        assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9462        if (isT)
9463           putIRegT( regD, mkexpr(res), condT );
9464        else
9465           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9466
9467        assign(reso, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
9468        set_GE_32_10_from_bits_31_15(reso, condT);
9469
9470        DIP("uadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9471        return True;
9472     }
9473     /* fall through */
9474   }
9475
9476   /* -------------- sadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9477   {
9478     UInt regD = 99, regN = 99, regM = 99;
9479     Bool gate = False;
9480
9481     if (isT) {
9482        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9483           regN = INSNT0(3,0);
9484           regD = INSNT1(11,8);
9485           regM = INSNT1(3,0);
9486           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9487              gate = True;
9488        }
9489     } else {
9490        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9491            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9492            INSNA(7,4)   == BITS4(0,0,0,1)) {
9493           regD = INSNA(15,12);
9494           regN = INSNA(19,16);
9495           regM = INSNA(3,0);
9496           if (regD != 15 && regN != 15 && regM != 15)
9497              gate = True;
9498        }
9499     }
9500
9501     if (gate) {
9502        IRTemp rNt  = newTemp(Ity_I32);
9503        IRTemp rMt  = newTemp(Ity_I32);
9504        IRTemp res  = newTemp(Ity_I32);
9505        IRTemp reso = newTemp(Ity_I32);
9506
9507        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9508        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9509
9510        assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9511        if (isT)
9512           putIRegT( regD, mkexpr(res), condT );
9513        else
9514           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9515
9516        assign(reso, unop(Iop_Not32,
9517                          binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt))));
9518        set_GE_32_10_from_bits_31_15(reso, condT);
9519
9520        DIP("sadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9521        return True;
9522     }
9523     /* fall through */
9524   }
9525
9526   /* ---------------- usub16<c> <Rd>,<Rn>,<Rm> ---------------- */
9527   {
9528     UInt regD = 99, regN = 99, regM = 99;
9529     Bool gate = False;
9530
9531     if (isT) {
9532        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9533           regN = INSNT0(3,0);
9534           regD = INSNT1(11,8);
9535           regM = INSNT1(3,0);
9536           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9537              gate = True;
9538        }
9539     } else {
9540        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9541            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9542            INSNA(7,4)   == BITS4(0,1,1,1)) {
9543           regD = INSNA(15,12);
9544           regN = INSNA(19,16);
9545           regM = INSNA(3,0);
9546           if (regD != 15 && regN != 15 && regM != 15)
9547             gate = True;
9548        }
9549     }
9550
9551     if (gate) {
9552        IRTemp rNt  = newTemp(Ity_I32);
9553        IRTemp rMt  = newTemp(Ity_I32);
9554        IRTemp res  = newTemp(Ity_I32);
9555        IRTemp reso = newTemp(Ity_I32);
9556
9557        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9558        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9559
9560        assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9561        if (isT)
9562           putIRegT( regD, mkexpr(res), condT );
9563        else
9564           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9565
9566        assign(reso, unop(Iop_Not32,
9567                          binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt))));
9568        set_GE_32_10_from_bits_31_15(reso, condT);
9569
9570        DIP("usub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9571        return True;
9572     }
9573     /* fall through */
9574   }
9575
9576   /* -------------- ssub16<c> <Rd>,<Rn>,<Rm> -------------- */
9577   {
9578     UInt regD = 99, regN = 99, regM = 99;
9579     Bool gate = False;
9580
9581     if (isT) {
9582        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9583           regN = INSNT0(3,0);
9584           regD = INSNT1(11,8);
9585           regM = INSNT1(3,0);
9586           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9587              gate = True;
9588        }
9589     } else {
9590        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9591            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9592            INSNA(7,4)   == BITS4(0,1,1,1)) {
9593           regD = INSNA(15,12);
9594           regN = INSNA(19,16);
9595           regM = INSNA(3,0);
9596           if (regD != 15 && regN != 15 && regM != 15)
9597              gate = True;
9598        }
9599     }
9600
9601     if (gate) {
9602        IRTemp rNt  = newTemp(Ity_I32);
9603        IRTemp rMt  = newTemp(Ity_I32);
9604        IRTemp res  = newTemp(Ity_I32);
9605        IRTemp reso = newTemp(Ity_I32);
9606
9607        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9608        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9609
9610        assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9611        if (isT)
9612           putIRegT( regD, mkexpr(res), condT );
9613        else
9614           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9615
9616        assign(reso, unop(Iop_Not32,
9617                          binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt))));
9618        set_GE_32_10_from_bits_31_15(reso, condT);
9619
9620        DIP("ssub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9621        return True;
9622     }
9623     /* fall through */
9624   }
9625
9626   /* ----------------- uadd8<c> <Rd>,<Rn>,<Rm> ---------------- */
9627   {
9628     UInt regD = 99, regN = 99, regM = 99;
9629     Bool gate = False;
9630
9631     if (isT) {
9632        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9633           regN = INSNT0(3,0);
9634           regD = INSNT1(11,8);
9635           regM = INSNT1(3,0);
9636           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9637              gate = True;
9638        }
9639     } else {
9640        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9641            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9642            (INSNA(7,4)  == BITS4(1,0,0,1))) {
9643           regD = INSNA(15,12);
9644           regN = INSNA(19,16);
9645           regM = INSNA(3,0);
9646           if (regD != 15 && regN != 15 && regM != 15)
9647              gate = True;
9648        }
9649     }
9650
9651     if (gate) {
9652        IRTemp rNt  = newTemp(Ity_I32);
9653        IRTemp rMt  = newTemp(Ity_I32);
9654        IRTemp res  = newTemp(Ity_I32);
9655        IRTemp reso = newTemp(Ity_I32);
9656
9657        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9658        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9659
9660        assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9661        if (isT)
9662           putIRegT( regD, mkexpr(res), condT );
9663        else
9664           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9665
9666        assign(reso, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9667        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9668
9669        DIP("uadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9670        return True;
9671     }
9672     /* fall through */
9673   }
9674
9675   /* ------------------- sadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9676   {
9677     UInt regD = 99, regN = 99, regM = 99;
9678     Bool gate = False;
9679
9680     if (isT) {
9681        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9682           regN = INSNT0(3,0);
9683           regD = INSNT1(11,8);
9684           regM = INSNT1(3,0);
9685           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9686              gate = True;
9687        }
9688     } else {
9689        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9690            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9691            (INSNA(7,4)  == BITS4(1,0,0,1))) {
9692           regD = INSNA(15,12);
9693           regN = INSNA(19,16);
9694           regM = INSNA(3,0);
9695           if (regD != 15 && regN != 15 && regM != 15)
9696              gate = True;
9697        }
9698     }
9699
9700     if (gate) {
9701        IRTemp rNt  = newTemp(Ity_I32);
9702        IRTemp rMt  = newTemp(Ity_I32);
9703        IRTemp res  = newTemp(Ity_I32);
9704        IRTemp reso = newTemp(Ity_I32);
9705
9706        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9707        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9708
9709        assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9710        if (isT)
9711           putIRegT( regD, mkexpr(res), condT );
9712        else
9713           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9714
9715        assign(reso, unop(Iop_Not32,
9716                          binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt))));
9717        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9718
9719        DIP("sadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9720        return True;
9721     }
9722     /* fall through */
9723   }
9724
9725   /* ------------------- usub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9726   {
9727     UInt regD = 99, regN = 99, regM = 99;
9728     Bool gate = False;
9729
9730     if (isT) {
9731        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9732           regN = INSNT0(3,0);
9733           regD = INSNT1(11,8);
9734           regM = INSNT1(3,0);
9735           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9736              gate = True;
9737        }
9738     } else {
9739        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9740            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9741            (INSNA(7,4)  == BITS4(1,1,1,1))) {
9742           regD = INSNA(15,12);
9743           regN = INSNA(19,16);
9744           regM = INSNA(3,0);
9745           if (regD != 15 && regN != 15 && regM != 15)
9746             gate = True;
9747        }
9748     }
9749
9750     if (gate) {
9751        IRTemp rNt  = newTemp(Ity_I32);
9752        IRTemp rMt  = newTemp(Ity_I32);
9753        IRTemp res  = newTemp(Ity_I32);
9754        IRTemp reso = newTemp(Ity_I32);
9755
9756        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9757        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9758
9759        assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9760        if (isT)
9761           putIRegT( regD, mkexpr(res), condT );
9762        else
9763           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9764
9765        assign(reso, unop(Iop_Not32,
9766                          binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt))));
9767        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9768
9769        DIP("usub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9770        return True;
9771     }
9772     /* fall through */
9773   }
9774
9775   /* ------------------- ssub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9776   {
9777     UInt regD = 99, regN = 99, regM = 99;
9778     Bool gate = False;
9779
9780     if (isT) {
9781        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9782           regN = INSNT0(3,0);
9783           regD = INSNT1(11,8);
9784           regM = INSNT1(3,0);
9785           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9786              gate = True;
9787        }
9788     } else {
9789        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9790            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9791            INSNA(7,4)   == BITS4(1,1,1,1)) {
9792           regD = INSNA(15,12);
9793           regN = INSNA(19,16);
9794           regM = INSNA(3,0);
9795           if (regD != 15 && regN != 15 && regM != 15)
9796              gate = True;
9797        }
9798     }
9799
9800     if (gate) {
9801        IRTemp rNt  = newTemp(Ity_I32);
9802        IRTemp rMt  = newTemp(Ity_I32);
9803        IRTemp res  = newTemp(Ity_I32);
9804        IRTemp reso = newTemp(Ity_I32);
9805
9806        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9807        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9808
9809        assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9810        if (isT)
9811           putIRegT( regD, mkexpr(res), condT );
9812        else
9813           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9814
9815        assign(reso, unop(Iop_Not32,
9816                          binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt))));
9817        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9818
9819        DIP("ssub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9820        return True;
9821     }
9822     /* fall through */
9823   }
9824
9825   /* ------------------ qadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
9826   {
9827     UInt regD = 99, regN = 99, regM = 99;
9828     Bool gate = False;
9829
9830     if (isT) {
9831        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9832           regN = INSNT0(3,0);
9833           regD = INSNT1(11,8);
9834           regM = INSNT1(3,0);
9835           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9836              gate = True;
9837        }
9838     } else {
9839        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9840            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9841            INSNA(7,4)   == BITS4(1,0,0,1)) {
9842           regD = INSNA(15,12);
9843           regN = INSNA(19,16);
9844           regM = INSNA(3,0);
9845           if (regD != 15 && regN != 15 && regM != 15)
9846              gate = True;
9847        }
9848     }
9849
9850     if (gate) {
9851        IRTemp rNt   = newTemp(Ity_I32);
9852        IRTemp rMt   = newTemp(Ity_I32);
9853        IRTemp res_q = newTemp(Ity_I32);
9854
9855        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9856        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9857
9858        assign(res_q, binop(Iop_QAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
9859        if (isT)
9860           putIRegT( regD, mkexpr(res_q), condT );
9861        else
9862           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9863
9864        DIP("qadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9865        return True;
9866     }
9867     /* fall through */
9868   }
9869
9870   /* ------------------ qsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
9871   {
9872     UInt regD = 99, regN = 99, regM = 99;
9873     Bool gate = False;
9874
9875     if (isT) {
9876        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9877           regN = INSNT0(3,0);
9878           regD = INSNT1(11,8);
9879           regM = INSNT1(3,0);
9880           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9881              gate = True;
9882        }
9883     } else {
9884        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9885            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9886            INSNA(7,4)   == BITS4(1,1,1,1)) {
9887           regD = INSNA(15,12);
9888           regN = INSNA(19,16);
9889           regM = INSNA(3,0);
9890           if (regD != 15 && regN != 15 && regM != 15)
9891              gate = True;
9892        }
9893     }
9894
9895     if (gate) {
9896        IRTemp rNt   = newTemp(Ity_I32);
9897        IRTemp rMt   = newTemp(Ity_I32);
9898        IRTemp res_q = newTemp(Ity_I32);
9899
9900        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9901        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9902
9903        assign(res_q, binop(Iop_QSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
9904        if (isT)
9905           putIRegT( regD, mkexpr(res_q), condT );
9906        else
9907           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9908
9909        DIP("qsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9910        return True;
9911     }
9912     /* fall through */
9913   }
9914
9915   /* ------------------ uqadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9916   {
9917     UInt regD = 99, regN = 99, regM = 99;
9918     Bool gate = False;
9919
9920     if (isT) {
9921        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9922           regN = INSNT0(3,0);
9923           regD = INSNT1(11,8);
9924           regM = INSNT1(3,0);
9925           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9926              gate = True;
9927        }
9928     } else {
9929        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9930            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9931            (INSNA(7,4)  == BITS4(1,0,0,1))) {
9932           regD = INSNA(15,12);
9933           regN = INSNA(19,16);
9934           regM = INSNA(3,0);
9935           if (regD != 15 && regN != 15 && regM != 15)
9936              gate = True;
9937        }
9938     }
9939
9940     if (gate) {
9941        IRTemp rNt   = newTemp(Ity_I32);
9942        IRTemp rMt   = newTemp(Ity_I32);
9943        IRTemp res_q = newTemp(Ity_I32);
9944
9945        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9946        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9947
9948        assign(res_q, binop(Iop_QAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9949        if (isT)
9950           putIRegT( regD, mkexpr(res_q), condT );
9951        else
9952           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9953
9954        DIP("uqadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9955        return True;
9956     }
9957     /* fall through */
9958   }
9959
9960   /* ------------------ uqsub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9961   {
9962     UInt regD = 99, regN = 99, regM = 99;
9963     Bool gate = False;
9964
9965     if (isT) {
9966        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9967           regN = INSNT0(3,0);
9968           regD = INSNT1(11,8);
9969           regM = INSNT1(3,0);
9970           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9971              gate = True;
9972        }
9973     } else {
9974        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9975            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9976            (INSNA(7,4)  == BITS4(1,1,1,1))) {
9977           regD = INSNA(15,12);
9978           regN = INSNA(19,16);
9979           regM = INSNA(3,0);
9980           if (regD != 15 && regN != 15 && regM != 15)
9981             gate = True;
9982        }
9983     }
9984
9985     if (gate) {
9986        IRTemp rNt   = newTemp(Ity_I32);
9987        IRTemp rMt   = newTemp(Ity_I32);
9988        IRTemp res_q = newTemp(Ity_I32);
9989
9990        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9991        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9992
9993        assign(res_q, binop(Iop_QSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
9994        if (isT)
9995           putIRegT( regD, mkexpr(res_q), condT );
9996        else
9997           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9998
9999        DIP("uqsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10000        return True;
10001     }
10002     /* fall through */
10003   }
10004
10005   /* ----------------- uhadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
10006   {
10007     UInt regD = 99, regN = 99, regM = 99;
10008     Bool gate = False;
10009
10010     if (isT) {
10011        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
10012           regN = INSNT0(3,0);
10013           regD = INSNT1(11,8);
10014           regM = INSNT1(3,0);
10015           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10016              gate = True;
10017        }
10018     } else {
10019        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
10020            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10021            INSNA(7,4)   == BITS4(1,0,0,1)) {
10022           regD = INSNA(15,12);
10023           regN = INSNA(19,16);
10024           regM = INSNA(3,0);
10025           if (regD != 15 && regN != 15 && regM != 15)
10026              gate = True;
10027        }
10028     }
10029
10030     if (gate) {
10031        IRTemp rNt   = newTemp(Ity_I32);
10032        IRTemp rMt   = newTemp(Ity_I32);
10033        IRTemp res_q = newTemp(Ity_I32);
10034
10035        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10036        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10037
10038        assign(res_q, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
10039        if (isT)
10040           putIRegT( regD, mkexpr(res_q), condT );
10041        else
10042           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10043
10044        DIP("uhadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10045        return True;
10046     }
10047     /* fall through */
10048   }
10049
10050   /* ----------------- uhadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
10051   {
10052     UInt regD = 99, regN = 99, regM = 99;
10053     Bool gate = False;
10054
10055     if (isT) {
10056        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
10057           regN = INSNT0(3,0);
10058           regD = INSNT1(11,8);
10059           regM = INSNT1(3,0);
10060           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10061              gate = True;
10062        }
10063     } else {
10064        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
10065            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10066            INSNA(7,4)   == BITS4(0,0,0,1)) {
10067           regD = INSNA(15,12);
10068           regN = INSNA(19,16);
10069           regM = INSNA(3,0);
10070           if (regD != 15 && regN != 15 && regM != 15)
10071              gate = True;
10072        }
10073     }
10074
10075     if (gate) {
10076        IRTemp rNt   = newTemp(Ity_I32);
10077        IRTemp rMt   = newTemp(Ity_I32);
10078        IRTemp res_q = newTemp(Ity_I32);
10079
10080        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10081        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10082
10083        assign(res_q, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
10084        if (isT)
10085           putIRegT( regD, mkexpr(res_q), condT );
10086        else
10087           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10088
10089        DIP("uhadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10090        return True;
10091     }
10092     /* fall through */
10093   }
10094
10095   /* ----------------- shadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
10096   {
10097     UInt regD = 99, regN = 99, regM = 99;
10098     Bool gate = False;
10099
10100     if (isT) {
10101        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
10102           regN = INSNT0(3,0);
10103           regD = INSNT1(11,8);
10104           regM = INSNT1(3,0);
10105           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10106              gate = True;
10107        }
10108     } else {
10109        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
10110            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10111            INSNA(7,4)   == BITS4(1,0,0,1)) {
10112           regD = INSNA(15,12);
10113           regN = INSNA(19,16);
10114           regM = INSNA(3,0);
10115           if (regD != 15 && regN != 15 && regM != 15)
10116              gate = True;
10117        }
10118     }
10119
10120     if (gate) {
10121        IRTemp rNt   = newTemp(Ity_I32);
10122        IRTemp rMt   = newTemp(Ity_I32);
10123        IRTemp res_q = newTemp(Ity_I32);
10124
10125        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10126        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10127
10128        assign(res_q, binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
10129        if (isT)
10130           putIRegT( regD, mkexpr(res_q), condT );
10131        else
10132           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10133
10134        DIP("shadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10135        return True;
10136     }
10137     /* fall through */
10138   }
10139
10140   /* ------------------ qadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
10141   {
10142     UInt regD = 99, regN = 99, regM = 99;
10143     Bool gate = False;
10144
10145     if (isT) {
10146        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10147           regN = INSNT0(3,0);
10148           regD = INSNT1(11,8);
10149           regM = INSNT1(3,0);
10150           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10151              gate = True;
10152        }
10153     } else {
10154        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10155            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10156            INSNA(7,4)   == BITS4(0,0,0,1)) {
10157           regD = INSNA(15,12);
10158           regN = INSNA(19,16);
10159           regM = INSNA(3,0);
10160           if (regD != 15 && regN != 15 && regM != 15)
10161              gate = True;
10162        }
10163     }
10164
10165     if (gate) {
10166        IRTemp rNt   = newTemp(Ity_I32);
10167        IRTemp rMt   = newTemp(Ity_I32);
10168        IRTemp res_q = newTemp(Ity_I32);
10169
10170        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10171        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10172
10173        assign(res_q, binop(Iop_QAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
10174        if (isT)
10175           putIRegT( regD, mkexpr(res_q), condT );
10176        else
10177           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10178
10179        DIP("qadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10180        return True;
10181     }
10182     /* fall through */
10183   }
10184
10185   /* ------------------ qsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
10186   {
10187     UInt regD = 99, regN = 99, regM = 99;
10188     Bool gate = False;
10189
10190      if (isT) {
10191        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10192           regN = INSNT0(3,0);
10193           regD = INSNT1(11,8);
10194           regM = INSNT1(3,0);
10195           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10196              gate = True;
10197        }
10198     } else {
10199        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10200            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10201            INSNA(7,4)   == BITS4(0,1,1,1)) {
10202           regD = INSNA(15,12);
10203           regN = INSNA(19,16);
10204           regM = INSNA(3,0);
10205           if (regD != 15 && regN != 15 && regM != 15)
10206             gate = True;
10207        }
10208     }
10209
10210     if (gate) {
10211        IRTemp rNt   = newTemp(Ity_I32);
10212        IRTemp rMt   = newTemp(Ity_I32);
10213        IRTemp res_q = newTemp(Ity_I32);
10214
10215        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10216        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10217
10218        assign(res_q, binop(Iop_QSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
10219        if (isT)
10220           putIRegT( regD, mkexpr(res_q), condT );
10221        else
10222           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10223
10224        DIP("qsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10225        return True;
10226     }
10227     /* fall through */
10228   }
10229
10230   /* ------------------- qsax<c> <Rd>,<Rn>,<Rm> ------------------- */
10231   /* note: the hardware seems to construct the result differently
10232      from wot the manual says. */
10233   {
10234     UInt regD = 99, regN = 99, regM = 99;
10235     Bool gate = False;
10236
10237     if (isT) {
10238        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10239           regN = INSNT0(3,0);
10240           regD = INSNT1(11,8);
10241           regM = INSNT1(3,0);
10242           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10243              gate = True;
10244        }
10245     } else {
10246        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10247            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10248            INSNA(7,4)   == BITS4(0,1,0,1)) {
10249           regD = INSNA(15,12);
10250           regN = INSNA(19,16);
10251           regM = INSNA(3,0);
10252           if (regD != 15 && regN != 15 && regM != 15)
10253              gate = True;
10254        }
10255     }
10256
10257     if (gate) {
10258        IRTemp irt_regN     = newTemp(Ity_I32);
10259        IRTemp irt_regM     = newTemp(Ity_I32);
10260        IRTemp irt_sum      = newTemp(Ity_I32);
10261        IRTemp irt_diff     = newTemp(Ity_I32);
10262        IRTemp irt_sum_res  = newTemp(Ity_I32);
10263        IRTemp irt_diff_res = newTemp(Ity_I32);
10264
10265        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10266        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10267
10268        assign( irt_diff,
10269                binop( Iop_Sub32,
10270                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10271                       binop( Iop_Sar32,
10272                              binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10273                              mkU8(16) ) ) );
10274        armSignedSatQ( irt_diff, 0x10, &irt_diff_res, NULL);
10275
10276        assign( irt_sum,
10277                binop( Iop_Add32,
10278                       binop( Iop_Sar32,
10279                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10280                              mkU8(16) ),
10281                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) )) );
10282        armSignedSatQ( irt_sum, 0x10, &irt_sum_res, NULL );
10283
10284        IRExpr* ire_result = binop( Iop_Or32,
10285                                    binop( Iop_Shl32, mkexpr(irt_diff_res),
10286                                           mkU8(16) ),
10287                                    binop( Iop_And32, mkexpr(irt_sum_res),
10288                                           mkU32(0xFFFF)) );
10289
10290        if (isT)
10291           putIRegT( regD, ire_result, condT );
10292        else
10293           putIRegA( regD, ire_result, condT, Ijk_Boring );
10294
10295        DIP( "qsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10296        return True;
10297     }
10298     /* fall through */
10299   }
10300
10301   /* ------------------- qasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10302   {
10303     UInt regD = 99, regN = 99, regM = 99;
10304     Bool gate = False;
10305
10306     if (isT) {
10307        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10308           regN = INSNT0(3,0);
10309           regD = INSNT1(11,8);
10310           regM = INSNT1(3,0);
10311           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10312              gate = True;
10313        }
10314     } else {
10315        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10316            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10317            INSNA(7,4)   == BITS4(0,0,1,1)) {
10318           regD = INSNA(15,12);
10319           regN = INSNA(19,16);
10320           regM = INSNA(3,0);
10321           if (regD != 15 && regN != 15 && regM != 15)
10322              gate = True;
10323        }
10324     }
10325
10326     if (gate) {
10327        IRTemp irt_regN     = newTemp(Ity_I32);
10328        IRTemp irt_regM     = newTemp(Ity_I32);
10329        IRTemp irt_sum      = newTemp(Ity_I32);
10330        IRTemp irt_diff     = newTemp(Ity_I32);
10331        IRTemp irt_res_sum  = newTemp(Ity_I32);
10332        IRTemp irt_res_diff = newTemp(Ity_I32);
10333
10334        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10335        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10336
10337        assign( irt_diff,
10338                binop( Iop_Sub32,
10339                       binop( Iop_Sar32,
10340                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10341                              mkU8(16) ),
10342                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10343        armSignedSatQ( irt_diff, 0x10, &irt_res_diff, NULL );
10344
10345        assign( irt_sum,
10346                binop( Iop_Add32,
10347                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10348                       binop( Iop_Sar32,
10349                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10350                              mkU8(16) ) ) );
10351        armSignedSatQ( irt_sum, 0x10, &irt_res_sum, NULL );
10352
10353        IRExpr* ire_result
10354          = binop( Iop_Or32,
10355                   binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
10356                   binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
10357
10358        if (isT)
10359           putIRegT( regD, ire_result, condT );
10360        else
10361           putIRegA( regD, ire_result, condT, Ijk_Boring );
10362
10363        DIP( "qasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10364        return True;
10365     }
10366     /* fall through */
10367   }
10368
10369   /* ------------------- sasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10370   {
10371     UInt regD = 99, regN = 99, regM = 99;
10372     Bool gate = False;
10373
10374     if (isT) {
10375        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
10376           regN = INSNT0(3,0);
10377           regD = INSNT1(11,8);
10378           regM = INSNT1(3,0);
10379           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10380              gate = True;
10381        }
10382     } else {
10383        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
10384            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10385            INSNA(7,4)   == BITS4(0,0,1,1)) {
10386           regD = INSNA(15,12);
10387           regN = INSNA(19,16);
10388           regM = INSNA(3,0);
10389           if (regD != 15 && regN != 15 && regM != 15)
10390              gate = True;
10391        }
10392     }
10393
10394     if (gate) {
10395        IRTemp irt_regN = newTemp(Ity_I32);
10396        IRTemp irt_regM = newTemp(Ity_I32);
10397        IRTemp irt_sum  = newTemp(Ity_I32);
10398        IRTemp irt_diff = newTemp(Ity_I32);
10399
10400        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10401        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10402
10403        assign( irt_diff,
10404                binop( Iop_Sub32,
10405                       binop( Iop_Sar32,
10406                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10407                              mkU8(16) ),
10408                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10409
10410        assign( irt_sum,
10411                binop( Iop_Add32,
10412                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10413                       binop( Iop_Sar32,
10414                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10415                              mkU8(16) ) ) );
10416
10417        IRExpr* ire_result
10418          = binop( Iop_Or32,
10419                   binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
10420                   binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
10421
10422        IRTemp ge10 = newTemp(Ity_I32);
10423        assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
10424        put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
10425        put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
10426
10427        IRTemp ge32 = newTemp(Ity_I32);
10428        assign(ge32, unop(Iop_Not32, mkexpr(irt_sum)));
10429        put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
10430        put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
10431
10432        if (isT)
10433           putIRegT( regD, ire_result, condT );
10434        else
10435           putIRegA( regD, ire_result, condT, Ijk_Boring );
10436
10437        DIP( "sasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10438        return True;
10439     }
10440     /* fall through */
10441   }
10442
10443   /* --------------- smuad, smuadx<c><Rd>,<Rn>,<Rm> --------------- */
   /* --------------- smusd, smusdx<c><Rd>,<Rn>,<Rm> --------------- */
10445   {
10446     UInt regD = 99, regN = 99, regM = 99, bitM = 99;
10447     Bool gate = False, isAD = False;
10448
10449     if (isT) {
10450        if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10451            && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
10452           regN = INSNT0(3,0);
10453           regD = INSNT1(11,8);
10454           regM = INSNT1(3,0);
10455           bitM = INSNT1(4,4);
10456           isAD = INSNT0(15,4) == 0xFB2;
10457           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10458              gate = True;
10459        }
10460     } else {
10461        if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10462            INSNA(15,12) == BITS4(1,1,1,1)         &&
10463            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1) ) {
10464           regD = INSNA(19,16);
10465           regN = INSNA(3,0);
10466           regM = INSNA(11,8);
10467           bitM = INSNA(5,5);
10468           isAD = INSNA(6,6) == 0;
10469           if (regD != 15 && regN != 15 && regM != 15)
10470              gate = True;
10471        }
10472     }
10473
10474     if (gate) {
10475        IRTemp irt_regN    = newTemp(Ity_I32);
10476        IRTemp irt_regM    = newTemp(Ity_I32);
10477        IRTemp irt_prod_lo = newTemp(Ity_I32);
10478        IRTemp irt_prod_hi = newTemp(Ity_I32);
10479        IRTemp tmpM        = newTemp(Ity_I32);
10480
10481        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10482
10483        assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
10484        assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
10485
10486        assign( irt_prod_lo,
10487                binop( Iop_Mul32,
10488                       binop( Iop_Sar32,
10489                              binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
10490                              mkU8(16) ),
10491                       binop( Iop_Sar32,
10492                              binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10493                              mkU8(16) ) ) );
10494        assign( irt_prod_hi, binop(Iop_Mul32,
10495                                   binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)),
10496                                   binop(Iop_Sar32, mkexpr(irt_regM), mkU8(16))) );
10497        IRExpr* ire_result
10498           = binop( isAD ? Iop_Add32 : Iop_Sub32,
10499                    mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
10500
10501        if (isT)
10502           putIRegT( regD, ire_result, condT );
10503        else
10504           putIRegA( regD, ire_result, condT, Ijk_Boring );
10505
10506        if (isAD) {
10507           or_into_QFLAG32(
10508              signed_overflow_after_Add32( ire_result,
10509                                           irt_prod_lo, irt_prod_hi ),
10510              condT
10511           );
10512        }
10513
10514        DIP("smu%cd%s%s r%u, r%u, r%u\n",
10515            isAD ? 'a' : 's',
10516            bitM ? "x" : "", nCC(conq), regD, regN, regM);
10517        return True;
10518     }
10519     /* fall through */
10520   }
10521
10522   /* --------------- smlad{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10523   /* --------------- smlsd{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10524   {
10525     UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10526     Bool gate = False, isAD = False;
10527
10528     if (isT) {
10529       if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10530           && INSNT1(7,5) == BITS3(0,0,0)) {
10531           regN = INSNT0(3,0);
10532           regD = INSNT1(11,8);
10533           regM = INSNT1(3,0);
10534           regA = INSNT1(15,12);
10535           bitM = INSNT1(4,4);
10536           isAD = INSNT0(15,4) == 0xFB2;
10537           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10538               && !isBadRegT(regA))
10539              gate = True;
10540        }
10541     } else {
10542        if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10543            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
10544           regD = INSNA(19,16);
10545           regA = INSNA(15,12);
10546           regN = INSNA(3,0);
10547           regM = INSNA(11,8);
10548           bitM = INSNA(5,5);
10549           isAD = INSNA(6,6) == 0;
10550           if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10551              gate = True;
10552        }
10553     }
10554
10555     if (gate) {
10556        IRTemp irt_regN    = newTemp(Ity_I32);
10557        IRTemp irt_regM    = newTemp(Ity_I32);
10558        IRTemp irt_regA    = newTemp(Ity_I32);
10559        IRTemp irt_prod_lo = newTemp(Ity_I32);
10560        IRTemp irt_prod_hi = newTemp(Ity_I32);
10561        IRTemp irt_sum     = newTemp(Ity_I32);
10562        IRTemp tmpM        = newTemp(Ity_I32);
10563
10564        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10565        assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10566
10567        assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
10568        assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
10569
10570        assign( irt_prod_lo,
10571                binop(Iop_Mul32,
10572                      binop(Iop_Sar32,
10573                            binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10574                            mkU8(16)),
10575                      binop(Iop_Sar32,
10576                            binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10577                            mkU8(16))) );
10578        assign( irt_prod_hi,
10579                binop( Iop_Mul32,
10580                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10581                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10582        assign( irt_sum, binop( isAD ? Iop_Add32 : Iop_Sub32,
10583                                mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) ) );
10584
10585        IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_sum), mkexpr(irt_regA));
10586
10587        if (isT)
10588           putIRegT( regD, ire_result, condT );
10589        else
10590           putIRegA( regD, ire_result, condT, Ijk_Boring );
10591
10592        if (isAD) {
10593           or_into_QFLAG32(
10594              signed_overflow_after_Add32( mkexpr(irt_sum),
10595                                           irt_prod_lo, irt_prod_hi ),
10596              condT
10597           );
10598        }
10599
10600        or_into_QFLAG32(
10601           signed_overflow_after_Add32( ire_result, irt_sum, irt_regA ),
10602           condT
10603        );
10604
10605        DIP("sml%cd%s%s r%u, r%u, r%u, r%u\n",
10606            isAD ? 'a' : 's',
10607            bitM ? "x" : "", nCC(conq), regD, regN, regM, regA);
10608        return True;
10609     }
10610     /* fall through */
10611   }
10612
10613   /* ----- smlabb, smlabt, smlatb, smlatt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10614   {
10615     UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99, bitN = 99;
10616     Bool gate = False;
10617
10618     if (isT) {
10619        if (INSNT0(15,4) == 0xFB1 && INSNT1(7,6) == BITS2(0,0)) {
10620           regN = INSNT0(3,0);
10621           regD = INSNT1(11,8);
10622           regM = INSNT1(3,0);
10623           regA = INSNT1(15,12);
10624           bitM = INSNT1(4,4);
10625           bitN = INSNT1(5,5);
10626           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10627               && !isBadRegT(regA))
10628              gate = True;
10629        }
10630     } else {
10631        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
10632            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10633           regD = INSNA(19,16);
10634           regN = INSNA(3,0);
10635           regM = INSNA(11,8);
10636           regA = INSNA(15,12);
10637           bitM = INSNA(6,6);
10638           bitN = INSNA(5,5);
10639           if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10640              gate = True;
10641        }
10642     }
10643
10644     if (gate) {
10645        IRTemp irt_regA = newTemp(Ity_I32);
10646        IRTemp irt_prod = newTemp(Ity_I32);
10647
10648        assign( irt_prod,
10649                binop(Iop_Mul32,
10650                      binop(Iop_Sar32,
10651                            binop(Iop_Shl32,
10652                                  isT ? getIRegT(regN) : getIRegA(regN),
10653                                  mkU8(bitN ? 0 : 16)),
10654                            mkU8(16)),
10655                      binop(Iop_Sar32,
10656                            binop(Iop_Shl32,
10657                                  isT ? getIRegT(regM) : getIRegA(regM),
10658                                  mkU8(bitM ? 0 : 16)),
10659                            mkU8(16))) );
10660
10661        assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10662
10663        IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_prod), mkexpr(irt_regA));
10664
10665        if (isT)
10666           putIRegT( regD, ire_result, condT );
10667        else
10668           putIRegA( regD, ire_result, condT, Ijk_Boring );
10669
10670        or_into_QFLAG32(
10671           signed_overflow_after_Add32( ire_result, irt_prod, irt_regA ),
10672           condT
10673        );
10674
10675        DIP( "smla%c%c%s r%u, r%u, r%u, r%u\n",
10676             bitN ? 't' : 'b', bitM ? 't' : 'b',
10677             nCC(conq), regD, regN, regM, regA );
10678        return True;
10679     }
10680     /* fall through */
10681   }
10682
10683   /* ----- smlalbb, smlalbt, smlaltb, smlaltt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10684   {
10685     UInt regDHi = 99, regN = 99, regM = 99, regDLo = 99, bitM = 99, bitN = 99;
10686     Bool gate = False;
10687
10688     if (isT) {
10689        if (INSNT0(15,4) == 0xFBC && INSNT1(7,6) == BITS2(1,0)) {
10690           regN   = INSNT0(3,0);
10691           regDHi = INSNT1(11,8);
10692           regM   = INSNT1(3,0);
10693           regDLo = INSNT1(15,12);
10694           bitM   = INSNT1(4,4);
10695           bitN   = INSNT1(5,5);
10696           if (!isBadRegT(regDHi) && !isBadRegT(regN) && !isBadRegT(regM)
10697               && !isBadRegT(regDLo) && regDHi != regDLo)
10698              gate = True;
10699        }
10700     } else {
10701        if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
10702            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10703           regDHi = INSNA(19,16);
10704           regN   = INSNA(3,0);
10705           regM   = INSNA(11,8);
10706           regDLo = INSNA(15,12);
10707           bitM   = INSNA(6,6);
10708           bitN   = INSNA(5,5);
10709           if (regDHi != 15 && regN != 15 && regM != 15 && regDLo != 15 &&
10710               regDHi != regDLo)
10711              gate = True;
10712        }
10713     }
10714
10715     if (gate) {
10716        IRTemp irt_regD  = newTemp(Ity_I64);
10717        IRTemp irt_prod  = newTemp(Ity_I64);
10718        IRTemp irt_res   = newTemp(Ity_I64);
10719        IRTemp irt_resHi = newTemp(Ity_I32);
10720        IRTemp irt_resLo = newTemp(Ity_I32);
10721
10722        assign( irt_prod,
10723                binop(Iop_MullS32,
10724                      binop(Iop_Sar32,
10725                            binop(Iop_Shl32,
10726                                  isT ? getIRegT(regN) : getIRegA(regN),
10727                                  mkU8(bitN ? 0 : 16)),
10728                            mkU8(16)),
10729                      binop(Iop_Sar32,
10730                            binop(Iop_Shl32,
10731                                  isT ? getIRegT(regM) : getIRegA(regM),
10732                                  mkU8(bitM ? 0 : 16)),
10733                            mkU8(16))) );
10734
10735        assign( irt_regD, binop(Iop_32HLto64,
10736                                isT ? getIRegT(regDHi) : getIRegA(regDHi),
10737                                isT ? getIRegT(regDLo) : getIRegA(regDLo)) );
10738        assign( irt_res, binop(Iop_Add64, mkexpr(irt_regD), mkexpr(irt_prod)) );
10739        assign( irt_resHi, unop(Iop_64HIto32, mkexpr(irt_res)) );
10740        assign( irt_resLo, unop(Iop_64to32, mkexpr(irt_res)) );
10741
10742        if (isT) {
10743           putIRegT( regDHi, mkexpr(irt_resHi), condT );
10744           putIRegT( regDLo, mkexpr(irt_resLo), condT );
10745        } else {
10746           putIRegA( regDHi, mkexpr(irt_resHi), condT, Ijk_Boring );
10747           putIRegA( regDLo, mkexpr(irt_resLo), condT, Ijk_Boring );
10748        }
10749
10750        DIP( "smlal%c%c%s r%u, r%u, r%u, r%u\n",
10751             bitN ? 't' : 'b', bitM ? 't' : 'b',
10752             nCC(conq), regDHi, regN, regM, regDLo );
10753        return True;
10754     }
10755     /* fall through */
10756   }
10757
10758   /* ----- smlawb, smlawt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10759   {
10760     UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10761     Bool gate = False;
10762
10763     if (isT) {
10764        if (INSNT0(15,4) == 0xFB3 && INSNT1(7,5) == BITS3(0,0,0)) {
10765           regN = INSNT0(3,0);
10766           regD = INSNT1(11,8);
10767           regM = INSNT1(3,0);
10768           regA = INSNT1(15,12);
10769           bitM = INSNT1(4,4);
10770           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10771               && !isBadRegT(regA))
10772              gate = True;
10773        }
10774     } else {
10775        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
10776            (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,0,0)) {
10777           regD = INSNA(19,16);
10778           regN = INSNA(3,0);
10779           regM = INSNA(11,8);
10780           regA = INSNA(15,12);
10781           bitM = INSNA(6,6);
10782           if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10783              gate = True;
10784        }
10785     }
10786
10787     if (gate) {
10788        IRTemp irt_regA = newTemp(Ity_I32);
10789        IRTemp irt_prod = newTemp(Ity_I64);
10790
10791        assign( irt_prod,
10792                binop(Iop_MullS32,
10793                      isT ? getIRegT(regN) : getIRegA(regN),
10794                      binop(Iop_Sar32,
10795                            binop(Iop_Shl32,
10796                                  isT ? getIRegT(regM) : getIRegA(regM),
10797                                  mkU8(bitM ? 0 : 16)),
10798                            mkU8(16))) );
10799
10800        assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10801
10802        IRTemp prod32 = newTemp(Ity_I32);
10803        assign(prod32,
10804               binop(Iop_Or32,
10805                     binop(Iop_Shl32, unop(Iop_64HIto32, mkexpr(irt_prod)), mkU8(16)),
10806                     binop(Iop_Shr32, unop(Iop_64to32, mkexpr(irt_prod)), mkU8(16))
10807        ));
10808
10809        IRExpr* ire_result = binop(Iop_Add32, mkexpr(prod32), mkexpr(irt_regA));
10810
10811        if (isT)
10812           putIRegT( regD, ire_result, condT );
10813        else
10814           putIRegA( regD, ire_result, condT, Ijk_Boring );
10815
10816        or_into_QFLAG32(
10817           signed_overflow_after_Add32( ire_result, prod32, irt_regA ),
10818           condT
10819        );
10820
10821        DIP( "smlaw%c%s r%u, r%u, r%u, r%u\n",
10822             bitM ? 't' : 'b',
10823             nCC(conq), regD, regN, regM, regA );
10824        return True;
10825     }
10826     /* fall through */
10827   }
10828
10829   /* ------------------- sel<c> <Rd>,<Rn>,<Rm> -------------------- */
10830   /* fixme: fix up the test in v6media.c so that we can pass the ge
10831      flags as part of the test. */
10832   {
10833     UInt regD = 99, regN = 99, regM = 99;
10834     Bool gate = False;
10835
10836     if (isT) {
10837        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
10838           regN = INSNT0(3,0);
10839           regD = INSNT1(11,8);
10840           regM = INSNT1(3,0);
10841           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10842              gate = True;
10843        }
10844     } else {
10845        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
10846            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10847            INSNA(7,4)   == BITS4(1,0,1,1)) {
10848           regD = INSNA(15,12);
10849           regN = INSNA(19,16);
10850           regM = INSNA(3,0);
10851           if (regD != 15 && regN != 15 && regM != 15)
10852              gate = True;
10853        }
10854     }
10855
10856     if (gate) {
10857        IRTemp irt_ge_flag0 = newTemp(Ity_I32);
10858        IRTemp irt_ge_flag1 = newTemp(Ity_I32);
10859        IRTemp irt_ge_flag2 = newTemp(Ity_I32);
10860        IRTemp irt_ge_flag3 = newTemp(Ity_I32);
10861
10862        assign( irt_ge_flag0, get_GEFLAG32(0) );
10863        assign( irt_ge_flag1, get_GEFLAG32(1) );
10864        assign( irt_ge_flag2, get_GEFLAG32(2) );
10865        assign( irt_ge_flag3, get_GEFLAG32(3) );
10866
10867        IRExpr* ire_ge_flag0_or
10868          = binop(Iop_Or32, mkexpr(irt_ge_flag0),
10869                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag0)));
10870        IRExpr* ire_ge_flag1_or
10871          = binop(Iop_Or32, mkexpr(irt_ge_flag1),
10872                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag1)));
10873        IRExpr* ire_ge_flag2_or
10874          = binop(Iop_Or32, mkexpr(irt_ge_flag2),
10875                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag2)));
10876        IRExpr* ire_ge_flag3_or
10877          = binop(Iop_Or32, mkexpr(irt_ge_flag3),
10878                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag3)));
10879
10880        IRExpr* ire_ge_flags
10881          = binop( Iop_Or32,
10882                   binop(Iop_Or32,
10883                         binop(Iop_And32,
10884                               binop(Iop_Sar32, ire_ge_flag0_or, mkU8(31)),
10885                               mkU32(0x000000ff)),
10886                         binop(Iop_And32,
10887                               binop(Iop_Sar32, ire_ge_flag1_or, mkU8(31)),
10888                               mkU32(0x0000ff00))),
10889                   binop(Iop_Or32,
10890                         binop(Iop_And32,
10891                               binop(Iop_Sar32, ire_ge_flag2_or, mkU8(31)),
10892                               mkU32(0x00ff0000)),
10893                         binop(Iop_And32,
10894                               binop(Iop_Sar32, ire_ge_flag3_or, mkU8(31)),
10895                               mkU32(0xff000000))) );
10896
10897        IRExpr* ire_result
10898          = binop(Iop_Or32,
10899                  binop(Iop_And32,
10900                        isT ? getIRegT(regN) : getIRegA(regN),
10901                        ire_ge_flags ),
10902                  binop(Iop_And32,
10903                        isT ? getIRegT(regM) : getIRegA(regM),
10904                        unop(Iop_Not32, ire_ge_flags)));
10905
10906        if (isT)
10907           putIRegT( regD, ire_result, condT );
10908        else
10909           putIRegA( regD, ire_result, condT, Ijk_Boring );
10910
10911        DIP("sel%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10912        return True;
10913     }
10914     /* fall through */
10915   }
10916
10917   /* ----------------- uxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
10918   {
10919     UInt regD = 99, regN = 99, regM = 99, rotate = 99;
10920     Bool gate = False;
10921
10922     if (isT) {
10923        if (INSNT0(15,4) == 0xFA3 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
10924           regN   = INSNT0(3,0);
10925           regD   = INSNT1(11,8);
10926           regM   = INSNT1(3,0);
10927           rotate = INSNT1(5,4);
10928           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10929              gate = True;
10930        }
10931     } else {
10932        if (INSNA(27,20) == BITS8(0,1,1,0,1,1,0,0) &&
10933            INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
10934           regD   = INSNA(15,12);
10935           regN   = INSNA(19,16);
10936           regM   = INSNA(3,0);
10937           rotate = INSNA(11,10);
10938           if (regD != 15 && regN != 15 && regM != 15)
10939             gate = True;
10940        }
10941     }
10942
10943     if (gate) {
10944        IRTemp irt_regN = newTemp(Ity_I32);
10945        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10946
10947        IRTemp irt_regM = newTemp(Ity_I32);
10948        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10949
10950        IRTemp irt_rot = newTemp(Ity_I32);
10951        assign( irt_rot, binop(Iop_And32,
10952                               genROR32(irt_regM, 8 * rotate),
10953                               mkU32(0x00FF00FF)) );
10954
10955        IRExpr* resLo
10956           = binop(Iop_And32,
10957                   binop(Iop_Add32, mkexpr(irt_regN), mkexpr(irt_rot)),
10958                   mkU32(0x0000FFFF));
10959
10960        IRExpr* resHi
10961           = binop(Iop_Add32,
10962                   binop(Iop_And32, mkexpr(irt_regN), mkU32(0xFFFF0000)),
10963                   binop(Iop_And32, mkexpr(irt_rot),  mkU32(0xFFFF0000)));
10964
10965        IRExpr* ire_result
10966           = binop( Iop_Or32, resHi, resLo );
10967
10968        if (isT)
10969           putIRegT( regD, ire_result, condT );
10970        else
10971           putIRegA( regD, ire_result, condT, Ijk_Boring );
10972
10973        DIP( "uxtab16%s r%u, r%u, r%u, ROR #%u\n",
10974             nCC(conq), regD, regN, regM, 8 * rotate );
10975        return True;
10976     }
10977     /* fall through */
10978   }
10979
10980   /* --------------- usad8  Rd,Rn,Rm    ---------------- */
10981   /* --------------- usada8 Rd,Rn,Rm,Ra ---------------- */
10982   {
10983     UInt rD = 99, rN = 99, rM = 99, rA = 99;
10984     Bool gate = False;
10985
10986     if (isT) {
10987       if (INSNT0(15,4) == 0xFB7 && INSNT1(7,4) == BITS4(0,0,0,0)) {
10988           rN = INSNT0(3,0);
10989           rA = INSNT1(15,12);
10990           rD = INSNT1(11,8);
10991           rM = INSNT1(3,0);
10992           if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && rA != 13)
10993              gate = True;
10994        }
10995     } else {
10996        if (INSNA(27,20) == BITS8(0,1,1,1,1,0,0,0) &&
10997            INSNA(7,4)   == BITS4(0,0,0,1) ) {
10998           rD = INSNA(19,16);
10999           rA = INSNA(15,12);
11000           rM = INSNA(11,8);
11001           rN = INSNA(3,0);
11002           if (rD != 15 && rN != 15 && rM != 15 /* but rA can be 15 */)
11003              gate = True;
11004        }
11005     }
11006     /* We allow rA == 15, to denote the usad8 (no accumulator) case. */
11007
11008     if (gate) {
11009        IRExpr* rNe = isT ? getIRegT(rN) : getIRegA(rN);
11010        IRExpr* rMe = isT ? getIRegT(rM) : getIRegA(rM);
11011        IRExpr* rAe = rA == 15 ? mkU32(0)
11012                               : (isT ? getIRegT(rA) : getIRegA(rA));
11013        IRExpr* res = binop(Iop_Add32,
11014                            binop(Iop_Sad8Ux4, rNe, rMe),
11015                            rAe);
11016        if (isT)
11017           putIRegT( rD, res, condT );
11018        else
11019           putIRegA( rD, res, condT, Ijk_Boring );
11020
11021        if (rA == 15) {
11022           DIP( "usad8%s r%u, r%u, r%u\n",
11023                nCC(conq), rD, rN, rM );
11024        } else {
11025           DIP( "usada8%s r%u, r%u, r%u, r%u\n",
11026                nCC(conq), rD, rN, rM, rA );
11027        }
11028        return True;
11029     }
11030     /* fall through */
11031   }
11032
11033   /* ------------------ qadd<c> <Rd>,<Rn>,<Rm> ------------------- */
11034   {
11035     UInt regD = 99, regN = 99, regM = 99;
11036     Bool gate = False;
11037
11038     if (isT) {
11039        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
11040           regN = INSNT0(3,0);
11041           regD = INSNT1(11,8);
11042           regM = INSNT1(3,0);
11043           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11044              gate = True;
11045        }
11046     } else {
11047        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
11048            INSNA(11,8)  == BITS4(0,0,0,0)         &&
11049            INSNA(7,4)   == BITS4(0,1,0,1)) {
11050           regD = INSNA(15,12);
11051           regN = INSNA(19,16);
11052           regM = INSNA(3,0);
11053           if (regD != 15 && regN != 15 && regM != 15)
11054              gate = True;
11055        }
11056     }
11057
11058     if (gate) {
11059        IRTemp rNt   = newTemp(Ity_I32);
11060        IRTemp rMt   = newTemp(Ity_I32);
11061        IRTemp res_q = newTemp(Ity_I32);
11062
11063        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11064        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11065
11066        assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt)));
11067        if (isT)
11068           putIRegT( regD, mkexpr(res_q), condT );
11069        else
11070           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11071
11072        or_into_QFLAG32(
11073           signed_overflow_after_Add32(
11074              binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11075           condT
11076        );
11077
11078        DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11079        return True;
11080     }
11081     /* fall through */
11082   }
11083
11084   /* ------------------ qdadd<c> <Rd>,<Rm>,<Rn> ------------------- */
11085   {
11086     UInt regD = 99, regN = 99, regM = 99;
11087     Bool gate = False;
11088
11089     if (isT) {
11090        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF090) {
11091           regN = INSNT0(3,0);
11092           regD = INSNT1(11,8);
11093           regM = INSNT1(3,0);
11094           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11095              gate = True;
11096        }
11097     } else {
11098        if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
11099            INSNA(11,8)  == BITS4(0,0,0,0)         &&
11100            INSNA(7,4)   == BITS4(0,1,0,1)) {
11101           regD = INSNA(15,12);
11102           regN = INSNA(19,16);
11103           regM = INSNA(3,0);
11104           if (regD != 15 && regN != 15 && regM != 15)
11105              gate = True;
11106        }
11107     }
11108
11109     if (gate) {
11110        IRTemp rNt   = newTemp(Ity_I32);
11111        IRTemp rMt   = newTemp(Ity_I32);
11112        IRTemp rN_d  = newTemp(Ity_I32);
11113        IRTemp res_q = newTemp(Ity_I32);
11114
11115        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11116        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11117
11118        or_into_QFLAG32(
11119           signed_overflow_after_Add32(
11120              binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11121           condT
11122        );
11123
11124        assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11125        assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rN_d)));
11126        if (isT)
11127           putIRegT( regD, mkexpr(res_q), condT );
11128        else
11129           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11130
11131        or_into_QFLAG32(
11132           signed_overflow_after_Add32(
11133              binop(Iop_Add32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11134           condT
11135        );
11136
11137        DIP("qdadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11138        return True;
11139     }
11140     /* fall through */
11141   }
11142
11143   /* ------------------ qsub<c> <Rd>,<Rn>,<Rm> ------------------- */
11144   {
11145     UInt regD = 99, regN = 99, regM = 99;
11146     Bool gate = False;
11147
11148     if (isT) {
11149        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) {
11150           regN = INSNT0(3,0);
11151           regD = INSNT1(11,8);
11152           regM = INSNT1(3,0);
11153           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11154              gate = True;
11155        }
11156     } else {
11157        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
11158            INSNA(11,8)  == BITS4(0,0,0,0)         &&
11159            INSNA(7,4)   == BITS4(0,1,0,1)) {
11160           regD = INSNA(15,12);
11161           regN = INSNA(19,16);
11162           regM = INSNA(3,0);
11163           if (regD != 15 && regN != 15 && regM != 15)
11164              gate = True;
11165        }
11166     }
11167
11168     if (gate) {
11169        IRTemp rNt   = newTemp(Ity_I32);
11170        IRTemp rMt   = newTemp(Ity_I32);
11171        IRTemp res_q = newTemp(Ity_I32);
11172
11173        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11174        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11175
11176        assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt)));
11177        if (isT)
11178           putIRegT( regD, mkexpr(res_q), condT );
11179        else
11180           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11181
11182        or_into_QFLAG32(
11183           signed_overflow_after_Sub32(
11184              binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11185           condT
11186        );
11187
11188        DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11189        return True;
11190     }
11191     /* fall through */
11192   }
11193
11194   /* ------------------ qdsub<c> <Rd>,<Rm>,<Rn> ------------------- */
11195   {
11196     UInt regD = 99, regN = 99, regM = 99;
11197     Bool gate = False;
11198
11199     if (isT) {
11200        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0B0) {
11201           regN = INSNT0(3,0);
11202           regD = INSNT1(11,8);
11203           regM = INSNT1(3,0);
11204           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11205              gate = True;
11206        }
11207     } else {
11208        if (INSNA(27,20) == BITS8(0,0,0,1,0,1,1,0) &&
11209            INSNA(11,8)  == BITS4(0,0,0,0)         &&
11210            INSNA(7,4)   == BITS4(0,1,0,1)) {
11211           regD = INSNA(15,12);
11212           regN = INSNA(19,16);
11213           regM = INSNA(3,0);
11214           if (regD != 15 && regN != 15 && regM != 15)
11215              gate = True;
11216        }
11217     }
11218
11219     if (gate) {
11220        IRTemp rNt   = newTemp(Ity_I32);
11221        IRTemp rMt   = newTemp(Ity_I32);
11222        IRTemp rN_d  = newTemp(Ity_I32);
11223        IRTemp res_q = newTemp(Ity_I32);
11224
11225        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11226        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11227
11228        or_into_QFLAG32(
11229           signed_overflow_after_Add32(
11230              binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11231           condT
11232        );
11233
11234        assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11235        assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rN_d)));
11236        if (isT)
11237           putIRegT( regD, mkexpr(res_q), condT );
11238        else
11239           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11240
11241        or_into_QFLAG32(
11242           signed_overflow_after_Sub32(
11243              binop(Iop_Sub32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11244           condT
11245        );
11246
11247        DIP("qdsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11248        return True;
11249     }
11250     /* fall through */
11251   }
11252
11253   /* ------------------ uqsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
11254   {
11255     UInt regD = 99, regN = 99, regM = 99;
11256     Bool gate = False;
11257
11258     if (isT) {
11259        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11260           regN = INSNT0(3,0);
11261           regD = INSNT1(11,8);
11262           regM = INSNT1(3,0);
11263           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11264              gate = True;
11265        }
11266     } else {
11267        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11268            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11269            INSNA(7,4)   == BITS4(0,1,1,1)) {
11270           regD = INSNA(15,12);
11271           regN = INSNA(19,16);
11272           regM = INSNA(3,0);
11273           if (regD != 15 && regN != 15 && regM != 15)
11274             gate = True;
11275        }
11276     }
11277
11278     if (gate) {
11279        IRTemp rNt   = newTemp(Ity_I32);
11280        IRTemp rMt   = newTemp(Ity_I32);
11281        IRTemp res_q = newTemp(Ity_I32);
11282
11283        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11284        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11285
11286        assign(res_q, binop(Iop_QSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11287        if (isT)
11288           putIRegT( regD, mkexpr(res_q), condT );
11289        else
11290           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11291
11292        DIP("uqsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11293        return True;
11294     }
11295     /* fall through */
11296   }
11297
11298   /* ----------------- shadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
11299   {
11300     UInt regD = 99, regN = 99, regM = 99;
11301     Bool gate = False;
11302
11303     if (isT) {
11304        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11305           regN = INSNT0(3,0);
11306           regD = INSNT1(11,8);
11307           regM = INSNT1(3,0);
11308           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11309              gate = True;
11310        }
11311     } else {
11312        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11313            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11314            INSNA(7,4)   == BITS4(0,0,0,1)) {
11315           regD = INSNA(15,12);
11316           regN = INSNA(19,16);
11317           regM = INSNA(3,0);
11318           if (regD != 15 && regN != 15 && regM != 15)
11319              gate = True;
11320        }
11321     }
11322
11323     if (gate) {
11324        IRTemp rNt   = newTemp(Ity_I32);
11325        IRTemp rMt   = newTemp(Ity_I32);
11326        IRTemp res_q = newTemp(Ity_I32);
11327
11328        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11329        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11330
11331        assign(res_q, binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
11332        if (isT)
11333           putIRegT( regD, mkexpr(res_q), condT );
11334        else
11335           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11336
11337        DIP("shadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11338        return True;
11339     }
11340     /* fall through */
11341   }
11342
11343   /* ----------------- uhsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11344   {
11345     UInt regD = 99, regN = 99, regM = 99;
11346     Bool gate = False;
11347
11348     if (isT) {
11349        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11350           regN = INSNT0(3,0);
11351           regD = INSNT1(11,8);
11352           regM = INSNT1(3,0);
11353           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11354              gate = True;
11355        }
11356     } else {
11357        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11358            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11359            INSNA(7,4)   == BITS4(1,1,1,1)) {
11360           regD = INSNA(15,12);
11361           regN = INSNA(19,16);
11362           regM = INSNA(3,0);
11363           if (regD != 15 && regN != 15 && regM != 15)
11364              gate = True;
11365        }
11366     }
11367
11368     if (gate) {
11369        IRTemp rNt   = newTemp(Ity_I32);
11370        IRTemp rMt   = newTemp(Ity_I32);
11371        IRTemp res_q = newTemp(Ity_I32);
11372
11373        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11374        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11375
11376        assign(res_q, binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
11377        if (isT)
11378           putIRegT( regD, mkexpr(res_q), condT );
11379        else
11380           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11381
11382        DIP("uhsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11383        return True;
11384     }
11385     /* fall through */
11386   }
11387
11388   /* ----------------- uhsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
11389   {
11390     UInt regD = 99, regN = 99, regM = 99;
11391     Bool gate = False;
11392
11393     if (isT) {
11394        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11395           regN = INSNT0(3,0);
11396           regD = INSNT1(11,8);
11397           regM = INSNT1(3,0);
11398           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11399              gate = True;
11400        }
11401     } else {
11402        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11403            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11404            INSNA(7,4)   == BITS4(0,1,1,1)) {
11405           regD = INSNA(15,12);
11406           regN = INSNA(19,16);
11407           regM = INSNA(3,0);
11408           if (regD != 15 && regN != 15 && regM != 15)
11409              gate = True;
11410        }
11411     }
11412
11413     if (gate) {
11414        IRTemp rNt   = newTemp(Ity_I32);
11415        IRTemp rMt   = newTemp(Ity_I32);
11416        IRTemp res_q = newTemp(Ity_I32);
11417
11418        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11419        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11420
11421        assign(res_q, binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11422        if (isT)
11423           putIRegT( regD, mkexpr(res_q), condT );
11424        else
11425           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11426
11427        DIP("uhsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11428        return True;
11429     }
11430     /* fall through */
11431   }
11432
11433   /* ------------------ uqadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
11434   {
11435     UInt regD = 99, regN = 99, regM = 99;
11436     Bool gate = False;
11437
11438     if (isT) {
11439        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11440           regN = INSNT0(3,0);
11441           regD = INSNT1(11,8);
11442           regM = INSNT1(3,0);
11443           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11444              gate = True;
11445        }
11446     } else {
11447        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11448            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11449            INSNA(7,4)   == BITS4(0,0,0,1)) {
11450           regD = INSNA(15,12);
11451           regN = INSNA(19,16);
11452           regM = INSNA(3,0);
11453           if (regD != 15 && regN != 15 && regM != 15)
11454              gate = True;
11455        }
11456     }
11457
11458     if (gate) {
11459        IRTemp rNt   = newTemp(Ity_I32);
11460        IRTemp rMt   = newTemp(Ity_I32);
11461        IRTemp res_q = newTemp(Ity_I32);
11462
11463        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11464        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11465
11466        assign(res_q, binop(Iop_QAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
11467        if (isT)
11468           putIRegT( regD, mkexpr(res_q), condT );
11469        else
11470           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11471
11472        DIP("uqadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11473        return True;
11474     }
11475     /* fall through */
11476   }
11477
11478   /* ------------------- uqsax<c> <Rd>,<Rn>,<Rm> ------------------- */
11479   {
11480     UInt regD = 99, regN = 99, regM = 99;
11481     Bool gate = False;
11482
11483     if (isT) {
11484        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11485           regN = INSNT0(3,0);
11486           regD = INSNT1(11,8);
11487           regM = INSNT1(3,0);
11488           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11489              gate = True;
11490        }
11491     } else {
11492        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11493            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11494            INSNA(7,4)   == BITS4(0,1,0,1)) {
11495           regD = INSNA(15,12);
11496           regN = INSNA(19,16);
11497           regM = INSNA(3,0);
11498           if (regD != 15 && regN != 15 && regM != 15)
11499              gate = True;
11500        }
11501     }
11502
11503     if (gate) {
11504        IRTemp irt_regN     = newTemp(Ity_I32);
11505        IRTemp irt_regM     = newTemp(Ity_I32);
11506        IRTemp irt_sum      = newTemp(Ity_I32);
11507        IRTemp irt_diff     = newTemp(Ity_I32);
11508        IRTemp irt_sum_res  = newTemp(Ity_I32);
11509        IRTemp irt_diff_res = newTemp(Ity_I32);
11510
11511        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11512        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11513
11514        assign( irt_diff,
11515                binop( Iop_Sub32,
11516                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11517                       binop( Iop_Shr32,
11518                              binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
11519                              mkU8(16) ) ) );
11520        armUnsignedSatQ( &irt_diff_res, NULL, irt_diff, 0x10);
11521
11522        assign( irt_sum,
11523                binop( Iop_Add32,
11524                       binop( Iop_Shr32,
11525                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11526                              mkU8(16) ),
11527                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) )) );
11528        armUnsignedSatQ( &irt_sum_res, NULL, irt_sum, 0x10 );
11529
11530        IRExpr* ire_result = binop( Iop_Or32,
11531                                    binop( Iop_Shl32, mkexpr(irt_diff_res),
11532                                           mkU8(16) ),
11533                                    binop( Iop_And32, mkexpr(irt_sum_res),
11534                                           mkU32(0xFFFF)) );
11535
11536        if (isT)
11537           putIRegT( regD, ire_result, condT );
11538        else
11539           putIRegA( regD, ire_result, condT, Ijk_Boring );
11540
11541        DIP( "uqsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11542        return True;
11543     }
11544     /* fall through */
11545   }
11546
11547   /* ------------------- uqasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11548   {
11549     UInt regD = 99, regN = 99, regM = 99;
11550     Bool gate = False;
11551
11552     if (isT) {
11553        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11554           regN = INSNT0(3,0);
11555           regD = INSNT1(11,8);
11556           regM = INSNT1(3,0);
11557           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11558              gate = True;
11559        }
11560     } else {
11561        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11562            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11563            INSNA(7,4)   == BITS4(0,0,1,1)) {
11564           regD = INSNA(15,12);
11565           regN = INSNA(19,16);
11566           regM = INSNA(3,0);
11567           if (regD != 15 && regN != 15 && regM != 15)
11568              gate = True;
11569        }
11570     }
11571
11572     if (gate) {
11573        IRTemp irt_regN     = newTemp(Ity_I32);
11574        IRTemp irt_regM     = newTemp(Ity_I32);
11575        IRTemp irt_sum      = newTemp(Ity_I32);
11576        IRTemp irt_diff     = newTemp(Ity_I32);
11577        IRTemp irt_res_sum  = newTemp(Ity_I32);
11578        IRTemp irt_res_diff = newTemp(Ity_I32);
11579
11580        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11581        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11582
11583        assign( irt_diff,
11584                binop( Iop_Sub32,
11585                       binop( Iop_Shr32,
11586                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11587                              mkU8(16) ),
11588                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11589        armUnsignedSatQ( &irt_res_diff, NULL, irt_diff, 0x10 );
11590
11591        assign( irt_sum,
11592                binop( Iop_Add32,
11593                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11594                       binop( Iop_Shr32,
11595                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11596                              mkU8(16) ) ) );
11597        armUnsignedSatQ( &irt_res_sum, NULL, irt_sum, 0x10 );
11598
11599        IRExpr* ire_result
11600          = binop( Iop_Or32,
11601                   binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
11602                   binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
11603
11604        if (isT)
11605           putIRegT( regD, ire_result, condT );
11606        else
11607           putIRegA( regD, ire_result, condT, Ijk_Boring );
11608
11609        DIP( "uqasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11610        return True;
11611     }
11612     /* fall through */
11613   }
11614
11615   /* ------------------- usax<c> <Rd>,<Rn>,<Rm> ------------------- */
11616   {
11617     UInt regD = 99, regN = 99, regM = 99;
11618     Bool gate = False;
11619
11620     if (isT) {
11621        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11622           regN = INSNT0(3,0);
11623           regD = INSNT1(11,8);
11624           regM = INSNT1(3,0);
11625           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11626              gate = True;
11627        }
11628     } else {
11629        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11630            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11631            INSNA(7,4)   == BITS4(0,1,0,1)) {
11632           regD = INSNA(15,12);
11633           regN = INSNA(19,16);
11634           regM = INSNA(3,0);
11635           if (regD != 15 && regN != 15 && regM != 15)
11636              gate = True;
11637        }
11638     }
11639
11640     if (gate) {
11641        IRTemp irt_regN = newTemp(Ity_I32);
11642        IRTemp irt_regM = newTemp(Ity_I32);
11643        IRTemp irt_sum  = newTemp(Ity_I32);
11644        IRTemp irt_diff = newTemp(Ity_I32);
11645
11646        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11647        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11648
11649        assign( irt_sum,
11650                binop( Iop_Add32,
11651                       unop( Iop_16Uto32,
11652                             unop( Iop_32to16, mkexpr(irt_regN) )
11653                       ),
11654                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11655
11656        assign( irt_diff,
11657                binop( Iop_Sub32,
11658                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11659                       unop( Iop_16Uto32,
11660                             unop( Iop_32to16, mkexpr(irt_regM) )
11661                       )
11662                )
11663        );
11664
11665        IRExpr* ire_result
11666          = binop( Iop_Or32,
11667                   binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11668                   binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11669
11670        IRTemp ge10 = newTemp(Ity_I32);
11671        assign( ge10, IRExpr_ITE( binop( Iop_CmpLE32U,
11672                                         mkU32(0x10000), mkexpr(irt_sum) ),
11673                                  mkU32(1), mkU32(0) ) );
11674        put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
11675        put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
11676
11677        IRTemp ge32 = newTemp(Ity_I32);
11678        assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11679        put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11680        put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11681
11682        if (isT)
11683           putIRegT( regD, ire_result, condT );
11684        else
11685           putIRegA( regD, ire_result, condT, Ijk_Boring );
11686
11687        DIP( "usax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11688        return True;
11689     }
11690     /* fall through */
11691   }
11692
11693   /* ------------------- uasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11694   {
11695     UInt regD = 99, regN = 99, regM = 99;
11696     Bool gate = False;
11697
11698     if (isT) {
11699        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11700           regN = INSNT0(3,0);
11701           regD = INSNT1(11,8);
11702           regM = INSNT1(3,0);
11703           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11704              gate = True;
11705        }
11706     } else {
11707        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11708            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11709            INSNA(7,4)   == BITS4(0,0,1,1)) {
11710           regD = INSNA(15,12);
11711           regN = INSNA(19,16);
11712           regM = INSNA(3,0);
11713           if (regD != 15 && regN != 15 && regM != 15)
11714              gate = True;
11715        }
11716     }
11717
11718     if (gate) {
11719        IRTemp irt_regN = newTemp(Ity_I32);
11720        IRTemp irt_regM = newTemp(Ity_I32);
11721        IRTemp irt_sum  = newTemp(Ity_I32);
11722        IRTemp irt_diff = newTemp(Ity_I32);
11723
11724        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11725        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11726
11727        assign( irt_diff,
11728                binop( Iop_Sub32,
11729                       unop( Iop_16Uto32,
11730                             unop( Iop_32to16, mkexpr(irt_regN) )
11731                       ),
11732                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11733
11734        assign( irt_sum,
11735                binop( Iop_Add32,
11736                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11737                       unop( Iop_16Uto32,
11738                             unop( Iop_32to16, mkexpr(irt_regM) )
11739                       ) ) );
11740
11741        IRExpr* ire_result
11742          = binop( Iop_Or32,
11743                   binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
11744                   binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
11745
11746        IRTemp ge10 = newTemp(Ity_I32);
11747        assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
11748        put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11749        put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11750
11751        IRTemp ge32 = newTemp(Ity_I32);
11752        assign( ge32, IRExpr_ITE( binop( Iop_CmpLE32U,
11753                                         mkU32(0x10000), mkexpr(irt_sum) ),
11754                                  mkU32(1), mkU32(0) ) );
11755        put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
11756        put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
11757
11758        if (isT)
11759           putIRegT( regD, ire_result, condT );
11760        else
11761           putIRegA( regD, ire_result, condT, Ijk_Boring );
11762
11763        DIP( "uasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11764        return True;
11765     }
11766     /* fall through */
11767   }
11768
11769   /* ------------------- ssax<c> <Rd>,<Rn>,<Rm> ------------------- */
11770   {
11771     UInt regD = 99, regN = 99, regM = 99;
11772     Bool gate = False;
11773
11774     if (isT) {
11775        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
11776           regN = INSNT0(3,0);
11777           regD = INSNT1(11,8);
11778           regM = INSNT1(3,0);
11779           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11780              gate = True;
11781        }
11782     } else {
11783        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
11784            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11785            INSNA(7,4)   == BITS4(0,1,0,1)) {
11786           regD = INSNA(15,12);
11787           regN = INSNA(19,16);
11788           regM = INSNA(3,0);
11789           if (regD != 15 && regN != 15 && regM != 15)
11790              gate = True;
11791        }
11792     }
11793
11794     if (gate) {
11795        IRTemp irt_regN = newTemp(Ity_I32);
11796        IRTemp irt_regM = newTemp(Ity_I32);
11797        IRTemp irt_sum  = newTemp(Ity_I32);
11798        IRTemp irt_diff = newTemp(Ity_I32);
11799
11800        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11801        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11802
11803        assign( irt_sum,
11804                binop( Iop_Add32,
11805                       binop( Iop_Sar32,
11806                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11807                              mkU8(16) ),
11808                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
11809
11810        assign( irt_diff,
11811                binop( Iop_Sub32,
11812                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
11813                       binop( Iop_Sar32,
11814                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11815                              mkU8(16) ) ) );
11816
11817        IRExpr* ire_result
11818          = binop( Iop_Or32,
11819                   binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11820                   binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11821
11822        IRTemp ge10 = newTemp(Ity_I32);
11823        assign(ge10, unop(Iop_Not32, mkexpr(irt_sum)));
11824        put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11825        put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11826
11827        IRTemp ge32 = newTemp(Ity_I32);
11828        assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11829        put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11830        put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11831
11832        if (isT)
11833           putIRegT( regD, ire_result, condT );
11834        else
11835           putIRegA( regD, ire_result, condT, Ijk_Boring );
11836
11837        DIP( "ssax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11838        return True;
11839     }
11840     /* fall through */
11841   }
11842
11843   /* ----------------- shsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11844   {
11845     UInt regD = 99, regN = 99, regM = 99;
11846     Bool gate = False;
11847
11848     if (isT) {
11849        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11850           regN = INSNT0(3,0);
11851           regD = INSNT1(11,8);
11852           regM = INSNT1(3,0);
11853           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11854              gate = True;
11855        }
11856     } else {
11857        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11858            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11859            INSNA(7,4)   == BITS4(1,1,1,1)) {
11860           regD = INSNA(15,12);
11861           regN = INSNA(19,16);
11862           regM = INSNA(3,0);
11863           if (regD != 15 && regN != 15 && regM != 15)
11864              gate = True;
11865        }
11866     }
11867
11868     if (gate) {
11869        IRTemp rNt   = newTemp(Ity_I32);
11870        IRTemp rMt   = newTemp(Ity_I32);
11871        IRTemp res_q = newTemp(Ity_I32);
11872
11873        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11874        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11875
11876        assign(res_q, binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
11877        if (isT)
11878           putIRegT( regD, mkexpr(res_q), condT );
11879        else
11880           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11881
11882        DIP("shsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11883        return True;
11884     }
11885     /* fall through */
11886   }
11887
11888   /* ----------------- sxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
11889   {
11890     UInt regD = 99, regN = 99, regM = 99, rotate = 99;
11891     Bool gate = False;
11892
11893     if (isT) {
11894        if (INSNT0(15,4) == 0xFA2 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
11895           regN   = INSNT0(3,0);
11896           regD   = INSNT1(11,8);
11897           regM   = INSNT1(3,0);
11898           rotate = INSNT1(5,4);
11899           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11900              gate = True;
11901        }
11902     } else {
11903        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
11904            INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
11905           regD   = INSNA(15,12);
11906           regN   = INSNA(19,16);
11907           regM   = INSNA(3,0);
11908           rotate = INSNA(11,10);
11909           if (regD != 15 && regN != 15 && regM != 15)
11910             gate = True;
11911        }
11912     }
11913
11914     if (gate) {
11915        IRTemp irt_regN = newTemp(Ity_I32);
11916        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11917
11918        IRTemp irt_regM = newTemp(Ity_I32);
11919        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11920
11921        IRTemp irt_rot = newTemp(Ity_I32);
11922        assign( irt_rot, genROR32(irt_regM, 8 * rotate) );
11923
11924        /* FIXME Maybe we can write this arithmetic in shorter form. */
11925        IRExpr* resLo
11926           = binop(Iop_And32,
11927                   binop(Iop_Add32,
11928                         mkexpr(irt_regN),
11929                         unop(Iop_16Uto32,
11930                              unop(Iop_8Sto16,
11931                                   unop(Iop_32to8, mkexpr(irt_rot))))),
11932                   mkU32(0x0000FFFF));
11933
11934        IRExpr* resHi
11935           = binop(Iop_And32,
11936                   binop(Iop_Add32,
11937                         mkexpr(irt_regN),
11938                         binop(Iop_Shl32,
11939                               unop(Iop_16Uto32,
11940                                    unop(Iop_8Sto16,
11941                                         unop(Iop_32to8,
11942                                              binop(Iop_Shr32,
11943                                                    mkexpr(irt_rot),
11944                                                    mkU8(16))))),
11945                               mkU8(16))),
11946                   mkU32(0xFFFF0000));
11947
11948        IRExpr* ire_result
11949           = binop( Iop_Or32, resHi, resLo );
11950
11951        if (isT)
11952           putIRegT( regD, ire_result, condT );
11953        else
11954           putIRegA( regD, ire_result, condT, Ijk_Boring );
11955
11956        DIP( "sxtab16%s r%u, r%u, r%u, ROR #%u\n",
11957             nCC(conq), regD, regN, regM, 8 * rotate );
11958        return True;
11959     }
11960     /* fall through */
11961   }
11962
11963   /* ----------------- shasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11964   {
11965     UInt regD = 99, regN = 99, regM = 99;
11966     Bool gate = False;
11967
11968     if (isT) {
11969        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11970           regN = INSNT0(3,0);
11971           regD = INSNT1(11,8);
11972           regM = INSNT1(3,0);
11973           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11974              gate = True;
11975        }
11976     } else {
11977        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11978            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11979            INSNA(7,4)   == BITS4(0,0,1,1)) {
11980           regD = INSNA(15,12);
11981           regN = INSNA(19,16);
11982           regM = INSNA(3,0);
11983           if (regD != 15 && regN != 15 && regM != 15)
11984              gate = True;
11985        }
11986     }
11987
11988     if (gate) {
11989        IRTemp rNt   = newTemp(Ity_I32);
11990        IRTemp rMt   = newTemp(Ity_I32);
11991        IRTemp irt_diff  = newTemp(Ity_I32);
11992        IRTemp irt_sum   = newTemp(Ity_I32);
11993        IRTemp res_q = newTemp(Ity_I32);
11994
11995        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11996        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11997
11998        assign( irt_diff,
11999                binop(Iop_Sub32,
12000                      unop(Iop_16Sto32,
12001                           unop(Iop_32to16,
12002                                mkexpr(rNt)
12003                           )
12004                      ),
12005                      unop(Iop_16Sto32,
12006                           unop(Iop_32to16,
12007                                binop(Iop_Shr32,
12008                                      mkexpr(rMt), mkU8(16)
12009                                )
12010                           )
12011                      )
12012                )
12013        );
12014
12015        assign( irt_sum,
12016                binop(Iop_Add32,
12017                      unop(Iop_16Sto32,
12018                           unop(Iop_32to16,
12019                                binop(Iop_Shr32,
12020                                      mkexpr(rNt), mkU8(16)
12021                                )
12022                           )
12023                      ),
12024                      unop(Iop_16Sto32,
12025                           unop(Iop_32to16, mkexpr(rMt)
12026                           )
12027                      )
12028                )
12029        );
12030
12031        assign( res_q,
12032                binop(Iop_Or32,
12033                      unop(Iop_16Uto32,
12034                           unop(Iop_32to16,
12035                                binop(Iop_Shr32,
12036                                      mkexpr(irt_diff), mkU8(1)
12037                                )
12038                           )
12039                      ),
12040                      binop(Iop_Shl32,
12041                            binop(Iop_Shr32,
12042                                  mkexpr(irt_sum), mkU8(1)
12043                            ),
12044                            mkU8(16)
12045                     )
12046                )
12047        );
12048
12049        if (isT)
12050           putIRegT( regD, mkexpr(res_q), condT );
12051        else
12052           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12053
12054        DIP("shasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12055        return True;
12056     }
12057     /* fall through */
12058   }
12059
12060   /* ----------------- uhasx<c> <Rd>,<Rn>,<Rm> ------------------- */
12061   {
12062     UInt regD = 99, regN = 99, regM = 99;
12063     Bool gate = False;
12064
12065     if (isT) {
12066        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12067           regN = INSNT0(3,0);
12068           regD = INSNT1(11,8);
12069           regM = INSNT1(3,0);
12070           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12071              gate = True;
12072        }
12073     } else {
12074        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12075            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12076            INSNA(7,4)   == BITS4(0,0,1,1)) {
12077           regD = INSNA(15,12);
12078           regN = INSNA(19,16);
12079           regM = INSNA(3,0);
12080           if (regD != 15 && regN != 15 && regM != 15)
12081              gate = True;
12082        }
12083     }
12084
12085     if (gate) {
12086        IRTemp rNt   = newTemp(Ity_I32);
12087        IRTemp rMt   = newTemp(Ity_I32);
12088        IRTemp irt_diff  = newTemp(Ity_I32);
12089        IRTemp irt_sum   = newTemp(Ity_I32);
12090        IRTemp res_q = newTemp(Ity_I32);
12091
12092        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12093        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12094
12095        assign( irt_diff,
12096                binop(Iop_Sub32,
12097                      unop(Iop_16Uto32,
12098                           unop(Iop_32to16,
12099                                mkexpr(rNt)
12100                           )
12101                      ),
12102                      unop(Iop_16Uto32,
12103                           unop(Iop_32to16,
12104                                binop(Iop_Shr32,
12105                                      mkexpr(rMt), mkU8(16)
12106                                )
12107                           )
12108                      )
12109                )
12110        );
12111
12112        assign( irt_sum,
12113                binop(Iop_Add32,
12114                      unop(Iop_16Uto32,
12115                           unop(Iop_32to16,
12116                                binop(Iop_Shr32,
12117                                      mkexpr(rNt), mkU8(16)
12118                                )
12119                           )
12120                      ),
12121                      unop(Iop_16Uto32,
12122                           unop(Iop_32to16, mkexpr(rMt)
12123                           )
12124                      )
12125                )
12126        );
12127
12128        assign( res_q,
12129                binop(Iop_Or32,
12130                      unop(Iop_16Uto32,
12131                           unop(Iop_32to16,
12132                                binop(Iop_Shr32,
12133                                      mkexpr(irt_diff), mkU8(1)
12134                                )
12135                           )
12136                      ),
12137                      binop(Iop_Shl32,
12138                            binop(Iop_Shr32,
12139                                  mkexpr(irt_sum), mkU8(1)
12140                            ),
12141                            mkU8(16)
12142                     )
12143                )
12144        );
12145
12146        if (isT)
12147           putIRegT( regD, mkexpr(res_q), condT );
12148        else
12149           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12150
12151        DIP("uhasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12152        return True;
12153     }
12154     /* fall through */
12155   }
12156
12157   /* ----------------- shsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12158   {
12159     UInt regD = 99, regN = 99, regM = 99;
12160     Bool gate = False;
12161
12162     if (isT) {
12163        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12164           regN = INSNT0(3,0);
12165           regD = INSNT1(11,8);
12166           regM = INSNT1(3,0);
12167           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12168              gate = True;
12169        }
12170     } else {
12171        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12172            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12173            INSNA(7,4)   == BITS4(0,1,0,1)) {
12174           regD = INSNA(15,12);
12175           regN = INSNA(19,16);
12176           regM = INSNA(3,0);
12177           if (regD != 15 && regN != 15 && regM != 15)
12178              gate = True;
12179        }
12180     }
12181
12182     if (gate) {
12183        IRTemp rNt   = newTemp(Ity_I32);
12184        IRTemp rMt   = newTemp(Ity_I32);
12185        IRTemp irt_diff  = newTemp(Ity_I32);
12186        IRTemp irt_sum   = newTemp(Ity_I32);
12187        IRTemp res_q = newTemp(Ity_I32);
12188
12189        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12190        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12191
12192        assign( irt_sum,
12193                binop(Iop_Add32,
12194                      unop(Iop_16Sto32,
12195                           unop(Iop_32to16,
12196                                mkexpr(rNt)
12197                           )
12198                      ),
12199                      unop(Iop_16Sto32,
12200                           unop(Iop_32to16,
12201                                binop(Iop_Shr32,
12202                                      mkexpr(rMt), mkU8(16)
12203                                )
12204                           )
12205                      )
12206                )
12207        );
12208
12209        assign( irt_diff,
12210                binop(Iop_Sub32,
12211                      unop(Iop_16Sto32,
12212                           unop(Iop_32to16,
12213                                binop(Iop_Shr32,
12214                                      mkexpr(rNt), mkU8(16)
12215                                )
12216                           )
12217                      ),
12218                      unop(Iop_16Sto32,
12219                           unop(Iop_32to16, mkexpr(rMt)
12220                           )
12221                      )
12222                )
12223        );
12224
12225        assign( res_q,
12226                binop(Iop_Or32,
12227                      unop(Iop_16Uto32,
12228                           unop(Iop_32to16,
12229                                binop(Iop_Shr32,
12230                                      mkexpr(irt_sum), mkU8(1)
12231                                )
12232                           )
12233                      ),
12234                      binop(Iop_Shl32,
12235                            binop(Iop_Shr32,
12236                                  mkexpr(irt_diff), mkU8(1)
12237                            ),
12238                            mkU8(16)
12239                     )
12240                )
12241        );
12242
12243        if (isT)
12244           putIRegT( regD, mkexpr(res_q), condT );
12245        else
12246           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12247
12248        DIP("shsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12249        return True;
12250     }
12251     /* fall through */
12252   }
12253
12254   /* ----------------- uhsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12255   {
12256     UInt regD = 99, regN = 99, regM = 99;
12257     Bool gate = False;
12258
12259     if (isT) {
12260        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12261           regN = INSNT0(3,0);
12262           regD = INSNT1(11,8);
12263           regM = INSNT1(3,0);
12264           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12265              gate = True;
12266        }
12267     } else {
12268        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12269            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12270            INSNA(7,4)   == BITS4(0,1,0,1)) {
12271           regD = INSNA(15,12);
12272           regN = INSNA(19,16);
12273           regM = INSNA(3,0);
12274           if (regD != 15 && regN != 15 && regM != 15)
12275              gate = True;
12276        }
12277     }
12278
12279     if (gate) {
12280        IRTemp rNt   = newTemp(Ity_I32);
12281        IRTemp rMt   = newTemp(Ity_I32);
12282        IRTemp irt_diff  = newTemp(Ity_I32);
12283        IRTemp irt_sum   = newTemp(Ity_I32);
12284        IRTemp res_q = newTemp(Ity_I32);
12285
12286        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12287        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12288
12289        assign( irt_sum,
12290                binop(Iop_Add32,
12291                      unop(Iop_16Uto32,
12292                           unop(Iop_32to16,
12293                                mkexpr(rNt)
12294                           )
12295                      ),
12296                      unop(Iop_16Uto32,
12297                           unop(Iop_32to16,
12298                                binop(Iop_Shr32,
12299                                      mkexpr(rMt), mkU8(16)
12300                                )
12301                           )
12302                      )
12303                )
12304        );
12305
12306        assign( irt_diff,
12307                binop(Iop_Sub32,
12308                      unop(Iop_16Uto32,
12309                           unop(Iop_32to16,
12310                                binop(Iop_Shr32,
12311                                      mkexpr(rNt), mkU8(16)
12312                                )
12313                           )
12314                      ),
12315                      unop(Iop_16Uto32,
12316                           unop(Iop_32to16, mkexpr(rMt)
12317                           )
12318                      )
12319                )
12320        );
12321
12322        assign( res_q,
12323                binop(Iop_Or32,
12324                      unop(Iop_16Uto32,
12325                           unop(Iop_32to16,
12326                                binop(Iop_Shr32,
12327                                      mkexpr(irt_sum), mkU8(1)
12328                                )
12329                           )
12330                      ),
12331                      binop(Iop_Shl32,
12332                            binop(Iop_Shr32,
12333                                  mkexpr(irt_diff), mkU8(1)
12334                            ),
12335                            mkU8(16)
12336                     )
12337                )
12338        );
12339
12340        if (isT)
12341           putIRegT( regD, mkexpr(res_q), condT );
12342        else
12343           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12344
12345        DIP("uhsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12346        return True;
12347     }
12348     /* fall through */
12349   }
12350
12351   /* ----------------- shsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
12352   {
12353     UInt regD = 99, regN = 99, regM = 99;
12354     Bool gate = False;
12355
12356     if (isT) {
12357        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12358           regN = INSNT0(3,0);
12359           regD = INSNT1(11,8);
12360           regM = INSNT1(3,0);
12361           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12362              gate = True;
12363        }
12364     } else {
12365        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12366            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12367            INSNA(7,4)   == BITS4(0,1,1,1)) {
12368           regD = INSNA(15,12);
12369           regN = INSNA(19,16);
12370           regM = INSNA(3,0);
12371           if (regD != 15 && regN != 15 && regM != 15)
12372              gate = True;
12373        }
12374     }
12375
12376     if (gate) {
12377        IRTemp rNt   = newTemp(Ity_I32);
12378        IRTemp rMt   = newTemp(Ity_I32);
12379        IRTemp res_q = newTemp(Ity_I32);
12380
12381        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12382        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12383
12384        assign(res_q, binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
12385        if (isT)
12386           putIRegT( regD, mkexpr(res_q), condT );
12387        else
12388           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12389
12390        DIP("shsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12391        return True;
12392     }
12393     /* fall through */
12394   }
12395
12396   /* ----------------- smmls{r}<c> <Rd>,<Rn>,<Rm>,<Ra> ------------------- */
12397   {
12398     UInt rD = 99, rN = 99, rM = 99, rA = 99;
12399     Bool round  = False;
12400     Bool gate   = False;
12401
12402     if (isT) {
12403        if (INSNT0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
12404            && INSNT0(6,4) == BITS3(1,1,0)
12405            && INSNT1(7,5) == BITS3(0,0,0)) {
12406           round = INSNT1(4,4);
12407           rA    = INSNT1(15,12);
12408           rD    = INSNT1(11,8);
12409           rM    = INSNT1(3,0);
12410           rN    = INSNT0(3,0);
12411           if (!isBadRegT(rD)
12412               && !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rA))
12413              gate = True;
12414        }
12415     } else {
12416        if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,1)
12417            && INSNA(15,12) != BITS4(1,1,1,1)
12418            && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(1,1,0,1)) {
12419           round = INSNA(5,5);
12420           rD    = INSNA(19,16);
12421           rA    = INSNA(15,12);
12422           rM    = INSNA(11,8);
12423           rN    = INSNA(3,0);
12424           if (rD != 15 && rM != 15 && rN != 15)
12425              gate = True;
12426        }
12427     }
12428     if (gate) {
12429        IRTemp irt_rA   = newTemp(Ity_I32);
12430        IRTemp irt_rN   = newTemp(Ity_I32);
12431        IRTemp irt_rM   = newTemp(Ity_I32);
12432        assign( irt_rA, isT ? getIRegT(rA) : getIRegA(rA) );
12433        assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12434        assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12435        IRExpr* res
12436        = unop(Iop_64HIto32,
12437               binop(Iop_Add64,
12438                     binop(Iop_Sub64,
12439                           binop(Iop_32HLto64, mkexpr(irt_rA), mkU32(0)),
12440                           binop(Iop_MullS32, mkexpr(irt_rN), mkexpr(irt_rM))),
12441                     mkU64(round ? 0x80000000ULL : 0ULL)));
12442        if (isT)
12443           putIRegT( rD, res, condT );
12444        else
12445           putIRegA(rD, res, condT, Ijk_Boring);
12446        DIP("smmls%s%s r%u, r%u, r%u, r%u\n",
12447            round ? "r" : "", nCC(conq), rD, rN, rM, rA);
12448        return True;
12449     }
12450     /* fall through */
12451   }
12452
12453   /* -------------- smlald{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12454   {
12455     UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12456     Bool m_swap = False;
12457     Bool gate   = False;
12458
12459     if (isT) {
12460        if (INSNT0(15,4) == 0xFBC &&
12461            (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0)) {
12462           rN     = INSNT0(3,0);
12463           rDlo   = INSNT1(15,12);
12464           rDhi   = INSNT1(11,8);
12465           rM     = INSNT1(3,0);
12466           m_swap = (INSNT1(4,4) & 1) == 1;
12467           if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
12468               && !isBadRegT(rM) && rDhi != rDlo)
12469              gate = True;
12470        }
12471     } else {
12472        if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0)
12473            && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
12474           rN     = INSNA(3,0);
12475           rDlo   = INSNA(15,12);
12476           rDhi   = INSNA(19,16);
12477           rM     = INSNA(11,8);
12478           m_swap = ( INSNA(5,5) & 1 ) == 1;
12479           if (rDlo != 15 && rDhi != 15
12480               && rN != 15 && rM != 15 && rDlo != rDhi)
12481              gate = True;
12482        }
12483     }
12484
12485     if (gate) {
12486        IRTemp irt_rM   = newTemp(Ity_I32);
12487        IRTemp irt_rN   = newTemp(Ity_I32);
12488        IRTemp irt_rDhi = newTemp(Ity_I32);
12489        IRTemp irt_rDlo = newTemp(Ity_I32);
12490        IRTemp op_2     = newTemp(Ity_I32);
12491        IRTemp pr_1     = newTemp(Ity_I64);
12492        IRTemp pr_2     = newTemp(Ity_I64);
12493        IRTemp result   = newTemp(Ity_I64);
12494        IRTemp resHi    = newTemp(Ity_I32);
12495        IRTemp resLo    = newTemp(Ity_I32);
12496        assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM));
12497        assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN));
12498        assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi));
12499        assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo));
12500        assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12501        assign( pr_1, binop(Iop_MullS32,
12502                            unop(Iop_16Sto32,
12503                                 unop(Iop_32to16, mkexpr(irt_rN))
12504                            ),
12505                            unop(Iop_16Sto32,
12506                                 unop(Iop_32to16, mkexpr(op_2))
12507                            )
12508                      )
12509        );
12510        assign( pr_2, binop(Iop_MullS32,
12511                            binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12512                            binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12513                      )
12514        );
12515        assign( result, binop(Iop_Add64,
12516                              binop(Iop_Add64,
12517                                    mkexpr(pr_1),
12518                                    mkexpr(pr_2)
12519                              ),
12520                              binop(Iop_32HLto64,
12521                                    mkexpr(irt_rDhi),
12522                                    mkexpr(irt_rDlo)
12523                              )
12524                        )
12525        );
12526        assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12527        assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12528        if (isT) {
12529           putIRegT( rDhi, mkexpr(resHi), condT );
12530           putIRegT( rDlo, mkexpr(resLo), condT );
12531        } else {
12532           putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12533           putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12534        }
12535        DIP("smlald%c%s r%u, r%u, r%u, r%u\n",
12536            m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12537        return True;
12538     }
12539     /* fall through */
12540   }
12541
12542   /* -------------- smlsld{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12543   {
12544     UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12545     Bool m_swap = False;
12546     Bool gate   = False;
12547
12548     if (isT) {
12549        if ((INSNT0(15,4) == 0xFBD &&
12550            (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0))) {
12551           rN     = INSNT0(3,0);
12552           rDlo   = INSNT1(15,12);
12553           rDhi   = INSNT1(11,8);
12554           rM     = INSNT1(3,0);
12555           m_swap = (INSNT1(4,4) & 1) == 1;
12556           if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN) &&
12557               !isBadRegT(rM) && rDhi != rDlo)
12558              gate = True;
12559        }
12560     } else {
12561        if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0) &&
12562            (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,1,0,1)) {
12563           rN     = INSNA(3,0);
12564           rDlo   = INSNA(15,12);
12565           rDhi   = INSNA(19,16);
12566           rM     = INSNA(11,8);
12567           m_swap = (INSNA(5,5) & 1) == 1;
12568           if (rDlo != 15 && rDhi != 15 &&
12569               rN != 15 && rM != 15 && rDlo != rDhi)
12570              gate = True;
12571        }
12572     }
12573     if (gate) {
12574        IRTemp irt_rM   = newTemp(Ity_I32);
12575        IRTemp irt_rN   = newTemp(Ity_I32);
12576        IRTemp irt_rDhi = newTemp(Ity_I32);
12577        IRTemp irt_rDlo = newTemp(Ity_I32);
12578        IRTemp op_2     = newTemp(Ity_I32);
12579        IRTemp pr_1     = newTemp(Ity_I64);
12580        IRTemp pr_2     = newTemp(Ity_I64);
12581        IRTemp result   = newTemp(Ity_I64);
12582        IRTemp resHi    = newTemp(Ity_I32);
12583        IRTemp resLo    = newTemp(Ity_I32);
12584        assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12585        assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12586        assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi) );
12587        assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo) );
12588        assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12589        assign( pr_1, binop(Iop_MullS32,
12590                            unop(Iop_16Sto32,
12591                                 unop(Iop_32to16, mkexpr(irt_rN))
12592                            ),
12593                            unop(Iop_16Sto32,
12594                                 unop(Iop_32to16, mkexpr(op_2))
12595                            )
12596                      )
12597        );
12598        assign( pr_2, binop(Iop_MullS32,
12599                            binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12600                            binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12601                      )
12602        );
12603        assign( result, binop(Iop_Add64,
12604                              binop(Iop_Sub64,
12605                                    mkexpr(pr_1),
12606                                    mkexpr(pr_2)
12607                              ),
12608                              binop(Iop_32HLto64,
12609                                    mkexpr(irt_rDhi),
12610                                    mkexpr(irt_rDlo)
12611                              )
12612                        )
12613        );
12614        assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12615        assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12616        if (isT) {
12617           putIRegT( rDhi, mkexpr(resHi), condT );
12618           putIRegT( rDlo, mkexpr(resLo), condT );
12619        } else {
12620           putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12621           putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12622        }
12623        DIP("smlsld%c%s r%u, r%u, r%u, r%u\n",
12624            m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12625        return True;
12626     }
12627     /* fall through */
12628   }
12629
12630   /* ---------- Doesn't match anything. ---------- */
12631   return False;
12632
12633#  undef INSNA
12634#  undef INSNT0
12635#  undef INSNT1
12636}
12637
12638
12639/*------------------------------------------------------------*/
12640/*--- V8 instructions                                      ---*/
12641/*------------------------------------------------------------*/
12642
12643/* Break a V128-bit value up into four 32-bit ints. */
12644
12645static void breakupV128to32s ( IRTemp t128,
12646                               /*OUTs*/
12647                               IRTemp* t3, IRTemp* t2,
12648                               IRTemp* t1, IRTemp* t0 )
12649{
12650   IRTemp hi64 = newTemp(Ity_I64);
12651   IRTemp lo64 = newTemp(Ity_I64);
12652   assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
12653   assign( lo64, unop(Iop_V128to64,   mkexpr(t128)) );
12654
12655   vassert(t0 && *t0 == IRTemp_INVALID);
12656   vassert(t1 && *t1 == IRTemp_INVALID);
12657   vassert(t2 && *t2 == IRTemp_INVALID);
12658   vassert(t3 && *t3 == IRTemp_INVALID);
12659
12660   *t0 = newTemp(Ity_I32);
12661   *t1 = newTemp(Ity_I32);
12662   *t2 = newTemp(Ity_I32);
12663   *t3 = newTemp(Ity_I32);
12664   assign( *t0, unop(Iop_64to32,   mkexpr(lo64)) );
12665   assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
12666   assign( *t2, unop(Iop_64to32,   mkexpr(hi64)) );
12667   assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
12668}
12669
12670
12671/* Both ARM and Thumb */
12672
12673/* Translate a V8 instruction.  If successful, returns True and *dres
12674   may or may not be updated.  If unsuccessful, returns False and
12675   doesn't change *dres nor create any IR.
12676
12677   The Thumb and ARM encodings are potentially different.  In both
12678   ARM and Thumb mode, the caller must pass the entire 32 bits of
12679   the instruction.  Callers may pass any instruction; this function
12680   ignores anything it doesn't recognise.
12681
12682   Caller must supply an IRTemp 'condT' holding the gating condition,
12683   or IRTemp_INVALID indicating the insn is always executed.
12684
12685   If we are decoding an ARM instruction which is in the NV space
12686   then it is expected that condT will be IRTemp_INVALID, and that is
12687   asserted for.  That condition is ensured by the logic near the top
12688   of disInstr_ARM_WRK, that sets up condT.
12689
12690   When decoding for Thumb, the caller must pass the ITState pre/post
12691   this instruction, so that we can generate a SIGILL in the cases where
12692   the instruction may not be in an IT block.  When decoding for ARM,
12693   both of these must be IRTemp_INVALID.
12694
12695   Finally, the caller must indicate whether this occurs in ARM or in
12696   Thumb code.
12697*/
12698static Bool decode_V8_instruction (
12699               /*MOD*/DisResult* dres,
12700               UInt              insnv8,
12701               IRTemp            condT,
12702               Bool              isT,
12703               IRTemp            old_itstate,
12704               IRTemp            new_itstate
12705            )
12706{
12707#  define INSN(_bMax,_bMin)   SLICE_UInt(insnv8, (_bMax), (_bMin))
12708
12709   if (isT) {
12710      vassert(old_itstate != IRTemp_INVALID);
12711      vassert(new_itstate != IRTemp_INVALID);
12712   } else {
12713      vassert(old_itstate == IRTemp_INVALID);
12714      vassert(new_itstate == IRTemp_INVALID);
12715   }
12716
12717   /* ARMCondcode 'conq' is only used for debug printing and for no other
12718      purpose.  For ARM, this is simply the top 4 bits of the instruction.
12719      For Thumb, the condition is not (really) known until run time, and so
12720      we set it to ARMCondAL in order that printing of these instructions
12721      does not show any condition. */
12722   ARMCondcode conq;
12723   if (isT) {
12724      conq = ARMCondAL;
12725   } else {
12726      conq = (ARMCondcode)INSN(31,28);
12727      if (conq == ARMCondNV || conq == ARMCondAL) {
12728         vassert(condT == IRTemp_INVALID);
12729      } else {
12730         vassert(condT != IRTemp_INVALID);
12731      }
12732      vassert(conq >= ARMCondEQ && conq <= ARMCondNV);
12733   }
12734
12735   /* ----------- {AESD, AESE, AESMC, AESIMC}.8 q_q ----------- */
12736   /*     31   27   23  21 19 17 15 11   7      3
12737      T1: 1111 1111 1 D 11 sz 00 d  0011 00 M 0 m  AESE Qd, Qm
12738      A1: 1111 0011 1 D 11 sz 00 d  0011 00 M 0 m  AESE Qd, Qm
12739
12740      T1: 1111 1111 1 D 11 sz 00 d  0011 01 M 0 m  AESD Qd, Qm
12741      A1: 1111 0011 1 D 11 sz 00 d  0011 01 M 0 m  AESD Qd, Qm
12742
12743      T1: 1111 1111 1 D 11 sz 00 d  0011 10 M 0 m  AESMC Qd, Qm
12744      A1: 1111 0011 1 D 11 sz 00 d  0011 10 M 0 m  AESMC Qd, Qm
12745
12746      T1: 1111 1111 1 D 11 sz 00 d  0011 11 M 0 m  AESIMC Qd, Qm
12747      A1: 1111 0011 1 D 11 sz 00 d  0011 11 M 0 m  AESIMC Qd, Qm
12748
12749      sz must be 00
12750      ARM encoding is in NV space.
12751      In Thumb mode, we must not be in an IT block.
12752   */
12753   {
12754     UInt regD = 99, regM = 99, opc = 4/*invalid*/;
12755     Bool gate = True;
12756
12757     UInt high9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
12758     if (INSN(31,23) == high9 && INSN(21,16) == BITS6(1,1,0,0,0,0)
12759         && INSN(11,8) == BITS4(0,0,1,1) && INSN(4,4) == 0) {
12760        UInt bitD = INSN(22,22);
12761        UInt fldD = INSN(15,12);
12762        UInt bitM = INSN(5,5);
12763        UInt fldM = INSN(3,0);
12764        opc  = INSN(7,6);
12765        regD = (bitD << 4) | fldD;
12766        regM = (bitM << 4) | fldM;
12767     }
12768     if ((regD & 1) == 1 || (regM & 1) == 1)
12769        gate = False;
12770
12771     if (gate) {
12772        if (isT) {
12773           gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
12774        }
12775        /* In ARM mode, this is statically unconditional.  In Thumb mode,
12776           this must be dynamically unconditional, and we've SIGILLd if not.
12777           In either case we can create unconditional IR. */
12778        IRTemp op1 = newTemp(Ity_V128);
12779        IRTemp op2 = newTemp(Ity_V128);
12780        IRTemp src = newTemp(Ity_V128);
12781        IRTemp res = newTemp(Ity_V128);
12782        assign(op1,  getQReg(regD >> 1));
12783        assign(op2,  getQReg(regM >> 1));
12784        assign(src,  opc == BITS2(0,0) || opc == BITS2(0,1)
12785                        ? binop(Iop_XorV128, mkexpr(op1), mkexpr(op2))
12786                        : mkexpr(op2));
12787
12788        void* helpers[4]
12789           = { &armg_dirtyhelper_AESE,  &armg_dirtyhelper_AESD,
12790               &armg_dirtyhelper_AESMC, &armg_dirtyhelper_AESIMC };
12791        const HChar* hNames[4]
12792           = { "armg_dirtyhelper_AESE",  "armg_dirtyhelper_AESD",
12793               "armg_dirtyhelper_AESMC", "armg_dirtyhelper_AESIMC" };
12794        const HChar* iNames[4]
12795           = { "aese", "aesd", "aesmc", "aesimc" };
12796
12797        vassert(opc >= 0 && opc <= 3);
12798        void*        helper = helpers[opc];
12799        const HChar* hname  = hNames[opc];
12800
12801        IRTemp w32_3, w32_2, w32_1, w32_0;
12802        w32_3 = w32_2 = w32_1 = w32_0 = IRTemp_INVALID;
12803        breakupV128to32s( src, &w32_3, &w32_2, &w32_1, &w32_0 );
12804
12805        IRDirty* di
12806          = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
12807                               mkIRExprVec_5(
12808                                  IRExpr_VECRET(),
12809                                  mkexpr(w32_3), mkexpr(w32_2),
12810                                  mkexpr(w32_1), mkexpr(w32_0)) );
12811        stmt(IRStmt_Dirty(di));
12812
12813        putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
12814        DIP("%s.8 q%d, q%d\n", iNames[opc], regD >> 1, regM >> 1);
12815        return True;
12816     }
12817     /* fall through */
12818   }
12819
12820   /* ----------- SHA 3-reg insns q_q_q ----------- */
12821   /*
12822          31   27   23      19 15 11   7       3
12823      T1: 1110 1111 0  D 00 n  d  1100 N Q M 0 m  SHA1C Qd, Qn, Qm  ix=0
12824      A1: 1111 0010 ----------------------------
12825
12826      T1: 1110 1111 0  D 01 n  d  1100 N Q M 0 m  SHA1P Qd, Qn, Qm  ix=1
12827      A1: 1111 0010 ----------------------------
12828
12829      T1: 1110 1111 0  D 10 n  d  1100 N Q M 0 m  SHA1M Qd, Qn, Qm  ix=2
12830      A1: 1111 0010 ----------------------------
12831
12832      T1: 1110 1111 0  D 11 n  d  1100 N Q M 0 m  SHA1SU0 Qd, Qn, Qm  ix=3
12833      A1: 1111 0010 ----------------------------
12834      (that's a complete set of 4, based on insn[21,20])
12835
12836      T1: 1111 1111 0  D 00 n  d  1100 N Q M 0 m  SHA256H Qd, Qn, Qm  ix=4
12837      A1: 1111 0011 ----------------------------
12838
12839      T1: 1111 1111 0  D 01 n  d  1100 N Q M 0 m  SHA256H2 Qd, Qn, Qm  ix=5
12840      A1: 1111 0011 ----------------------------
12841
12842      T1: 1111 1111 0  D 10 n  d  1100 N Q M 0 m  SHA256SU1 Qd, Qn, Qm  ix=6
12843      A1: 1111 0011 ----------------------------
12844      (3/4 of a complete set of 4, based on insn[21,20])
12845
12846      Q must be 1.  Same comments about conditionalisation as for the AES
12847      group above apply.
12848   */
12849   {
12850     UInt ix = 8; /* invalid */
12851     Bool gate = False;
12852
12853     UInt hi9_sha1   = isT ? BITS9(1,1,1,0,1,1,1,1,0)
12854                           : BITS9(1,1,1,1,0,0,1,0,0);
12855     UInt hi9_sha256 = isT ? BITS9(1,1,1,1,1,1,1,1,0)
12856                           : BITS9(1,1,1,1,0,0,1,1,0);
12857     if ((INSN(31,23) == hi9_sha1 || INSN(31,23) == hi9_sha256)
12858         && INSN(11,8) == BITS4(1,1,0,0)
12859         && INSN(6,6) == 1 && INSN(4,4) == 0) {
12860        ix = INSN(21,20);
12861        if (INSN(31,23) == hi9_sha256)
12862           ix |= 4;
12863        if (ix < 7)
12864           gate = True;
12865     }
12866
12867     UInt regN = (INSN(7,7)   << 4)  | INSN(19,16);
12868     UInt regD = (INSN(22,22) << 4)  | INSN(15,12);
12869     UInt regM = (INSN(5,5)   << 4)  | INSN(3,0);
12870     if ((regD & 1) == 1 || (regM & 1) == 1 || (regN & 1) == 1)
12871        gate = False;
12872
12873     if (gate) {
12874        vassert(ix >= 0 && ix < 7);
12875        const HChar* inames[7]
12876           = { "sha1c", "sha1p", "sha1m", "sha1su0",
12877               "sha256h", "sha256h2", "sha256su1" };
12878        void(*helpers[7])(V128*,UInt,UInt,UInt,UInt,UInt,UInt,
12879                                UInt,UInt,UInt,UInt,UInt,UInt)
12880           = { &armg_dirtyhelper_SHA1C,    &armg_dirtyhelper_SHA1P,
12881               &armg_dirtyhelper_SHA1M,    &armg_dirtyhelper_SHA1SU0,
12882               &armg_dirtyhelper_SHA256H,  &armg_dirtyhelper_SHA256H2,
12883               &armg_dirtyhelper_SHA256SU1 };
12884        const HChar* hnames[7]
12885           = { "armg_dirtyhelper_SHA1C",    "armg_dirtyhelper_SHA1P",
12886               "armg_dirtyhelper_SHA1M",    "armg_dirtyhelper_SHA1SU0",
12887               "armg_dirtyhelper_SHA256H",  "armg_dirtyhelper_SHA256H2",
12888               "armg_dirtyhelper_SHA256SU1" };
12889
12890        /* This is a really lame way to implement this, even worse than
12891           the arm64 version.  But at least it works. */
12892
12893        if (isT) {
12894           gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
12895        }
12896
12897        IRTemp vD = newTemp(Ity_V128);
12898        IRTemp vN = newTemp(Ity_V128);
12899        IRTemp vM = newTemp(Ity_V128);
12900        assign(vD,  getQReg(regD >> 1));
12901        assign(vN,  getQReg(regN >> 1));
12902        assign(vM,  getQReg(regM >> 1));
12903
12904        IRTemp d32_3, d32_2, d32_1, d32_0;
12905        d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
12906        breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
12907
12908        IRTemp n32_3_pre, n32_2_pre, n32_1_pre, n32_0_pre;
12909        n32_3_pre = n32_2_pre = n32_1_pre = n32_0_pre = IRTemp_INVALID;
12910        breakupV128to32s( vN, &n32_3_pre, &n32_2_pre, &n32_1_pre, &n32_0_pre );
12911
12912        IRTemp m32_3, m32_2, m32_1, m32_0;
12913        m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
12914        breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
12915
12916        IRTemp n32_3 = newTemp(Ity_I32);
12917        IRTemp n32_2 = newTemp(Ity_I32);
12918        IRTemp n32_1 = newTemp(Ity_I32);
12919        IRTemp n32_0 = newTemp(Ity_I32);
12920
12921        /* Mask off any bits of the N register operand that aren't actually
12922           needed, so that Memcheck doesn't complain unnecessarily. */
12923        switch (ix) {
12924           case 0: case 1: case 2:
12925              assign(n32_3, mkU32(0));
12926              assign(n32_2, mkU32(0));
12927              assign(n32_1, mkU32(0));
12928              assign(n32_0, mkexpr(n32_0_pre));
12929              break;
12930           case 3: case 4: case 5: case 6:
12931              assign(n32_3, mkexpr(n32_3_pre));
12932              assign(n32_2, mkexpr(n32_2_pre));
12933              assign(n32_1, mkexpr(n32_1_pre));
12934              assign(n32_0, mkexpr(n32_0_pre));
12935              break;
12936           default:
12937              vassert(0);
12938        }
12939
12940        IRExpr** argvec
12941           = mkIRExprVec_13(
12942                IRExpr_VECRET(),
12943                mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
12944                mkexpr(n32_3), mkexpr(n32_2), mkexpr(n32_1), mkexpr(n32_0),
12945                mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
12946             );
12947
12948        IRTemp res = newTemp(Ity_V128);
12949        IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
12950                                         hnames[ix], helpers[ix], argvec );
12951        stmt(IRStmt_Dirty(di));
12952        putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
12953
12954        DIP("%s.8 q%u, q%u, q%u\n",
12955            inames[ix], regD >> 1, regN >> 1, regM >> 1);
12956        return True;
12957     }
12958     /* fall through */
12959   }
12960
12961   /* ----------- SHA1SU1, SHA256SU0 ----------- */
12962   /*
12963          31   27   23  21 19   15 11   7      3
12964      T1: 1111 1111 1 D 11 1010 d  0011 10 M 0 m  SHA1SU1 Qd, Qm
12965      A1: 1111 0011 ----------------------------
12966
12967      T1: 1111 1111 1 D 11 1010 d  0011 11 M 0 m  SHA256SU0 Qd, Qm
12968      A1: 1111 0011 ----------------------------
12969
12970      Same comments about conditionalisation as for the AES group above apply.
12971   */
12972   {
12973     Bool gate = False;
12974
12975     UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
12976     if (INSN(31,23) == hi9 && INSN(21,16) == BITS6(1,1,1,0,1,0)
12977         && INSN(11,7) == BITS5(0,0,1,1,1) && INSN(4,4) == 0) {
12978        gate = True;
12979     }
12980
12981     UInt regD = (INSN(22,22) << 4) | INSN(15,12);
12982     UInt regM = (INSN(5,5)   << 4) | INSN(3,0);
12983     if ((regD & 1) == 1 || (regM & 1) == 1)
12984        gate = False;
12985
12986     Bool is_1SU1 = INSN(6,6) == 0;
12987
12988     if (gate) {
12989        const HChar* iname
12990           = is_1SU1 ? "sha1su1" : "sha256su0";
12991        void (*helper)(V128*,UInt,UInt,UInt,UInt,UInt,UInt,UInt,UInt)
12992           = is_1SU1 ? &armg_dirtyhelper_SHA1SU1
12993                     : *armg_dirtyhelper_SHA256SU0;
12994        const HChar* hname
12995           = is_1SU1 ? "armg_dirtyhelper_SHA1SU1"
12996                     : "armg_dirtyhelper_SHA256SU0";
12997
12998        if (isT) {
12999           gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13000        }
13001
13002        IRTemp vD = newTemp(Ity_V128);
13003        IRTemp vM = newTemp(Ity_V128);
13004        assign(vD,  getQReg(regD >> 1));
13005        assign(vM,  getQReg(regM >> 1));
13006
13007        IRTemp d32_3, d32_2, d32_1, d32_0;
13008        d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
13009        breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
13010
13011        IRTemp m32_3, m32_2, m32_1, m32_0;
13012        m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
13013        breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
13014
13015        IRExpr** argvec
13016           = mkIRExprVec_9(
13017                IRExpr_VECRET(),
13018                mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
13019                mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
13020             );
13021
13022        IRTemp res = newTemp(Ity_V128);
13023        IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13024                                         hname, helper, argvec );
13025        stmt(IRStmt_Dirty(di));
13026        putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
13027
13028        DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
13029        return True;
13030     }
13031     /* fall through */
13032   }
13033
13034   /* ----------- SHA1H ----------- */
13035   /*
13036          31   27   23  21 19   15 11   7      3
13037      T1: 1111 1111 1 D 11 1001 d  0010 11 M 0 m  SHA1H Qd, Qm
13038      A1: 1111 0011 ----------------------------
13039
13040      Same comments about conditionalisation as for the AES group above apply.
13041   */
13042   {
13043     Bool gate = False;
13044
13045     UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
13046     if (INSN(31,23) == hi9 && INSN(21,16) == BITS6(1,1,1,0,0,1)
13047         && INSN(11,6) == BITS6(0,0,1,0,1,1) && INSN(4,4) == 0) {
13048        gate = True;
13049     }
13050
13051     UInt regD = (INSN(22,22) << 4) | INSN(15,12);
13052     UInt regM = (INSN(5,5)   << 4) | INSN(3,0);
13053     if ((regD & 1) == 1 || (regM & 1) == 1)
13054        gate = False;
13055
13056     if (gate) {
13057        const HChar* iname = "sha1h";
13058        void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_SHA1H;
13059        const HChar* hname                        = "armg_dirtyhelper_SHA1H";
13060
13061        if (isT) {
13062           gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13063        }
13064
13065        IRTemp vM = newTemp(Ity_V128);
13066        assign(vM,  getQReg(regM >> 1));
13067
13068        IRTemp m32_3, m32_2, m32_1, m32_0;
13069        m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
13070        breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
13071        /* m32_3, m32_2, m32_1 are just abandoned.  No harm; iropt will
13072           remove them. */
13073
13074        IRExpr*  zero   = mkU32(0);
13075        IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
13076                                        zero, zero, zero, mkexpr(m32_0));
13077
13078        IRTemp res = newTemp(Ity_V128);
13079        IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13080                                         hname, helper, argvec );
13081        stmt(IRStmt_Dirty(di));
13082        putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
13083
13084        DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
13085        return True;
13086     }
13087     /* fall through */
13088   }
13089
13090   /* ----------- VMULL.P64 ----------- */
13091   /*
13092          31   27   23  21 19 15 11   7       3
13093      T2: 1110 1111 1 D 10 n  d  1110 N 0 M 0 m
13094      A2: 1111 0010 -------------------------
13095
13096      The ARM documentation is pretty difficult to follow here.
13097      Same comments about conditionalisation as for the AES group above apply.
13098   */
13099   {
13100     Bool gate = False;
13101
13102     UInt hi9 = isT ? BITS9(1,1,1,0,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,0,1);
13103     if (INSN(31,23) == hi9 && INSN(21,20) == BITS2(1,0)
13104         && INSN(11,8) == BITS4(1,1,1,0)
13105         && INSN(6,6) == 0 && INSN(4,4) == 0) {
13106        gate = True;
13107     }
13108
13109     UInt regN = (INSN(7,7)   << 4)  | INSN(19,16);
13110     UInt regD = (INSN(22,22) << 4)  | INSN(15,12);
13111     UInt regM = (INSN(5,5)   << 4)  | INSN(3,0);
13112
13113     if ((regD & 1) == 1)
13114        gate = False;
13115
13116     if (gate) {
13117        const HChar* iname = "vmull";
13118        void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_VMULLP64;
13119        const HChar* hname                        = "armg_dirtyhelper_VMULLP64";
13120
13121        if (isT) {
13122           gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13123        }
13124
13125        IRTemp srcN = newTemp(Ity_I64);
13126        IRTemp srcM = newTemp(Ity_I64);
13127        assign(srcN, getDRegI64(regN));
13128        assign(srcM, getDRegI64(regM));
13129
13130        IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
13131                                        unop(Iop_64HIto32, mkexpr(srcN)),
13132                                        unop(Iop_64to32,   mkexpr(srcN)),
13133                                        unop(Iop_64HIto32, mkexpr(srcM)),
13134                                        unop(Iop_64to32, mkexpr(srcM)));
13135
13136        IRTemp res = newTemp(Ity_V128);
13137        IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13138                                         hname, helper, argvec );
13139        stmt(IRStmt_Dirty(di));
13140        putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
13141
13142        DIP("%s.p64 q%u, q%u, w%u\n", iname, regD >> 1, regN, regM);
13143        return True;
13144     }
13145     /* fall through */
13146   }
13147
13148   /* ----------- LDA{,B,H}, STL{,B,H} ----------- */
13149   /*     31   27   23   19   15 11   7    3
13150      A1: cond 0001 1001  n    t 1100 1001 1111  LDA  Rt, [Rn]
13151      A1: cond 0001 1111  n    t 1100 1001 1111  LDAH Rt, [Rn]
13152      A1: cond 0001 1101  n    t 1100 1001 1111  LDAB Rt, [Rn]
13153
13154      A1: cond 0001 1000  n 1111 1100 1001    t  STL  Rt, [Rn]
13155      A1: cond 0001 1110  n 1111 1100 1001    t  STLH Rt, [Rn]
13156      A1: cond 0001 1100  n 1111 1100 1001    t  STLB Rt, [Rn]
13157
13158      T1: 1110 1000 1101  n    t 1111 1010 1111  LDA  Rt, [Rn]
13159      T1: 1110 1000 1101  n    t 1111 1001 1111  LDAH Rt, [Rn]
13160      T1: 1110 1000 1101  n    t 1111 1000 1111  LDAB Rt, [Rn]
13161
13162      T1: 1110 1000 1100  n    t 1111 1010 1111  STL  Rt, [Rn]
13163      T1: 1110 1000 1100  n    t 1111 1001 1111  STLH Rt, [Rn]
13164      T1: 1110 1000 1100  n    t 1111 1000 1111  STLB Rt, [Rn]
13165   */
13166   {
13167     UInt nn     = 16; // invalid
13168     UInt tt     = 16; // invalid
13169     UInt szBlg2 = 4;  // invalid
13170     Bool isLoad = False;
13171     Bool gate   = False;
13172     if (isT) {
13173        if (INSN(31,21) == BITS11(1,1,1,0,1,0,0,0,1,1,0)
13174            && INSN(11,6) == BITS6(1,1,1,1,1,0)
13175            && INSN(3,0) == BITS4(1,1,1,1)) {
13176           nn     = INSN(19,16);
13177           tt     = INSN(15,12);
13178           isLoad = INSN(20,20) == 1;
13179           szBlg2 = INSN(5,4); // 00:B 01:H 10:W 11:invalid
13180           gate   = szBlg2 != BITS2(1,1) && tt != 15 && nn != 15;
13181        }
13182     } else {
13183        if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 1
13184            && INSN(11,0) == BITS12(1,1,0,0,1,0,0,1,1,1,1,1)) {
13185           nn     = INSN(19,16);
13186           tt     = INSN(15,12);
13187           isLoad = True;
13188           szBlg2     = INSN(22,21); // 10:B 11:H 00:W 01:invalid
13189           gate   = szBlg2 != BITS2(0,1) && tt != 15 && nn != 15;
13190        }
13191        else
13192        if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 0
13193            && INSN(15,4) == BITS12(1,1,1,1,1,1,0,0,1,0,0,1)) {
13194           nn     = INSN(19,16);
13195           tt     = INSN(3,0);
13196           isLoad = False;
13197           szBlg2     = INSN(22,21);  // 10:B 11:H 00:W 01:invalid
13198           gate   = szBlg2 != BITS2(0,1) && tt != 15 && nn != 15;
13199        }
13200        if (gate) {
13201           // Rearrange szBlg2 bits to be the same as the Thumb case
13202           switch (szBlg2) {
13203              case 2: szBlg2 = 0; break;
13204              case 3: szBlg2 = 1; break;
13205              case 0: szBlg2 = 2; break;
13206              default: /*NOTREACHED*/vassert(0);
13207           }
13208        }
13209     }
13210     // For both encodings, the instruction is guarded by condT, which
13211     // is passed in by the caller.  Note that the the loads and stores
13212     // are conditional, so we don't have to truncate the IRSB at this
13213     // point, but the fence is unconditional.  There's no way to
13214     // represent a conditional fence without a side exit, but it
13215     // doesn't matter from a correctness standpoint that it is
13216     // unconditional -- it just loses a bit of performance in the
13217     // case where the condition doesn't hold.
13218     if (gate) {
13219        vassert(szBlg2 <= 2 && nn <= 14 && tt <= 14);
13220        IRExpr* ea = llGetIReg(nn);
13221        if (isLoad) {
13222           static IRLoadGOp cvt[3]
13223              = { ILGop_8Uto32, ILGop_16Uto32, ILGop_Ident32 };
13224           IRTemp data = newTemp(Ity_I32);
13225           loadGuardedLE(data, cvt[szBlg2], ea, mkU32(0)/*alt*/, condT);
13226           if (isT) {
13227              putIRegT(tt, mkexpr(data), condT);
13228           } else {
13229              putIRegA(tt, mkexpr(data), condT, Ijk_INVALID);
13230           }
13231           stmt(IRStmt_MBE(Imbe_Fence));
13232        } else {
13233           stmt(IRStmt_MBE(Imbe_Fence));
13234           IRExpr* data = llGetIReg(tt);
13235           switch (szBlg2) {
13236              case 0: data = unop(Iop_32to8,  data); break;
13237              case 1: data = unop(Iop_32to16, data); break;
13238              case 2: break;
13239              default: vassert(0);
13240           }
13241           storeGuardedLE(ea, data, condT);
13242        }
13243        const HChar* ldNames[3] = { "ldab", "ldah", "lda" };
13244        const HChar* stNames[3] = { "stlb", "stlh", "stl" };
13245        DIP("%s r%u, [r%u]", (isLoad ? ldNames : stNames)[szBlg2], tt, nn);
13246        return True;
13247     }
13248     /* else fall through */
13249   }
13250
13251   /* ----------- LDAEX{,B,H,D}, STLEX{,B,H,D} ----------- */
13252   /*     31   27   23   19 15 11   7    3
13253      A1: cond 0001 1101 n  t  1110 1001 1111  LDAEXB Rt, [Rn]
13254      A1: cond 0001 1111 n  t  1110 1001 1111  LDAEXH Rt, [Rn]
13255      A1: cond 0001 1001 n  t  1110 1001 1111  LDAEX  Rt, [Rn]
13256      A1: cond 0001 1011 n  t  1110 1001 1111  LDAEXD Rt, Rt+1, [Rn]
13257
13258      A1: cond 0001 1100 n  d  1110 1001 t     STLEXB Rd, Rt, [Rn]
13259      A1: cond 0001 1110 n  d  1110 1001 t     STLEXH Rd, Rt, [Rn]
13260      A1: cond 0001 1000 n  d  1110 1001 t     STLEX  Rd, Rt, [Rn]
13261      A1: cond 0001 1010 n  d  1110 1001 t     STLEXD Rd, Rt, Rt+1, [Rn]
13262
13263          31  28   24    19 15 11   7    3
13264      T1: 111 0100 01101 n  t  1111 1100 1111  LDAEXB Rt, [Rn]
13265      T1: 111 0100 01101 n  t  1111 1101 1111  LDAEXH Rt, [Rn]
13266      T1: 111 0100 01101 n  t  1111 1110 1111  LDAEX  Rt, [Rn]
13267      T1: 111 0100 01101 n  t  t2   1111 1111  LDAEXD Rt, Rt2, [Rn]
13268
13269      T1: 111 0100 01100 n  t  1111 1100 d     STLEXB Rd, Rt, [Rn]
13270      T1: 111 0100 01100 n  t  1111 1101 d     STLEXH Rd, Rt, [Rn]
13271      T1: 111 0100 01100 n  t  1111 1110 d     STLEX  Rd, Rt, [Rn]
13272      T1: 111 0100 01100 n  t  t2   1111 d     STLEXD Rd, Rt, Rt2, [Rn]
13273   */
13274   {
13275     UInt nn     = 16; // invalid
13276     UInt tt     = 16; // invalid
13277     UInt tt2    = 16; // invalid
13278     UInt dd     = 16; // invalid
13279     UInt szBlg2 = 4;  // invalid
13280     Bool isLoad = False;
13281     Bool gate   = False;
13282     if (isT) {
13283        if (INSN(31,21) == BITS11(1,1,1,0,1,0,0,0,1,1,0)
13284            && INSN(7,6) == BITS2(1,1)) {
13285           isLoad = INSN(20,20) == 1;
13286           nn     = INSN(19,16);
13287           tt     = INSN(15,12);
13288           tt2    = INSN(11,8);
13289           szBlg2 = INSN(5,4);
13290           dd     = INSN(3,0);
13291           gate   = True;
13292           if (szBlg2 < BITS2(1,1) && tt2 != BITS4(1,1,1,1)) gate = False;
13293           if (isLoad && dd != BITS4(1,1,1,1)) gate = False;
13294           // re-set not-used register values to invalid
13295           if (szBlg2 < BITS2(1,1)) tt2 = 16;
13296           if (isLoad) dd = 16;
13297        }
13298     } else {
13299        /* ARM encoding.  Do the load and store cases separately as
13300           the register numbers are in different places and a combined decode
13301           is too confusing. */
13302        if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 1
13303            && INSN(11,0) == BITS12(1,1,1,0,1,0,0,1,1,1,1,1)) {
13304           szBlg2 = INSN(22,21);
13305           isLoad = True;
13306           nn     = INSN(19,16);
13307           tt     = INSN(15,12);
13308           gate   = True;
13309        }
13310        else
13311        if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 0
13312            && INSN(11,4) == BITS8(1,1,1,0,1,0,0,1)) {
13313           szBlg2 = INSN(22,21);
13314           isLoad = False;
13315           nn     = INSN(19,16);
13316           dd     = INSN(15,12);
13317           tt     = INSN(3,0);
13318           gate   = True;
13319        }
13320        if (gate) {
13321           // Rearrange szBlg2 bits to be the same as the Thumb case
13322           switch (szBlg2) {
13323              case 2: szBlg2 = 0; break;
13324              case 3: szBlg2 = 1; break;
13325              case 0: szBlg2 = 2; break;
13326              case 1: szBlg2 = 3; break;
13327              default: /*NOTREACHED*/vassert(0);
13328           }
13329        }
13330     }
13331     // Perform further checks on register numbers
13332     if (gate) {
13333        /**/ if (isT && isLoad) {
13334           // Thumb load
13335           if (szBlg2 < 3) {
13336              if (! (tt != 13 && tt != 15 && nn != 15)) gate = False;
13337           } else {
13338              if (! (tt != 13 && tt != 15 && tt2 != 13 && tt2 != 15
13339                     && tt != tt2 && nn != 15)) gate = False;
13340           }
13341        }
13342        else if (isT && !isLoad) {
13343           // Thumb store
13344           if (szBlg2 < 3) {
13345              if (! (dd != 13 && dd != 15 && tt != 13 && tt != 15
13346                     && nn != 15 && dd != nn && dd != tt)) gate = False;
13347           } else {
13348              if (! (dd != 13 && dd != 15 && tt != 13 && tt != 15
13349                     && tt2 != 13 && tt2 != 15 && nn != 15 && dd != nn
13350                     && dd != tt && dd != tt2)) gate = False;
13351           }
13352        }
13353        else if (!isT && isLoad) {
13354           // ARM Load
13355           if (szBlg2 < 3) {
13356              if (! (tt != 15 && nn != 15)) gate = False;
13357           } else {
13358              if (! ((tt & 1) == 0 && tt != 14 && nn != 15)) gate = False;
13359              vassert(tt2 == 16/*invalid*/);
13360              tt2 = tt + 1;
13361           }
13362        }
13363        else if (!isT && !isLoad) {
13364           // ARM Store
13365           if (szBlg2 < 3) {
13366              if (! (dd != 15 && tt != 15 && nn != 15
13367                     && dd != nn && dd != tt)) gate = False;
13368           } else {
13369              if (! (dd != 15 && (tt & 1) == 0 && tt != 14 && nn != 15
13370                     && dd != nn && dd != tt && dd != tt+1)) gate = False;
13371              vassert(tt2 == 16/*invalid*/);
13372              tt2 = tt + 1;
13373           }
13374        }
13375        else /*NOTREACHED*/vassert(0);
13376     }
13377     if (gate) {
13378        // Paranoia ..
13379        vassert(szBlg2 <= 3);
13380        if (szBlg2 < 3) { vassert(tt2 == 16/*invalid*/); }
13381                   else { vassert(tt2 <= 14); }
13382        if (isLoad) { vassert(dd == 16/*invalid*/); }
13383               else { vassert(dd <= 14); }
13384     }
13385     // If we're still good even after all that, generate the IR.
13386     if (gate) {
13387        /* First, go unconditional.  Staying in-line is too complex. */
13388        if (isT) {
13389           vassert(condT != IRTemp_INVALID);
13390           mk_skip_over_T32_if_cond_is_false( condT );
13391        } else {
13392           if (condT != IRTemp_INVALID) {
13393              mk_skip_over_A32_if_cond_is_false( condT );
13394              condT = IRTemp_INVALID;
13395           }
13396        }
13397        /* Now the load or store. */
13398        IRType ty = Ity_INVALID; /* the type of the transferred data */
13399        const HChar* nm = NULL;
13400        switch (szBlg2) {
13401           case 0: nm = "b"; ty = Ity_I8;  break;
13402           case 1: nm = "h"; ty = Ity_I16; break;
13403           case 2: nm = "";  ty = Ity_I32; break;
13404           case 3: nm = "d"; ty = Ity_I64; break;
13405           default: vassert(0);
13406        }
13407        IRExpr* ea = isT ? getIRegT(nn) : getIRegA(nn);
13408        if (isLoad) {
13409           // LOAD.  Transaction, then fence.
13410           IROp widen = Iop_INVALID;
13411           switch (szBlg2) {
13412              case 0: widen = Iop_8Uto32;  break;
13413              case 1: widen = Iop_16Uto32; break;
13414              case 2: case 3: break;
13415              default: vassert(0);
13416           }
13417           IRTemp  res = newTemp(ty);
13418           // FIXME: assumes little-endian guest
13419           stmt( IRStmt_LLSC(Iend_LE, res, ea, NULL/*this is a load*/) );
13420
13421#          define PUT_IREG(_nnz, _eez) \
13422              do { vassert((_nnz) <= 14); /* no writes to the PC */ \
13423                   if (isT) { putIRegT((_nnz), (_eez), IRTemp_INVALID); } \
13424                       else { putIRegA((_nnz), (_eez), \
13425                              IRTemp_INVALID, Ijk_Boring); } } while(0)
13426           if (ty == Ity_I64) {
13427              // FIXME: assumes little-endian guest
13428              PUT_IREG(tt,  unop(Iop_64to32, mkexpr(res)));
13429              PUT_IREG(tt2, unop(Iop_64HIto32, mkexpr(res)));
13430           } else {
13431              PUT_IREG(tt, widen == Iop_INVALID
13432                              ? mkexpr(res) : unop(widen, mkexpr(res)));
13433           }
13434           stmt(IRStmt_MBE(Imbe_Fence));
13435           if (ty == Ity_I64) {
13436              DIP("ldrex%s%s r%u, r%u, [r%u]\n",
13437                  nm, isT ? "" : nCC(conq), tt, tt2, nn);
13438           } else {
13439              DIP("ldrex%s%s r%u, [r%u]\n", nm, isT ? "" : nCC(conq), tt, nn);
13440           }
13441#          undef PUT_IREG
13442        } else {
13443           // STORE.  Fence, then transaction.
13444           IRTemp resSC1, resSC32, data;
13445           IROp   narrow = Iop_INVALID;
13446           switch (szBlg2) {
13447              case 0: narrow = Iop_32to8; break;
13448              case 1: narrow = Iop_32to16; break;
13449              case 2: case 3: break;
13450              default: vassert(0);
13451           }
13452           stmt(IRStmt_MBE(Imbe_Fence));
13453           data = newTemp(ty);
13454#          define GET_IREG(_nnz) (isT ? getIRegT(_nnz) : getIRegA(_nnz))
13455           assign(data,
13456                  ty == Ity_I64
13457                     // FIXME: assumes little-endian guest
13458                     ? binop(Iop_32HLto64, GET_IREG(tt2), GET_IREG(tt))
13459                     : narrow == Iop_INVALID
13460                        ? GET_IREG(tt)
13461                        : unop(narrow, GET_IREG(tt)));
13462#          undef GET_IREG
13463           resSC1 = newTemp(Ity_I1);
13464           // FIXME: assumes little-endian guest
13465           stmt( IRStmt_LLSC(Iend_LE, resSC1, ea, mkexpr(data)) );
13466
13467           /* Set rDD to 1 on failure, 0 on success.  Currently we have
13468              resSC1 == 0 on failure, 1 on success. */
13469           resSC32 = newTemp(Ity_I32);
13470           assign(resSC32,
13471                  unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
13472           vassert(dd <= 14); /* no writes to the PC */
13473           if (isT) {
13474              putIRegT(dd, mkexpr(resSC32), IRTemp_INVALID);
13475           } else {
13476              putIRegA(dd, mkexpr(resSC32), IRTemp_INVALID, Ijk_Boring);
13477           }
13478           if (ty == Ity_I64) {
13479              DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
13480                  nm, isT ? "" : nCC(conq), dd, tt, tt2, nn);
13481           } else {
13482              DIP("strex%s%s r%u, r%u, [r%u]\n",
13483                  nm, isT ? "" : nCC(conq), dd, tt, nn);
13484           }
13485        } /* if (isLoad) */
13486        return True;
13487     } /* if (gate) */
13488     /* else fall through */
13489   }
13490
13491   /* ----------- VSEL<c>.F64 d_d_d, VSEL<c>.F32 s_s_s ----------- */
13492   /*        31   27    22 21 19 15 11  8 7 6 5 4 3
13493      T1/A1: 1111 11100 D  cc n  d  101 1 N 0 M 0 m  VSEL<c>.F64 Dd, Dn, Dm
13494      T1/A1: 1111 11100 D  cc n  d  101 0 N 0 M 0 m  VSEL<c>.F32 Sd, Sn, Sm
13495
13496      ARM encoding is in NV space.
13497      In Thumb mode, we must not be in an IT block.
13498   */
13499   if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,0) && INSN(11,9) == BITS3(1,0,1)
13500       && INSN(6,6) == 0 && INSN(4,4) == 0) {
13501      UInt bit_D  = INSN(22,22);
13502      UInt fld_cc = INSN(21,20);
13503      UInt fld_n  = INSN(19,16);
13504      UInt fld_d  = INSN(15,12);
13505      Bool isF64  = INSN(8,8) == 1;
13506      UInt bit_N  = INSN(7,7);
13507      UInt bit_M  = INSN(5,5);
13508      UInt fld_m  = INSN(3,0);
13509
13510      UInt dd = isF64 ? ((bit_D << 4) | fld_d) : ((fld_d << 1) | bit_D);
13511      UInt nn = isF64 ? ((bit_N << 4) | fld_n) : ((fld_n << 1) | bit_N);
13512      UInt mm = isF64 ? ((bit_M << 4) | fld_m) : ((fld_m << 1) | bit_M);
13513
13514      UInt cc_1 = (fld_cc >> 1) & 1;
13515      UInt cc_0 = (fld_cc >> 0) & 1;
13516      UInt cond = (fld_cc << 2) | ((cc_1 ^ cc_0) << 1) | 0;
13517
13518      if (isT) {
13519         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13520      }
13521      /* In ARM mode, this is statically unconditional.  In Thumb mode,
13522         this must be dynamically unconditional, and we've SIGILLd if not.
13523         In either case we can create unconditional IR. */
13524
13525      IRTemp guard = newTemp(Ity_I32);
13526      assign(guard, mk_armg_calculate_condition(cond));
13527      IRExpr* srcN = (isF64 ? llGetDReg : llGetFReg)(nn);
13528      IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13529      IRExpr* res  = IRExpr_ITE(unop(Iop_32to1, mkexpr(guard)), srcN, srcM);
13530      (isF64 ? llPutDReg : llPutFReg)(dd, res);
13531
13532      UChar rch = isF64 ? 'd' : 'f';
13533      DIP("vsel%s.%s %c%u, %c%u, %c%u\n",
13534          nCC(cond), isF64 ? "f64" : "f32", rch, dd, rch, nn, rch, mm);
13535      return True;
13536   }
13537
13538   /* -------- VRINT{A,N,P,M}.F64 d_d, VRINT{A,N,P,M}.F32 s_s -------- */
13539   /*        31        22 21   17 15 11  8 7  5 4 3
13540      T1/A1: 111111101 D  1110 rm Vd 101 1 01 M 0 Vm VRINT{A,N,P,M}.F64 Dd, Dm
13541      T1/A1: 111111101 D  1110 rm Vd 101 0 01 M 0 Vm VRINT{A,N,P,M}.F32 Sd, Sm
13542
13543      ARM encoding is in NV space.
13544      In Thumb mode, we must not be in an IT block.
13545   */
13546   if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1)
13547       && INSN(21,18) == BITS4(1,1,1,0) && INSN(11,9) == BITS3(1,0,1)
13548       && INSN(7,6) == BITS2(0,1) && INSN(4,4) == 0) {
13549      UInt bit_D  = INSN(22,22);
13550      UInt fld_rm = INSN(17,16);
13551      UInt fld_d  = INSN(15,12);
13552      Bool isF64  = INSN(8,8) == 1;
13553      UInt bit_M  = INSN(5,5);
13554      UInt fld_m  = INSN(3,0);
13555
13556      UInt dd = isF64 ? ((bit_D << 4) | fld_d) : ((fld_d << 1) | bit_D);
13557      UInt mm = isF64 ? ((bit_M << 4) | fld_m) : ((fld_m << 1) | bit_M);
13558
13559      if (isT) {
13560         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13561      }
13562      /* In ARM mode, this is statically unconditional.  In Thumb mode,
13563         this must be dynamically unconditional, and we've SIGILLd if not.
13564         In either case we can create unconditional IR. */
13565
13566      UChar c = '?';
13567      IRRoundingMode rm = Irrm_NEAREST;
13568      switch (fld_rm) {
13569         /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
13570            kludge since it doesn't take into account the nearest-even vs
13571            nearest-away semantics. */
13572         case BITS2(0,0): c = 'a'; rm = Irrm_NEAREST; break;
13573         case BITS2(0,1): c = 'n'; rm = Irrm_NEAREST; break;
13574         case BITS2(1,0): c = 'p'; rm = Irrm_PosINF;  break;
13575         case BITS2(1,1): c = 'm'; rm = Irrm_NegINF;  break;
13576         default: vassert(0);
13577      }
13578
13579      IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13580      IRExpr* res  = binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13581                           mkU32((UInt)rm), srcM);
13582      (isF64 ? llPutDReg : llPutFReg)(dd, res);
13583
13584      UChar rch = isF64 ? 'd' : 'f';
13585      DIP("vrint%c.%s.%s %c%u, %c%u\n",
13586          c, isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
13587      return True;
13588   }
13589
13590   /* -------- VRINT{Z,R}.F64.F64 d_d, VRINT{Z,R}.F32.F32 s_s -------- */
13591   /*     31   27    22 21     15 11   7  6 5 4 3
13592      T1: 1110 11101 D  110110 Vd 1011 op 1 M 0 Vm VRINT<r><c>.F64.F64 Dd, Dm
13593      A1: cond 11101 D  110110 Vd 1011 op 1 M 0 Vm
13594
13595      T1: 1110 11101 D  110110 Vd 1010 op 1 M 0 Vm VRINT<r><c>.F32.F32 Sd, Sm
13596      A1: cond 11101 D  110110 Vd 1010 op 1 M 0 Vm
13597
13598      In contrast to the VRINT variants just above, this can be conditional.
13599   */
13600   if ((isT ? (INSN(31,28) == BITS4(1,1,1,0)) : True)
13601       && INSN(27,23) == BITS5(1,1,1,0,1) && INSN(21,16) == BITS6(1,1,0,1,1,0)
13602       && INSN(11,9) == BITS3(1,0,1) && INSN(6,6) == 1 && INSN(4,4) == 0) {
13603      UInt bit_D   = INSN(22,22);
13604      UInt fld_Vd  = INSN(15,12);
13605      Bool isF64   = INSN(8,8) == 1;
13606      Bool rToZero = INSN(7,7) == 1;
13607      UInt bit_M   = INSN(5,5);
13608      UInt fld_Vm  = INSN(3,0);
13609      UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D);
13610      UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13611
13612      if (isT) vassert(condT != IRTemp_INVALID);
13613      IRType ty  = isF64 ? Ity_F64 : Ity_F32;
13614      IRTemp src = newTemp(ty);
13615      IRTemp res = newTemp(ty);
13616      assign(src, (isF64 ? getDReg : getFReg)(mm));
13617
13618      IRTemp rm = newTemp(Ity_I32);
13619      assign(rm, rToZero ? mkU32(Irrm_ZERO)
13620                         : mkexpr(mk_get_IR_rounding_mode()));
13621      assign(res, binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13622                        mkexpr(rm), mkexpr(src)));
13623      (isF64 ? putDReg : putFReg)(dd, mkexpr(res), condT);
13624
13625      UChar rch = isF64 ? 'd' : 'f';
13626      DIP("vrint%c.%s.%s %c%u, %c%u\n",
13627          rToZero ? 'z' : 'r',
13628          isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
13629      return True;
13630   }
13631
13632   /* ----------- VCVT{A,N,P,M}{.S32,.U32}{.F64,.F32} ----------- */
13633   /*        31   27    22 21   17 15 11  8  7  6 5 4 3
13634      T1/A1: 1111 11101 D  1111 rm Vd 101 sz op 1 M 0 Vm
13635             VCVT{A,N,P,M}{.S32,.U32}.F64 Sd, Dm
13636             VCVT{A,N,P,M}{.S32,.U32}.F32 Sd, Sm
13637
13638      ARM encoding is in NV space.
13639      In Thumb mode, we must not be in an IT block.
13640   */
13641   if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1) && INSN(21,18) == BITS4(1,1,1,1)
13642       && INSN(11,9) == BITS3(1,0,1) && INSN(6,6) == 1 && INSN(4,4) == 0) {
13643      UInt bit_D  = INSN(22,22);
13644      UInt fld_rm = INSN(17,16);
13645      UInt fld_Vd = INSN(15,12);
13646      Bool isF64  = INSN(8,8) == 1;
13647      Bool isU    = INSN(7,7) == 0;
13648      UInt bit_M  = INSN(5,5);
13649      UInt fld_Vm = INSN(3,0);
13650
13651      UInt dd = (fld_Vd << 1) | bit_D;
13652      UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13653
13654      if (isT) {
13655         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13656      }
13657      /* In ARM mode, this is statically unconditional.  In Thumb mode,
13658         this must be dynamically unconditional, and we've SIGILLd if not.
13659         In either case we can create unconditional IR. */
13660
13661      UChar c = '?';
13662      IRRoundingMode rm = Irrm_NEAREST;
13663      switch (fld_rm) {
13664         /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
13665            kludge since it doesn't take into account the nearest-even vs
13666            nearest-away semantics. */
13667         case BITS2(0,0): c = 'a'; rm = Irrm_NEAREST; break;
13668         case BITS2(0,1): c = 'n'; rm = Irrm_NEAREST; break;
13669         case BITS2(1,0): c = 'p'; rm = Irrm_PosINF;  break;
13670         case BITS2(1,1): c = 'm'; rm = Irrm_NegINF;  break;
13671         default: vassert(0);
13672      }
13673
13674      IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13675      IRTemp   res = newTemp(Ity_I32);
13676
13677      /* The arm back end doesn't support use of Iop_F32toI32U or
13678         Iop_F32toI32S, so for those cases we widen the F32 to F64
13679         and then follow the F64 route. */
13680      if (!isF64) {
13681         srcM = unop(Iop_F32toF64, srcM);
13682      }
13683      assign(res, binop(isU ? Iop_F64toI32U : Iop_F64toI32S,
13684                        mkU32((UInt)rm), srcM));
13685
13686      llPutFReg(dd, unop(Iop_ReinterpI32asF32, mkexpr(res)));
13687
13688      UChar rch = isF64 ? 'd' : 'f';
13689      DIP("vcvt%c.%s.%s %c%u, %c%u\n",
13690          c, isU ? "u32" : "s32", isF64 ? "f64" : "f32", 's', dd, rch, mm);
13691      return True;
13692   }
13693
13694   /* ----------- V{MAX,MIN}NM{.F64 d_d_d, .F32 s_s_s} ----------- */
13695   /* 31   27    22 21 19 15 11  8 7 6  5 4 3
13696      1111 11101 D  00 Vn Vd 101 1 N op M 0 Vm  V{MIN,MAX}NM.F64 Dd, Dn, Dm
13697      1111 11101 D  00 Vn Vd 101 0 N op M 0 Vm  V{MIN,MAX}NM.F32 Sd, Sn, Sm
13698
13699      ARM encoding is in NV space.
13700      In Thumb mode, we must not be in an IT block.
13701   */
13702   if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1) && INSN(21,20) == BITS2(0,0)
13703       && INSN(11,9) == BITS3(1,0,1) && INSN(4,4) == 0) {
13704      UInt bit_D  = INSN(22,22);
13705      UInt fld_Vn = INSN(19,16);
13706      UInt fld_Vd = INSN(15,12);
13707      Bool isF64  = INSN(8,8) == 1;
13708      UInt bit_N  = INSN(7,7);
13709      Bool isMAX  = INSN(6,6) == 0;
13710      UInt bit_M  = INSN(5,5);
13711      UInt fld_Vm = INSN(3,0);
13712
13713      UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D);
13714      UInt nn = isF64 ? ((bit_N << 4) | fld_Vn) : ((fld_Vn << 1) | bit_N);
13715      UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13716
13717      if (isT) {
13718         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13719      }
13720      /* In ARM mode, this is statically unconditional.  In Thumb mode,
13721         this must be dynamically unconditional, and we've SIGILLd if not.
13722         In either case we can create unconditional IR. */
13723
13724      IROp op = isF64 ? (isMAX ? Iop_MaxNumF64 : Iop_MinNumF64)
13725                      : (isMAX ? Iop_MaxNumF32 : Iop_MinNumF32);
13726      IRExpr* srcN = (isF64 ? llGetDReg : llGetFReg)(nn);
13727      IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13728      IRExpr* res  = binop(op, srcN, srcM);
13729      (isF64 ? llPutDReg : llPutFReg)(dd, res);
13730
13731      UChar rch = isF64 ? 'd' : 'f';
13732      DIP("v%snm.%s %c%u, %c%u, %c%u\n",
13733          isMAX ? "max" : "min", isF64 ? "f64" : "f32",
13734          rch, dd, rch, nn, rch, mm);
13735      return True;
13736   }
13737
13738   /* ----------- VRINTX.F64.F64 d_d, VRINTX.F32.F32 s_s ----------- */
13739   /*     31   27    22 21     15 11  8 7  5 4 3
13740      T1: 1110 11101 D  110111 Vd 101 1 01 M 0 Vm VRINTX<c>.F64.F64 Dd, Dm
13741      A1: cond 11101 D  110111 Vd 101 1 01 M 0 Vm
13742
      T1: 1110 11101 D  110111 Vd 101 0 01 M 0 Vm VRINTX<c>.F32.F32 Sd, Sm
13744      A1: cond 11101 D  110111 Vd 101 0 01 M 0 Vm
13745
13746      Like VRINT{Z,R}{.F64.F64, .F32.F32} just above, this can be conditional.
13747      This produces the same code as the VRINTR case since we ignore the
13748      requirement to signal inexactness.
13749   */
13750   if ((isT ? (INSN(31,28) == BITS4(1,1,1,0)) : True)
13751       && INSN(27,23) == BITS5(1,1,1,0,1) && INSN(21,16) == BITS6(1,1,0,1,1,1)
13752       && INSN(11,9) == BITS3(1,0,1) && INSN(7,6) == BITS2(0,1)
13753       && INSN(4,4) == 0) {
13754      UInt bit_D  = INSN(22,22);
13755      UInt fld_Vd = INSN(15,12);
13756      Bool isF64  = INSN(8,8) == 1;
13757      UInt bit_M  = INSN(5,5);
13758      UInt fld_Vm = INSN(3,0);
13759      UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D);
13760      UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13761
13762      if (isT) vassert(condT != IRTemp_INVALID);
13763      IRType ty  = isF64 ? Ity_F64 : Ity_F32;
13764      IRTemp src = newTemp(ty);
13765      IRTemp res = newTemp(ty);
13766      assign(src, (isF64 ? getDReg : getFReg)(mm));
13767
13768      IRTemp rm = newTemp(Ity_I32);
13769      assign(rm, mkexpr(mk_get_IR_rounding_mode()));
13770      assign(res, binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13771                        mkexpr(rm), mkexpr(src)));
13772      (isF64 ? putDReg : putFReg)(dd, mkexpr(res), condT);
13773
13774      UChar rch = isF64 ? 'd' : 'f';
13775      DIP("vrint%c.%s.%s %c%u, %c%u\n",
13776          'x',
13777          isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
13778      return True;
13779   }
13780
13781   /* ----------- V{MAX,MIN}NM{.F32 d_d_d, .F32 q_q_q} ----------- */
13782   /*     31   27    22 21 20 19 15 11   7 6 5 4 3
13783      T1: 1111 11110 D  op 0  Vn Vd 1111 N 1 M 1 Vm  V{MIN,MAX}NM.F32 Qd,Qn,Qm
13784      A1: 1111 00110 D  op 0  Vn Vd 1111 N 1 M 1 Vm
13785
13786      T1: 1111 11110 D  op 0  Vn Vd 1111 N 0 M 1 Vm  V{MIN,MAX}NM.F32 Dd,Dn,Dm
13787      A1: 1111 00110 D  op 0  Vn Vd 1111 N 0 M 1 Vm
13788
13789      ARM encoding is in NV space.
13790      In Thumb mode, we must not be in an IT block.
13791   */
13792   if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,0)
13793                           : BITS9(1,1,1,1,0,0,1,1,0))
13794       && INSN(20,20) == 0 && INSN(11,8) == BITS4(1,1,1,1) && INSN(4,4) == 1) {
13795      UInt bit_D  = INSN(22,22);
13796      Bool isMax  = INSN(21,21) == 0;
13797      UInt fld_Vn = INSN(19,16);
13798      UInt fld_Vd = INSN(15,12);
13799      UInt bit_N  = INSN(7,7);
13800      Bool isQ    = INSN(6,6) == 1;
13801      UInt bit_M  = INSN(5,5);
13802      UInt fld_Vm = INSN(3,0);
13803
13804      /* dd, nn, mm are D-register numbers. */
13805      UInt dd = (bit_D << 4) | fld_Vd;
13806      UInt nn = (bit_N << 4) | fld_Vn;
13807      UInt mm = (bit_M << 4) | fld_Vm;
13808
13809      if (! (isQ && ((dd & 1) == 1 || (nn & 1) == 1 || (mm & 1) == 1))) {
13810         /* Do this piecewise on f regs.  This is a bit tricky
13811            though because we are dealing with the full 16 x Q == 32 x D
13812            register set, so the implied F reg numbers are 0 to 63.  But
13813            ll{Get,Put}FReg only allow the 0 .. 31 as those are the only
13814            architected F regs. */
13815         UInt ddF = dd << 1;
13816         UInt nnF = nn << 1;
13817         UInt mmF = mm << 1;
13818
13819         if (isT) {
13820            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13821         }
13822         /* In ARM mode, this is statically unconditional.  In Thumb mode,
13823            this must be dynamically unconditional, and we've SIGILLd if not.
13824            In either case we can create unconditional IR. */
13825
13826         IROp op = isMax ? Iop_MaxNumF32 : Iop_MinNumF32;
13827
13828         IRTemp r0 = newTemp(Ity_F32);
13829         IRTemp r1 = newTemp(Ity_F32);
13830         IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13831         IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13832
13833         assign(r0, binop(op, llGetFReg_up_to_64(nnF+0),
13834                              llGetFReg_up_to_64(mmF+0)));
13835         assign(r1, binop(op, llGetFReg_up_to_64(nnF+1),
13836                              llGetFReg_up_to_64(mmF+1)));
13837         if (isQ) {
13838            assign(r2, binop(op, llGetFReg_up_to_64(nnF+2),
13839                                 llGetFReg_up_to_64(mmF+2)));
13840            assign(r3, binop(op, llGetFReg_up_to_64(nnF+3),
13841                                 llGetFReg_up_to_64(mmF+3)));
13842         }
13843         llPutFReg_up_to_64(ddF+0, mkexpr(r0));
13844         llPutFReg_up_to_64(ddF+1, mkexpr(r1));
13845         if (isQ) {
13846            llPutFReg_up_to_64(ddF+2, mkexpr(r2));
13847            llPutFReg_up_to_64(ddF+3, mkexpr(r3));
13848         }
13849
13850         HChar rch = isQ ? 'q' : 'd';
13851         UInt  sh  = isQ ? 1 : 0;
13852         DIP("v%snm.f32 %c%u, %c%u, %c%u\n",
13853              isMax ? "max" : "min", rch,
13854              dd >> sh, rch, nn >> sh, rch, mm >> sh);
13855         return True;
13856      }
13857      /* else fall through */
13858   }
13859
13860   /* ----------- VCVT{A,N,P,M}{.F32 d_d, .F32 q_q} ----------- */
13861   /*     31   27    22 21     15 11 9  7  6 5 4 3
13862      T1: 1111 11111 D  111011 Vd 00 rm op Q M 0 Vm
13863      A1: 1111 00111 D  111011 Vd 00 rm op Q M 0 Vm
13864
13865      ARM encoding is in NV space.
13866      In Thumb mode, we must not be in an IT block.
13867   */
13868   if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,1)
13869                           : BITS9(1,1,1,1,0,0,1,1,1))
13870       && INSN(21,16) == BITS6(1,1,1,0,1,1) && INSN(11,10) == BITS2(0,0)
13871       && INSN(4,4) == 0) {
13872      UInt bit_D  = INSN(22,22);
13873      UInt fld_Vd = INSN(15,12);
13874      UInt fld_rm = INSN(9,8);
13875      Bool isU    = INSN(7,7) == 1;
13876      Bool isQ    = INSN(6,6) == 1;
13877      UInt bit_M  = INSN(5,5);
13878      UInt fld_Vm = INSN(3,0);
13879
13880      /* dd, nn, mm are D-register numbers. */
13881      UInt dd = (bit_D << 4) | fld_Vd;
13882      UInt mm = (bit_M << 4) | fld_Vm;
13883
13884      if (! (isQ && ((dd & 1) == 1 || (mm & 1) == 1))) {
13885         /* Do this piecewise on f regs. */
13886         UInt ddF = dd << 1;
13887         UInt mmF = mm << 1;
13888
13889         if (isT) {
13890            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13891         }
13892         /* In ARM mode, this is statically unconditional.  In Thumb mode,
13893            this must be dynamically unconditional, and we've SIGILLd if not.
13894            In either case we can create unconditional IR. */
13895
13896         UChar cvtc = '?';
13897         IRRoundingMode rm = Irrm_NEAREST;
13898         switch (fld_rm) {
13899            /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
13900               kludge since it doesn't take into account the nearest-even vs
13901               nearest-away semantics. */
13902            case BITS2(0,0): cvtc = 'a'; rm = Irrm_NEAREST; break;
13903            case BITS2(0,1): cvtc = 'n'; rm = Irrm_NEAREST; break;
13904            case BITS2(1,0): cvtc = 'p'; rm = Irrm_PosINF;  break;
13905            case BITS2(1,1): cvtc = 'm'; rm = Irrm_NegINF;  break;
13906            default: vassert(0);
13907         }
13908
13909         IROp cvt = isU ? Iop_F64toI32U : Iop_F64toI32S;
13910
13911         IRTemp r0 = newTemp(Ity_F32);
13912         IRTemp r1 = newTemp(Ity_F32);
13913         IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13914         IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13915
13916         IRExpr* rmE = mkU32((UInt)rm);
13917
13918         assign(r0, unop(Iop_ReinterpI32asF32,
13919                         binop(cvt, rmE, unop(Iop_F32toF64,
13920                                              llGetFReg_up_to_64(mmF+0)))));
13921         assign(r1, unop(Iop_ReinterpI32asF32,
13922                         binop(cvt, rmE, unop(Iop_F32toF64,
13923                                              llGetFReg_up_to_64(mmF+1)))));
13924         if (isQ) {
13925            assign(r2, unop(Iop_ReinterpI32asF32,
13926                            binop(cvt, rmE, unop(Iop_F32toF64,
13927                                                 llGetFReg_up_to_64(mmF+2)))));
13928            assign(r3, unop(Iop_ReinterpI32asF32,
13929                            binop(cvt, rmE, unop(Iop_F32toF64,
13930                                                 llGetFReg_up_to_64(mmF+3)))));
13931         }
13932
13933         llPutFReg_up_to_64(ddF+0, mkexpr(r0));
13934         llPutFReg_up_to_64(ddF+1, mkexpr(r1));
13935         if (isQ) {
13936            llPutFReg_up_to_64(ddF+2, mkexpr(r2));
13937            llPutFReg_up_to_64(ddF+3, mkexpr(r3));
13938         }
13939
13940         HChar rch = isQ ? 'q' : 'd';
13941         UInt  sh  = isQ ? 1 : 0;
13942         DIP("vcvt%c.%c32.f32 %c%u, %c%u\n",
13943              cvtc, isU ? 'u' : 's', rch, dd >> sh, rch, mm >> sh);
13944         return True;
13945      }
13946      /* else fall through */
13947   }
13948
13949   /* ----------- VRINT{A,N,P,M,X,Z}{.F32 d_d, .F32 q_q} ----------- */
13950   /*     31   27    22 21     15 11 9  6 5 4 3
13951      T1: 1111 11111 D  111010 Vd 01 op Q M 0 Vm
13952      A1: 1111 00111 D  111010 Vd 01 op Q M 0 Vm
13953
13954      ARM encoding is in NV space.
13955      In Thumb mode, we must not be in an IT block.
13956   */
13957   if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,1)
13958                           : BITS9(1,1,1,1,0,0,1,1,1))
13959       && INSN(21,16) == BITS6(1,1,1,0,1,0) && INSN(11,10) == BITS2(0,1)
13960       && INSN(4,4) == 0) {
13961      UInt bit_D  = INSN(22,22);
13962      UInt fld_Vd = INSN(15,12);
13963      UInt fld_op = INSN(9,7);
13964      Bool isQ    = INSN(6,6) == 1;
13965      UInt bit_M  = INSN(5,5);
13966      UInt fld_Vm = INSN(3,0);
13967
13968      /* dd, nn, mm are D-register numbers. */
13969      UInt dd = (bit_D << 4) | fld_Vd;
13970      UInt mm = (bit_M << 4) | fld_Vm;
13971
13972      if (! (fld_op == BITS3(1,0,0) || fld_op == BITS3(1,1,0))
13973          && ! (isQ && ((dd & 1) == 1 || (mm & 1) == 1))) {
13974         /* Do this piecewise on f regs. */
13975         UInt ddF = dd << 1;
13976         UInt mmF = mm << 1;
13977
13978         if (isT) {
13979            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13980         }
13981         /* In ARM mode, this is statically unconditional.  In Thumb mode,
13982            this must be dynamically unconditional, and we've SIGILLd if not.
13983            In either case we can create unconditional IR. */
13984
13985         UChar cvtc = '?';
13986         IRRoundingMode rm = Irrm_NEAREST;
13987         switch (fld_op) {
13988            /* Various kludges:
13989               - The use of NEAREST for both the 'a' and 'n' cases,
13990                 since it doesn't take into account the nearest-even vs
13991                 nearest-away semantics.
13992               - For the 'x' case, we don't signal inexactness.
13993            */
13994            case BITS3(0,1,0): cvtc = 'a'; rm = Irrm_NEAREST; break;
13995            case BITS3(0,0,0): cvtc = 'n'; rm = Irrm_NEAREST; break;
13996            case BITS3(1,1,1): cvtc = 'p'; rm = Irrm_PosINF;  break;
13997            case BITS3(1,0,1): cvtc = 'm'; rm = Irrm_NegINF;  break;
13998            case BITS3(0,1,1): cvtc = 'z'; rm = Irrm_ZERO;    break;
13999            case BITS3(0,0,1): cvtc = 'x'; rm = Irrm_NEAREST; break;
14000            case BITS3(1,0,0):
14001            case BITS3(1,1,0):
14002            default: vassert(0);
14003         }
14004
14005         IRTemp r0 = newTemp(Ity_F32);
14006         IRTemp r1 = newTemp(Ity_F32);
14007         IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
14008         IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
14009
14010         IRExpr* rmE = mkU32((UInt)rm);
14011         IROp    rnd = Iop_RoundF32toInt;
14012
14013         assign(r0, binop(rnd, rmE, llGetFReg_up_to_64(mmF+0)));
14014         assign(r1, binop(rnd, rmE, llGetFReg_up_to_64(mmF+1)));
14015         if (isQ) {
14016            assign(r2, binop(rnd, rmE, llGetFReg_up_to_64(mmF+2)));
14017            assign(r3, binop(rnd, rmE, llGetFReg_up_to_64(mmF+3)));
14018         }
14019
14020         llPutFReg_up_to_64(ddF+0, mkexpr(r0));
14021         llPutFReg_up_to_64(ddF+1, mkexpr(r1));
14022         if (isQ) {
14023            llPutFReg_up_to_64(ddF+2, mkexpr(r2));
14024            llPutFReg_up_to_64(ddF+3, mkexpr(r3));
14025         }
14026
14027         HChar rch = isQ ? 'q' : 'd';
14028         UInt  sh  = isQ ? 1 : 0;
14029         DIP("vrint%c.f32.f32 %c%u, %c%u\n",
14030             cvtc, rch, dd >> sh, rch, mm >> sh);
14031         return True;
14032      }
14033      /* else fall through */
14034   }
14035
14036   /* ---------- Doesn't match anything. ---------- */
14037   return False;
14038
14039#  undef INSN
14040}
14041
14042
14043/*------------------------------------------------------------*/
14044/*--- LDMxx/STMxx helper (both ARM and Thumb32)            ---*/
14045/*------------------------------------------------------------*/
14046
14047/* Generate IR for LDMxx and STMxx.  This is complex.  Assumes it's
14048   unconditional, so the caller must produce a jump-around before
14049   calling this, if the insn is to be conditional.  Caller is
14050   responsible for all validation of parameters.  For LDMxx, if PC is
14051   amongst the values loaded, caller is also responsible for
14052   generating the jump. */
14053static void mk_ldm_stm ( Bool arm,     /* True: ARM, False: Thumb */
14054                         UInt rN,      /* base reg */
14055                         UInt bINC,    /* 1: inc,  0: dec */
14056                         UInt bBEFORE, /* 1: inc/dec before, 0: after */
14057                         UInt bW,      /* 1: writeback to Rn */
14058                         UInt bL,      /* 1: load, 0: store */
14059                         UInt regList )
14060{
14061   Int i, r, m, nRegs;
14062   IRTemp jk = Ijk_Boring;
14063
14064   /* Get hold of the old Rn value.  We might need to write its value
14065      to memory during a store, and if it's also the writeback
14066      register then we need to get its value now.  We can't treat it
14067      exactly like the other registers we're going to transfer,
14068      because for xxMDA and xxMDB writeback forms, the generated IR
14069      updates Rn in the guest state before any transfers take place.
14070      We have to do this as per comments below, in order that if Rn is
14071      the stack pointer then it always has a value is below or equal
14072      to any of the transfer addresses.  Ick. */
14073   IRTemp oldRnT = newTemp(Ity_I32);
14074   assign(oldRnT, arm ? getIRegA(rN) : getIRegT(rN));
14075
14076   IRTemp anchorT = newTemp(Ity_I32);
14077   /* The old (Addison-Wesley) ARM ARM seems to say that LDMxx/STMxx
14078      ignore the bottom two bits of the address.  However, Cortex-A8
14079      doesn't seem to care.  Hence: */
14080   /* No .. don't force alignment .. */
14081   /* assign(anchorT, binop(Iop_And32, mkexpr(oldRnT), mkU32(~3U))); */
14082   /* Instead, use the potentially misaligned address directly. */
14083   assign(anchorT, mkexpr(oldRnT));
14084
14085   IROp opADDorSUB = bINC ? Iop_Add32 : Iop_Sub32;
14086   // bINC == 1:  xxMIA, xxMIB
14087   // bINC == 0:  xxMDA, xxMDB
14088
14089   // For xxMDA and xxMDB, update Rn first if necessary.  We have
14090   // to do this first so that, for the common idiom of the transfers
14091   // faulting because we're pushing stuff onto a stack and the stack
14092   // is growing down onto allocate-on-fault pages (as Valgrind simulates),
14093   // we need to have the SP up-to-date "covering" (pointing below) the
14094   // transfer area.  For the same reason, if we are doing xxMIA or xxMIB,
14095   // do the transfer first, and then update rN afterwards.
14096   nRegs = 0;
14097   for (i = 0; i < 16; i++) {
14098     if ((regList & (1 << i)) != 0)
14099         nRegs++;
14100   }
14101   if (bW == 1 && !bINC) {
14102      IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
14103      if (arm)
14104         putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
14105      else
14106         putIRegT( rN, e, IRTemp_INVALID );
14107   }
14108
14109   // Make up a list of the registers to transfer, and their offsets
14110   // in memory relative to the anchor.  If the base reg (Rn) is part
14111   // of the transfer, then do it last for a load and first for a store.
14112   UInt xReg[16], xOff[16];
14113   Int  nX = 0;
14114   m = 0;
14115   for (i = 0; i < 16; i++) {
14116      r = bINC ? i : (15-i);
14117      if (0 == (regList & (1<<r)))
14118         continue;
14119      if (bBEFORE)
14120         m++;
14121      /* paranoia: check we aren't transferring the writeback
14122         register during a load. Should be assured by decode-point
14123         check above. */
14124      if (bW == 1 && bL == 1)
14125         vassert(r != rN);
14126
14127      xOff[nX] = 4 * m;
14128      xReg[nX] = r;
14129      nX++;
14130
14131      if (!bBEFORE)
14132         m++;
14133   }
14134   vassert(m == nRegs);
14135   vassert(nX == nRegs);
14136   vassert(nX <= 16);
14137
14138   if (bW == 0 && (regList & (1<<rN)) != 0) {
14139      /* Non-writeback, and basereg is to be transferred.  Do its
14140         transfer last for a load and first for a store.  Requires
14141         reordering xOff/xReg. */
14142      if (0) {
14143         vex_printf("\nREG_LIST_PRE: (rN=%u)\n", rN);
14144         for (i = 0; i < nX; i++)
14145            vex_printf("reg %u   off %u\n", xReg[i], xOff[i]);
14146         vex_printf("\n");
14147      }
14148
14149      vassert(nX > 0);
14150      for (i = 0; i < nX; i++) {
14151         if (xReg[i] == rN)
14152             break;
14153      }
14154      vassert(i < nX); /* else we didn't find it! */
14155      UInt tReg = xReg[i];
14156      UInt tOff = xOff[i];
14157      if (bL == 1) {
14158         /* load; make this transfer happen last */
14159         if (i < nX-1) {
14160            for (m = i+1; m < nX; m++) {
14161               xReg[m-1] = xReg[m];
14162               xOff[m-1] = xOff[m];
14163            }
14164            vassert(m == nX);
14165            xReg[m-1] = tReg;
14166            xOff[m-1] = tOff;
14167         }
14168      } else {
14169         /* store; make this transfer happen first */
14170         if (i > 0) {
14171            for (m = i-1; m >= 0; m--) {
14172               xReg[m+1] = xReg[m];
14173               xOff[m+1] = xOff[m];
14174            }
14175            vassert(m == -1);
14176            xReg[0] = tReg;
14177            xOff[0] = tOff;
14178         }
14179      }
14180
14181      if (0) {
14182         vex_printf("REG_LIST_POST:\n");
14183         for (i = 0; i < nX; i++)
14184            vex_printf("reg %u   off %u\n", xReg[i], xOff[i]);
14185         vex_printf("\n");
14186      }
14187   }
14188
14189   /* According to the Cortex A8 TRM Sec. 5.2.1, LDM(1) with r13 as the base
14190       register and PC in the register list is a return for purposes of branch
14191       prediction.
14192      The ARM ARM Sec. C9.10.1 further specifies that writeback must be enabled
14193       to be counted in event 0x0E (Procedure return).*/
14194   if (rN == 13 && bL == 1 && bINC && !bBEFORE && bW == 1) {
14195      jk = Ijk_Ret;
14196   }
14197
14198   /* Actually generate the transfers */
14199   for (i = 0; i < nX; i++) {
14200      r = xReg[i];
14201      if (bL == 1) {
14202         IRExpr* e = loadLE(Ity_I32,
14203                            binop(opADDorSUB, mkexpr(anchorT),
14204                                  mkU32(xOff[i])));
14205         if (arm) {
14206            putIRegA( r, e, IRTemp_INVALID, jk );
14207         } else {
14208            // no: putIRegT( r, e, IRTemp_INVALID );
14209            // putIRegT refuses to write to R15.  But that might happen.
14210            // Since this is uncond, and we need to be able to
14211            // write the PC, just use the low level put:
14212            llPutIReg( r, e );
14213         }
14214      } else {
14215         /* if we're storing Rn, make sure we use the correct
14216            value, as per extensive comments above */
14217         storeLE( binop(opADDorSUB, mkexpr(anchorT), mkU32(xOff[i])),
14218                  r == rN ? mkexpr(oldRnT)
14219                          : (arm ? getIRegA(r) : getIRegT(r) ) );
14220      }
14221   }
14222
14223   // If we are doing xxMIA or xxMIB,
14224   // do the transfer first, and then update rN afterwards.
14225   if (bW == 1 && bINC) {
14226      IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
14227      if (arm)
14228         putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
14229      else
14230         putIRegT( rN, e, IRTemp_INVALID );
14231   }
14232}
14233
14234
14235/*------------------------------------------------------------*/
14236/*--- VFP (CP 10 and 11) instructions                      ---*/
14237/*------------------------------------------------------------*/
14238
14239/* Both ARM and Thumb */
14240
14241/* Translate a CP10 or CP11 instruction.  If successful, returns
14242   True and *dres may or may not be updated.  If failure, returns
14243   False and doesn't change *dres nor create any IR.
14244
14245   The ARM and Thumb encodings are identical for the low 28 bits of
14246   the insn (yay!) and that's what the caller must supply, iow, imm28
14247   has the top 4 bits masked out.  Caller is responsible for
14248   determining whether the masked-out bits are valid for a CP10/11
14249   insn.  The rules for the top 4 bits are:
14250
14251     ARM: 0000 to 1110 allowed, and this is the gating condition.
14252     1111 (NV) is not allowed.
14253
14254     Thumb: must be 1110.  The gating condition is taken from
14255     ITSTATE in the normal way.
14256
14257   Conditionalisation:
14258
14259   Caller must supply an IRTemp 'condT' holding the gating condition,
14260   or IRTemp_INVALID indicating the insn is always executed.
14261
   Caller must also supply an ARMCondcode 'conq'.  This is only used
14263   for debug printing, no other purpose.  For ARM, this is simply the
14264   top 4 bits of the original instruction.  For Thumb, the condition
14265   is not (really) known until run time, and so ARMCondAL should be
14266   passed, only so that printing of these instructions does not show
14267   any condition.
14268
14269   Finally, the caller must indicate whether this occurs in ARM or
14270   Thumb code.
14271*/
14272static Bool decode_CP10_CP11_instruction (
14273               /*MOD*/DisResult* dres,
14274               UInt              insn28,
14275               IRTemp            condT,
14276               ARMCondcode       conq,
14277               Bool              isT
14278            )
14279{
14280#  define INSN(_bMax,_bMin)  SLICE_UInt(insn28, (_bMax), (_bMin))
14281
14282   vassert(INSN(31,28) == BITS4(0,0,0,0)); // caller's obligation
14283
14284   if (isT) {
14285      vassert(conq == ARMCondAL);
14286   } else {
14287      vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
14288   }
14289
14290   /* ----------------------------------------------------------- */
14291   /* -- VFP instructions -- double precision (mostly)         -- */
14292   /* ----------------------------------------------------------- */
14293
14294   /* --------------------- fldmx, fstmx --------------------- */
14295   /*
14296                                 31   27   23   19 15 11   7   0
14297                                         P U WL
14298      C4-100, C5-26  1  FSTMX    cond 1100 1000 Rn Dd 1011 offset
14299      C4-100, C5-28  2  FSTMIAX  cond 1100 1010 Rn Dd 1011 offset
14300      C4-100, C5-30  3  FSTMDBX  cond 1101 0010 Rn Dd 1011 offset
14301
14302      C4-42, C5-26   1  FLDMX    cond 1100 1001 Rn Dd 1011 offset
14303      C4-42, C5-28   2  FLDMIAX  cond 1100 1011 Rn Dd 1011 offset
14304      C4-42, C5-30   3  FLDMDBX  cond 1101 0011 Rn Dd 1011 offset
14305
14306      Regs transferred: Dd .. D(d + (offset-3)/2)
      offset must be odd, must not imply a reg > 31
14308      IA/DB: Rn is changed by (4 + 8 x # regs transferred)
14309
14310      case coding:
14311         1  at-Rn   (access at Rn)
14312         2  ia-Rn   (access at Rn, then Rn += 4+8n)
14313         3  db-Rn   (Rn -= 4+8n,   then access at Rn)
14314   */
14315   if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
14316       && INSN(11,8) == BITS4(1,0,1,1)) {
14317      UInt bP      = (insn28 >> 24) & 1;
14318      UInt bU      = (insn28 >> 23) & 1;
14319      UInt bW      = (insn28 >> 21) & 1;
14320      UInt bL      = (insn28 >> 20) & 1;
14321      UInt offset  = (insn28 >> 0) & 0xFF;
14322      UInt rN      = INSN(19,16);
14323      UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
14324      UInt nRegs   = (offset - 1) / 2;
14325      UInt summary = 0;
14326      Int  i;
14327
14328      /**/ if (bP == 0 && bU == 1 && bW == 0) {
14329         summary = 1;
14330      }
14331      else if (bP == 0 && bU == 1 && bW == 1) {
14332         summary = 2;
14333      }
14334      else if (bP == 1 && bU == 0 && bW == 1) {
14335         summary = 3;
14336      }
14337      else goto after_vfp_fldmx_fstmx;
14338
14339      /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
14340      if (rN == 15 && (summary == 2 || summary == 3 || isT))
14341         goto after_vfp_fldmx_fstmx;
14342
14343      /* offset must be odd, and specify at least one register */
14344      if (0 == (offset & 1) || offset < 3)
14345         goto after_vfp_fldmx_fstmx;
14346
      /* can't transfer regs after D31 */
14348      if (dD + nRegs - 1 >= 32)
14349         goto after_vfp_fldmx_fstmx;
14350
14351      /* Now, we can't do a conditional load or store, since that very
14352         likely will generate an exception.  So we have to take a side
14353         exit at this point if the condition is false. */
14354      if (condT != IRTemp_INVALID) {
14355         if (isT)
14356            mk_skip_over_T32_if_cond_is_false( condT );
14357         else
14358            mk_skip_over_A32_if_cond_is_false( condT );
14359         condT = IRTemp_INVALID;
14360      }
14361      /* Ok, now we're unconditional.  Do the load or store. */
14362
14363      /* get the old Rn value */
14364      IRTemp rnT = newTemp(Ity_I32);
14365      assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
14366                           rN == 15));
14367
14368      /* make a new value for Rn, post-insn */
14369      IRTemp rnTnew = IRTemp_INVALID;
14370      if (summary == 2 || summary == 3) {
14371         rnTnew = newTemp(Ity_I32);
14372         assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
14373                              mkexpr(rnT),
14374                              mkU32(4 + 8 * nRegs)));
14375      }
14376
14377      /* decide on the base transfer address */
14378      IRTemp taT = newTemp(Ity_I32);
14379      assign(taT,  summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
14380
14381      /* update Rn if necessary -- in case 3, we're moving it down, so
14382         update before any memory reference, in order to keep Memcheck
14383         and V's stack-extending logic (on linux) happy */
14384      if (summary == 3) {
14385         if (isT)
14386            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14387         else
14388            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14389      }
14390
14391      /* generate the transfers */
14392      for (i = 0; i < nRegs; i++) {
14393         IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
14394         if (bL) {
14395            putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
14396         } else {
14397            storeLE(addr, getDReg(dD + i));
14398         }
14399      }
14400
14401      /* update Rn if necessary -- in case 2, we're moving it up, so
14402         update after any memory reference, in order to keep Memcheck
14403         and V's stack-extending logic (on linux) happy */
14404      if (summary == 2) {
14405         if (isT)
14406            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14407         else
14408            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14409      }
14410
14411      const HChar* nm = bL==1 ? "ld" : "st";
14412      switch (summary) {
14413         case 1:  DIP("f%smx%s r%u, {d%u-d%u}\n",
14414                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
14415                  break;
14416         case 2:  DIP("f%smiax%s r%u!, {d%u-d%u}\n",
14417                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
14418                  break;
14419         case 3:  DIP("f%smdbx%s r%u!, {d%u-d%u}\n",
14420                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
14421                  break;
14422         default: vassert(0);
14423      }
14424
14425      goto decode_success_vfp;
14426      /* FIXME alignment constraints? */
14427   }
14428
14429  after_vfp_fldmx_fstmx:
14430
14431   /* --------------------- fldmd, fstmd --------------------- */
14432   /*
14433                                 31   27   23   19 15 11   7   0
14434                                         P U WL
14435      C4-96, C5-26   1  FSTMD    cond 1100 1000 Rn Dd 1011 offset
14436      C4-96, C5-28   2  FSTMDIA  cond 1100 1010 Rn Dd 1011 offset
14437      C4-96, C5-30   3  FSTMDDB  cond 1101 0010 Rn Dd 1011 offset
14438
14439      C4-38, C5-26   1  FLDMD    cond 1100 1001 Rn Dd 1011 offset
14440      C4-38, C5-28   2  FLDMIAD  cond 1100 1011 Rn Dd 1011 offset
14441      C4-38, C5-30   3  FLDMDBD  cond 1101 0011 Rn Dd 1011 offset
14442
14443      Regs transferred: Dd .. D(d + (offset-2)/2)
      offset must be even, must not imply a reg > 31
14445      IA/DB: Rn is changed by (8 x # regs transferred)
14446
14447      case coding:
14448         1  at-Rn   (access at Rn)
14449         2  ia-Rn   (access at Rn, then Rn += 8n)
14450         3  db-Rn   (Rn -= 8n,     then access at Rn)
14451   */
14452   if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
14453       && INSN(11,8) == BITS4(1,0,1,1)) {
14454      UInt bP      = (insn28 >> 24) & 1;
14455      UInt bU      = (insn28 >> 23) & 1;
14456      UInt bW      = (insn28 >> 21) & 1;
14457      UInt bL      = (insn28 >> 20) & 1;
14458      UInt offset  = (insn28 >> 0) & 0xFF;
14459      UInt rN      = INSN(19,16);
14460      UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
14461      UInt nRegs   = offset / 2;
14462      UInt summary = 0;
14463      Int  i;
14464
14465      /**/ if (bP == 0 && bU == 1 && bW == 0) {
14466         summary = 1;
14467      }
14468      else if (bP == 0 && bU == 1 && bW == 1) {
14469         summary = 2;
14470      }
14471      else if (bP == 1 && bU == 0 && bW == 1) {
14472         summary = 3;
14473      }
14474      else goto after_vfp_fldmd_fstmd;
14475
14476      /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
14477      if (rN == 15 && (summary == 2 || summary == 3 || isT))
14478         goto after_vfp_fldmd_fstmd;
14479
14480      /* offset must be even, and specify at least one register */
14481      if (1 == (offset & 1) || offset < 2)
14482         goto after_vfp_fldmd_fstmd;
14483
14484      /* can't transfer regs after D15 */
14485      if (dD + nRegs - 1 >= 32)
14486         goto after_vfp_fldmd_fstmd;
14487
14488      /* Now, we can't do a conditional load or store, since that very
14489         likely will generate an exception.  So we have to take a side
14490         exit at this point if the condition is false. */
14491      if (condT != IRTemp_INVALID) {
14492         if (isT)
14493            mk_skip_over_T32_if_cond_is_false( condT );
14494         else
14495            mk_skip_over_A32_if_cond_is_false( condT );
14496         condT = IRTemp_INVALID;
14497      }
14498      /* Ok, now we're unconditional.  Do the load or store. */
14499
14500      /* get the old Rn value */
14501      IRTemp rnT = newTemp(Ity_I32);
14502      assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
14503                           rN == 15));
14504
14505      /* make a new value for Rn, post-insn */
14506      IRTemp rnTnew = IRTemp_INVALID;
14507      if (summary == 2 || summary == 3) {
14508         rnTnew = newTemp(Ity_I32);
14509         assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
14510                              mkexpr(rnT),
14511                              mkU32(8 * nRegs)));
14512      }
14513
14514      /* decide on the base transfer address */
14515      IRTemp taT = newTemp(Ity_I32);
14516      assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
14517
14518      /* update Rn if necessary -- in case 3, we're moving it down, so
14519         update before any memory reference, in order to keep Memcheck
14520         and V's stack-extending logic (on linux) happy */
14521      if (summary == 3) {
14522         if (isT)
14523            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14524         else
14525            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14526      }
14527
14528      /* generate the transfers */
14529      for (i = 0; i < nRegs; i++) {
14530         IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
14531         if (bL) {
14532            putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
14533         } else {
14534            storeLE(addr, getDReg(dD + i));
14535         }
14536      }
14537
14538      /* update Rn if necessary -- in case 2, we're moving it up, so
14539         update after any memory reference, in order to keep Memcheck
14540         and V's stack-extending logic (on linux) happy */
14541      if (summary == 2) {
14542         if (isT)
14543            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14544         else
14545            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14546      }
14547
14548      const HChar* nm = bL==1 ? "ld" : "st";
14549      switch (summary) {
14550         case 1:  DIP("f%smd%s r%u, {d%u-d%u}\n",
14551                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
14552                  break;
14553         case 2:  DIP("f%smiad%s r%u!, {d%u-d%u}\n",
14554                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
14555                  break;
14556         case 3:  DIP("f%smdbd%s r%u!, {d%u-d%u}\n",
14557                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
14558                  break;
14559         default: vassert(0);
14560      }
14561
14562      goto decode_success_vfp;
14563      /* FIXME alignment constraints? */
14564   }
14565
14566  after_vfp_fldmd_fstmd:
14567
14568   /* ------------------- fmrx, fmxr ------------------- */
14569   if (BITS8(1,1,1,0,1,1,1,1) == INSN(27,20)
14570       && BITS4(1,0,1,0) == INSN(11,8)
14571       && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
14572      UInt rD  = INSN(15,12);
14573      UInt reg = INSN(19,16);
14574      if (reg == BITS4(0,0,0,1)) {
14575         if (rD == 15) {
14576            IRTemp nzcvT = newTemp(Ity_I32);
14577            /* When rD is 15, we are copying the top 4 bits of FPSCR
14578               into CPSR.  That is, set the flags thunk to COPY and
14579               install FPSCR[31:28] as the value to copy. */
14580            assign(nzcvT, binop(Iop_And32,
14581                                IRExpr_Get(OFFB_FPSCR, Ity_I32),
14582                                mkU32(0xF0000000)));
14583            setFlags_D1(ARMG_CC_OP_COPY, nzcvT, condT);
14584            DIP("fmstat%s\n", nCC(conq));
14585         } else {
14586            /* Otherwise, merely transfer FPSCR to r0 .. r14. */
14587            IRExpr* e = IRExpr_Get(OFFB_FPSCR, Ity_I32);
14588            if (isT)
14589               putIRegT(rD, e, condT);
14590            else
14591               putIRegA(rD, e, condT, Ijk_Boring);
14592            DIP("fmrx%s r%u, fpscr\n", nCC(conq), rD);
14593         }
14594         goto decode_success_vfp;
14595      }
14596      /* fall through */
14597   }
14598
14599   if (BITS8(1,1,1,0,1,1,1,0) == INSN(27,20)
14600       && BITS4(1,0,1,0) == INSN(11,8)
14601       && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
14602      UInt rD  = INSN(15,12);
14603      UInt reg = INSN(19,16);
14604      if (reg == BITS4(0,0,0,1)) {
14605         putMiscReg32(OFFB_FPSCR,
14606                      isT ? getIRegT(rD) : getIRegA(rD), condT);
14607         DIP("fmxr%s fpscr, r%u\n", nCC(conq), rD);
14608         goto decode_success_vfp;
14609      }
14610      /* fall through */
14611   }
14612
14613   /* --------------------- vmov --------------------- */
14614   // VMOV dM, rD, rN
14615   if (0x0C400B10 == (insn28 & 0x0FF00FD0)) {
14616      UInt dM = INSN(3,0) | (INSN(5,5) << 4);
14617      UInt rD = INSN(15,12); /* lo32 */
14618      UInt rN = INSN(19,16); /* hi32 */
14619      if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))) {
14620         /* fall through */
14621      } else {
14622         putDReg(dM,
14623                 unop(Iop_ReinterpI64asF64,
14624                      binop(Iop_32HLto64,
14625                            isT ? getIRegT(rN) : getIRegA(rN),
14626                            isT ? getIRegT(rD) : getIRegA(rD))),
14627                 condT);
14628         DIP("vmov%s d%u, r%u, r%u\n", nCC(conq), dM, rD, rN);
14629         goto decode_success_vfp;
14630      }
14631      /* fall through */
14632   }
14633
14634   // VMOV rD, rN, dM
14635   if (0x0C500B10 == (insn28 & 0x0FF00FD0)) {
14636      UInt dM = INSN(3,0) | (INSN(5,5) << 4);
14637      UInt rD = INSN(15,12); /* lo32 */
14638      UInt rN = INSN(19,16); /* hi32 */
14639      if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))
14640          || rD == rN) {
14641         /* fall through */
14642      } else {
14643         IRTemp i64 = newTemp(Ity_I64);
14644         assign(i64, unop(Iop_ReinterpF64asI64, getDReg(dM)));
14645         IRExpr* hi32 = unop(Iop_64HIto32, mkexpr(i64));
14646         IRExpr* lo32 = unop(Iop_64to32,   mkexpr(i64));
14647         if (isT) {
14648            putIRegT(rN, hi32, condT);
14649            putIRegT(rD, lo32, condT);
14650         } else {
14651            putIRegA(rN, hi32, condT, Ijk_Boring);
14652            putIRegA(rD, lo32, condT, Ijk_Boring);
14653         }
14654         DIP("vmov%s r%u, r%u, d%u\n", nCC(conq), rD, rN, dM);
14655         goto decode_success_vfp;
14656      }
14657      /* fall through */
14658   }
14659
14660   // VMOV sD, sD+1, rN, rM
14661   if (0x0C400A10 == (insn28 & 0x0FF00FD0)) {
14662      UInt sD = (INSN(3,0) << 1) | INSN(5,5);
14663      UInt rN = INSN(15,12);
14664      UInt rM = INSN(19,16);
14665      if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
14666          || sD == 31) {
14667         /* fall through */
14668      } else {
14669         putFReg(sD,
14670                 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rN) : getIRegA(rN)),
14671                 condT);
14672         putFReg(sD+1,
14673                 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rM) : getIRegA(rM)),
14674                 condT);
14675         DIP("vmov%s, s%u, s%u, r%u, r%u\n",
14676              nCC(conq), sD, sD + 1, rN, rM);
14677         goto decode_success_vfp;
14678      }
14679   }
14680
14681   // VMOV rN, rM, sD, sD+1
14682   if (0x0C500A10 == (insn28 & 0x0FF00FD0)) {
14683      UInt sD = (INSN(3,0) << 1) | INSN(5,5);
14684      UInt rN = INSN(15,12);
14685      UInt rM = INSN(19,16);
14686      if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
14687          || sD == 31 || rN == rM) {
14688         /* fall through */
14689      } else {
14690         IRExpr* res0 = unop(Iop_ReinterpF32asI32, getFReg(sD));
14691         IRExpr* res1 = unop(Iop_ReinterpF32asI32, getFReg(sD+1));
14692         if (isT) {
14693            putIRegT(rN, res0, condT);
14694            putIRegT(rM, res1, condT);
14695         } else {
14696            putIRegA(rN, res0, condT, Ijk_Boring);
14697            putIRegA(rM, res1, condT, Ijk_Boring);
14698         }
14699         DIP("vmov%s, r%u, r%u, s%u, s%u\n",
14700             nCC(conq), rN, rM, sD, sD + 1);
14701         goto decode_success_vfp;
14702      }
14703   }
14704
14705   // VMOV rD[x], rT  (ARM core register to scalar)
14706   if (0x0E000B10 == (insn28 & 0x0F900F1F)) {
14707      UInt rD  = (INSN(7,7) << 4) | INSN(19,16);
14708      UInt rT  = INSN(15,12);
14709      UInt opc = (INSN(22,21) << 2) | INSN(6,5);
14710      UInt index;
14711      if (rT == 15 || (isT && rT == 13)) {
14712         /* fall through */
14713      } else {
14714         if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
14715            index = opc & 7;
14716            putDRegI64(rD, triop(Iop_SetElem8x8,
14717                                 getDRegI64(rD),
14718                                 mkU8(index),
14719                                 unop(Iop_32to8,
14720                                      isT ? getIRegT(rT) : getIRegA(rT))),
14721                           condT);
14722            DIP("vmov%s.8 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
14723            goto decode_success_vfp;
14724         }
14725         else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
14726            index = (opc >> 1) & 3;
14727            putDRegI64(rD, triop(Iop_SetElem16x4,
14728                                 getDRegI64(rD),
14729                                 mkU8(index),
14730                                 unop(Iop_32to16,
14731                                      isT ? getIRegT(rT) : getIRegA(rT))),
14732                           condT);
14733            DIP("vmov%s.16 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
14734            goto decode_success_vfp;
14735         }
14736         else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0)) {
14737            index = (opc >> 2) & 1;
14738            putDRegI64(rD, triop(Iop_SetElem32x2,
14739                                 getDRegI64(rD),
14740                                 mkU8(index),
14741                                 isT ? getIRegT(rT) : getIRegA(rT)),
14742                           condT);
14743            DIP("vmov%s.32 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
14744            goto decode_success_vfp;
14745         } else {
14746            /* fall through */
14747         }
14748      }
14749   }
14750
14751   // VMOV (scalar to ARM core register)
14752   // VMOV rT, rD[x]
14753   if (0x0E100B10 == (insn28 & 0x0F100F1F)) {
14754      UInt rN  = (INSN(7,7) << 4) | INSN(19,16);
14755      UInt rT  = INSN(15,12);
14756      UInt U   = INSN(23,23);
14757      UInt opc = (INSN(22,21) << 2) | INSN(6,5);
14758      UInt index;
14759      if (rT == 15 || (isT && rT == 13)) {
14760         /* fall through */
14761      } else {
14762         if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
14763            index = opc & 7;
14764            IRExpr* e = unop(U ? Iop_8Uto32 : Iop_8Sto32,
14765                             binop(Iop_GetElem8x8,
14766                                   getDRegI64(rN),
14767                                   mkU8(index)));
14768            if (isT)
14769               putIRegT(rT, e, condT);
14770            else
14771               putIRegA(rT, e, condT, Ijk_Boring);
14772            DIP("vmov%s.%c8 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
14773                  rT, rN, index);
14774            goto decode_success_vfp;
14775         }
14776         else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
14777            index = (opc >> 1) & 3;
14778            IRExpr* e = unop(U ? Iop_16Uto32 : Iop_16Sto32,
14779                             binop(Iop_GetElem16x4,
14780                                   getDRegI64(rN),
14781                                   mkU8(index)));
14782            if (isT)
14783               putIRegT(rT, e, condT);
14784            else
14785               putIRegA(rT, e, condT, Ijk_Boring);
14786            DIP("vmov%s.%c16 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
14787                  rT, rN, index);
14788            goto decode_success_vfp;
14789         }
14790         else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0) && U == 0) {
14791            index = (opc >> 2) & 1;
14792            IRExpr* e = binop(Iop_GetElem32x2, getDRegI64(rN), mkU8(index));
14793            if (isT)
14794               putIRegT(rT, e, condT);
14795            else
14796               putIRegA(rT, e, condT, Ijk_Boring);
14797            DIP("vmov%s.32 r%u, d%u[%u]\n", nCC(conq), rT, rN, index);
14798            goto decode_success_vfp;
14799         } else {
14800            /* fall through */
14801         }
14802      }
14803   }
14804
14805   // VMOV.F32 sD, #imm
14806   // FCONSTS sD, #imm
14807   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14808       && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,0)) {
14809      UInt rD   = (INSN(15,12) << 1) | INSN(22,22);
14810      UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
14811      UInt b    = (imm8 >> 6) & 1;
14812      UInt imm;
14813      imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,(imm8 >> 5) & 1) << 8)
14814             | ((imm8 & 0x1f) << 3);
14815      imm <<= 16;
14816      putFReg(rD, unop(Iop_ReinterpI32asF32, mkU32(imm)), condT);
14817      DIP("fconsts%s s%u #%u", nCC(conq), rD, imm8);
14818      goto decode_success_vfp;
14819   }
14820
14821   // VMOV.F64 dD, #imm
14822   // FCONSTD dD, #imm
14823   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14824       && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,1)) {
14825      UInt rD   = INSN(15,12) | (INSN(22,22) << 4);
14826      UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
14827      UInt b    = (imm8 >> 6) & 1;
14828      ULong imm;
14829      imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,b) << 8)
14830             | BITS8(b,b,0,0,0,0,0,0) | (imm8 & 0x3f);
14831      imm <<= 48;
14832      putDReg(rD, unop(Iop_ReinterpI64asF64, mkU64(imm)), condT);
14833      DIP("fconstd%s d%u #%u", nCC(conq), rD, imm8);
14834      goto decode_success_vfp;
14835   }
14836
14837   /* ---------------------- vdup ------------------------- */
14838   // VDUP dD, rT
14839   // VDUP qD, rT
14840   if (BITS8(1,1,1,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,1))
14841       && BITS4(1,0,1,1) == INSN(11,8) && INSN(6,6) == 0 && INSN(4,4) == 1) {
14842      UInt rD   = (INSN(7,7) << 4) | INSN(19,16);
14843      UInt rT   = INSN(15,12);
14844      UInt Q    = INSN(21,21);
14845      UInt size = (INSN(22,22) << 1) | INSN(5,5);
14846      if (rT == 15 || (isT && rT == 13) || size == 3 || (Q && (rD & 1))) {
14847         /* fall through */
14848      } else {
14849         IRExpr* e = isT ? getIRegT(rT) : getIRegA(rT);
14850         if (Q) {
14851            rD >>= 1;
14852            switch (size) {
14853               case 0:
14854                  putQReg(rD, unop(Iop_Dup32x4, e), condT);
14855                  break;
14856               case 1:
14857                  putQReg(rD, unop(Iop_Dup16x8, unop(Iop_32to16, e)),
14858                              condT);
14859                  break;
14860               case 2:
14861                  putQReg(rD, unop(Iop_Dup8x16, unop(Iop_32to8, e)),
14862                              condT);
14863                  break;
14864               default:
14865                  vassert(0);
14866            }
14867            DIP("vdup.%d q%u, r%u\n", 32 / (1<<size), rD, rT);
14868         } else {
14869            switch (size) {
14870               case 0:
14871                  putDRegI64(rD, unop(Iop_Dup32x2, e), condT);
14872                  break;
14873               case 1:
14874                  putDRegI64(rD, unop(Iop_Dup16x4, unop(Iop_32to16, e)),
14875                               condT);
14876                  break;
14877               case 2:
14878                  putDRegI64(rD, unop(Iop_Dup8x8, unop(Iop_32to8, e)),
14879                               condT);
14880                  break;
14881               default:
14882                  vassert(0);
14883            }
14884            DIP("vdup.%d d%u, r%u\n", 32 / (1<<size), rD, rT);
14885         }
14886         goto decode_success_vfp;
14887      }
14888   }
14889
14890   /* --------------------- f{ld,st}d --------------------- */
14891   // FLDD, FSTD
14892   if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
14893       && BITS4(1,0,1,1) == INSN(11,8)) {
14894      UInt dD     = INSN(15,12) | (INSN(22,22) << 4);
14895      UInt rN     = INSN(19,16);
14896      UInt offset = (insn28 & 0xFF) << 2;
14897      UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
14898      UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
14899      /* make unconditional */
14900      if (condT != IRTemp_INVALID) {
14901         if (isT)
14902            mk_skip_over_T32_if_cond_is_false( condT );
14903         else
14904            mk_skip_over_A32_if_cond_is_false( condT );
14905         condT = IRTemp_INVALID;
14906      }
14907      IRTemp ea = newTemp(Ity_I32);
14908      assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
14909                       align4if(isT ? getIRegT(rN) : getIRegA(rN),
14910                                rN == 15),
14911                       mkU32(offset)));
14912      if (bL) {
14913         putDReg(dD, loadLE(Ity_F64,mkexpr(ea)), IRTemp_INVALID);
14914      } else {
14915         storeLE(mkexpr(ea), getDReg(dD));
14916      }
14917      DIP("f%sd%s d%u, [r%u, %c#%u]\n",
14918          bL ? "ld" : "st", nCC(conq), dD, rN,
14919          bU ? '+' : '-', offset);
14920      goto decode_success_vfp;
14921   }
14922
14923   /* --------------------- dp insns (D) --------------------- */
14924   if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
14925       && BITS4(1,0,1,1) == INSN(11,8)
14926       && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
14927      UInt    dM  = INSN(3,0)   | (INSN(5,5) << 4);       /* argR */
14928      UInt    dD  = INSN(15,12) | (INSN(22,22) << 4);   /* dst/acc */
14929      UInt    dN  = INSN(19,16) | (INSN(7,7) << 4);     /* argL */
14930      UInt    bP  = (insn28 >> 23) & 1;
14931      UInt    bQ  = (insn28 >> 21) & 1;
14932      UInt    bR  = (insn28 >> 20) & 1;
14933      UInt    bS  = (insn28 >> 6) & 1;
14934      UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
14935      IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
14936      switch (opc) {
14937         case BITS4(0,0,0,0): /* MAC: d + n * m */
14938            putDReg(dD, triop(Iop_AddF64, rm,
14939                              getDReg(dD),
14940                              triop(Iop_MulF64, rm, getDReg(dN),
14941                                                    getDReg(dM))),
14942                        condT);
14943            DIP("fmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14944            goto decode_success_vfp;
14945         case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
14946            putDReg(dD, triop(Iop_AddF64, rm,
14947                              getDReg(dD),
14948                              unop(Iop_NegF64,
14949                                   triop(Iop_MulF64, rm, getDReg(dN),
14950                                                         getDReg(dM)))),
14951                        condT);
14952            DIP("fnmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14953            goto decode_success_vfp;
14954         case BITS4(0,0,1,0): /* MSC: - d + n * m */
14955            putDReg(dD, triop(Iop_AddF64, rm,
14956                              unop(Iop_NegF64, getDReg(dD)),
14957                              triop(Iop_MulF64, rm, getDReg(dN),
14958                                                    getDReg(dM))),
14959                        condT);
14960            DIP("fmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14961            goto decode_success_vfp;
14962         case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
14963            putDReg(dD, triop(Iop_AddF64, rm,
14964                              unop(Iop_NegF64, getDReg(dD)),
14965                              unop(Iop_NegF64,
14966                                   triop(Iop_MulF64, rm, getDReg(dN),
14967                                                         getDReg(dM)))),
14968                        condT);
14969            DIP("fnmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14970            goto decode_success_vfp;
14971         case BITS4(0,1,0,0): /* MUL: n * m */
14972            putDReg(dD, triop(Iop_MulF64, rm, getDReg(dN), getDReg(dM)),
14973                        condT);
14974            DIP("fmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14975            goto decode_success_vfp;
14976         case BITS4(0,1,0,1): /* NMUL: - n * m */
14977            putDReg(dD, unop(Iop_NegF64,
14978                             triop(Iop_MulF64, rm, getDReg(dN),
14979                                                   getDReg(dM))),
14980                    condT);
14981            DIP("fnmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14982            goto decode_success_vfp;
14983         case BITS4(0,1,1,0): /* ADD: n + m */
14984            putDReg(dD, triop(Iop_AddF64, rm, getDReg(dN), getDReg(dM)),
14985                        condT);
14986            DIP("faddd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14987            goto decode_success_vfp;
14988         case BITS4(0,1,1,1): /* SUB: n - m */
14989            putDReg(dD, triop(Iop_SubF64, rm, getDReg(dN), getDReg(dM)),
14990                        condT);
14991            DIP("fsubd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14992            goto decode_success_vfp;
14993         case BITS4(1,0,0,0): /* DIV: n / m */
14994            putDReg(dD, triop(Iop_DivF64, rm, getDReg(dN), getDReg(dM)),
14995                        condT);
14996            DIP("fdivd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14997            goto decode_success_vfp;
14998         case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
14999            /* XXXROUNDINGFIXME look up ARM reference for fused
15000               multiply-add rounding */
15001            putDReg(dD, triop(Iop_AddF64, rm,
15002                              unop(Iop_NegF64, getDReg(dD)),
15003                              triop(Iop_MulF64, rm,
15004                                                getDReg(dN),
15005                                                getDReg(dM))),
15006                        condT);
15007            DIP("vfnmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15008            goto decode_success_vfp;
15009         case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
15010            /* XXXROUNDINGFIXME look up ARM reference for fused
15011               multiply-add rounding */
15012            putDReg(dD, triop(Iop_AddF64, rm,
15013                              unop(Iop_NegF64, getDReg(dD)),
15014                              triop(Iop_MulF64, rm,
15015                                                unop(Iop_NegF64, getDReg(dN)),
15016                                                getDReg(dM))),
15017                        condT);
15018            DIP("vfnmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15019            goto decode_success_vfp;
15020         case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
15021            /* XXXROUNDINGFIXME look up ARM reference for fused
15022               multiply-add rounding */
15023            putDReg(dD, triop(Iop_AddF64, rm,
15024                              getDReg(dD),
15025                              triop(Iop_MulF64, rm, getDReg(dN),
15026                                                    getDReg(dM))),
15027                        condT);
15028            DIP("vfmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15029            goto decode_success_vfp;
15030         case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
15031            /* XXXROUNDINGFIXME look up ARM reference for fused
15032               multiply-add rounding */
15033            putDReg(dD, triop(Iop_AddF64, rm,
15034                              getDReg(dD),
15035                              triop(Iop_MulF64, rm,
15036                                    unop(Iop_NegF64, getDReg(dN)),
15037                                    getDReg(dM))),
15038                        condT);
15039            DIP("vfmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15040            goto decode_success_vfp;
15041         default:
15042            break;
15043      }
15044   }
15045
15046   /* --------------------- compares (D) --------------------- */
15047   /*          31   27   23   19   15 11   7    3
15048                 28   24   20   16 12    8    4    0
15049      FCMPD    cond 1110 1D11 0100 Dd 1011 0100 Dm
15050      FCMPED   cond 1110 1D11 0100 Dd 1011 1100 Dm
15051      FCMPZD   cond 1110 1D11 0101 Dd 1011 0100 0000
15052      FCMPZED  cond 1110 1D11 0101 Dd 1011 1100 0000
15053                                 Z         N
15054
15055      Z=0 Compare Dd vs Dm     and set FPSCR 31:28 accordingly
15056      Z=1 Compare Dd vs zero
15057
15058      N=1 generates Invalid Operation exn if either arg is any kind of NaN
15059      N=0 generates Invalid Operation exn if either arg is a signalling NaN
15060      (Not that we pay any attention to N here)
15061   */
15062   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15063       && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15064       && BITS4(1,0,1,1) == INSN(11,8)
15065       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) { /* FCMP{Z}{E}D */
15066      UInt bZ = (insn28 >> 16) & 1; /* Z: 1 = compare against zero */
15067      UInt bN = (insn28 >> 7) & 1;  /* N: only affects the printed name */
15068      UInt dD = INSN(15,12) | (INSN(22,22) << 4); /* bit 22 is reg bit 4 */
15069      UInt dM = INSN(3,0) | (INSN(5,5) << 4);     /* bit 5 is reg bit 4 */
15070      if (bZ && INSN(3,0) != 0) { /* NOTE(review): M (bit 5) unchecked */
15071         /* Z=1 with nonzero bits 3:0: does not decode; fall through */
15072      } else {
15073         IRTemp argL = newTemp(Ity_F64);
15074         IRTemp argR = newTemp(Ity_F64);
15075         IRTemp irRes = newTemp(Ity_I32);
15076         assign(argL, getDReg(dD));
15077         assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0)) : getDReg(dM));
15078         assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
15079
15080         IRTemp nzcv     = IRTemp_INVALID; /* set by the conversion below */
15081         IRTemp oldFPSCR = newTemp(Ity_I32);
15082         IRTemp newFPSCR = newTemp(Ity_I32);
15083
15084         /* This is where the fun starts.  We have to convert 'irRes'
15085            from an IR-convention return result (IRCmpF64Result) to an
15086            ARM-encoded (N,Z,C,V) group.  The final result is in the
15087            bottom 4 bits of 'nzcv'. */
15088         /* Map compare result from IR to ARM(nzcv) */
15089         /*
15090            FP cmp result | IR   | ARM(nzcv)
15091            --------------------------------
15092            UN              0x45   0011
15093            LT              0x01   1000
15094            GT              0x00   0010
15095            EQ              0x40   0110
15096         */
15097         nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
15098
15099         /* And update FPSCR accordingly: bits 31:28 are replaced by nzcv */
15100         assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
15101         assign(newFPSCR,
15102                binop(Iop_Or32,
15103                      binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
15104                      binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
15105
15106         putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
15107
15108         if (bZ) {
15109            DIP("fcmpz%sd%s d%u\n", bN ? "e" : "", nCC(conq), dD);
15110         } else {
15111            DIP("fcmp%sd%s d%u, d%u\n", bN ? "e" : "", nCC(conq), dD, dM);
15112         }
15113         goto decode_success_vfp;
15114      }
15115      /* fall through: the rejected Z=1 encoding is left to later cases */
15116   }
15117
15118   /* --------------------- unary (D) --------------------- */
15119   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15120       && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15121       && BITS4(1,0,1,1) == INSN(11,8)
15122       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15123      UInt dD  = INSN(15,12) | (INSN(22,22) << 4); /* destination D reg */
15124      UInt dM  = INSN(3,0) | (INSN(5,5) << 4);     /* source D reg */
15125      UInt b16 = (insn28 >> 16) & 1; /* b16:b7 select the operation: */
15126      UInt b7  = (insn28 >> 7) & 1;  /* 00 copy, 01 abs, 10 neg, 11 sqrt */
15127      /**/ if (b16 == 0 && b7 == 0) {
15128         // FCPYD: dD = dM
15129         putDReg(dD, getDReg(dM), condT);
15130         DIP("fcpyd%s d%u, d%u\n", nCC(conq), dD, dM);
15131         goto decode_success_vfp;
15132      }
15133      else if (b16 == 0 && b7 == 1) {
15134         // FABSD: dD = AbsF64(dM)
15135         putDReg(dD, unop(Iop_AbsF64, getDReg(dM)), condT);
15136         DIP("fabsd%s d%u, d%u\n", nCC(conq), dD, dM);
15137         goto decode_success_vfp;
15138      }
15139      else if (b16 == 1 && b7 == 0) {
15140         // FNEGD: dD = NegF64(dM)
15141         putDReg(dD, unop(Iop_NegF64, getDReg(dM)), condT);
15142         DIP("fnegd%s d%u, d%u\n", nCC(conq), dD, dM);
15143         goto decode_success_vfp;
15144      }
15145      else if (b16 == 1 && b7 == 1) {
15146         // FSQRTD: dD = SqrtF64(dM), using the placeholder rounding mode
15147         IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
15148         putDReg(dD, binop(Iop_SqrtF64, rm, getDReg(dM)), condT);
15149         DIP("fsqrtd%s d%u, d%u\n", nCC(conq), dD, dM);
15150         goto decode_success_vfp;
15151      }
15152      else
15153         vassert(0); /* all four b16:b7 values are handled above */
15154
15155      /* fall through */
15156   }
15157
15158   /* ----------------- I <-> D conversions ----------------- */
15159
15160   // F{S,U}ITOD dD, fM: 32-bit integer held in S reg fM -> F64 in dD
15161   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15162       && BITS4(1,0,0,0) == (INSN(19,16) & BITS4(1,1,1,1))
15163       && BITS4(1,0,1,1) == INSN(11,8)
15164       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15165      UInt bM    = (insn28 >> 5) & 1;
15166      UInt fM    = (INSN(3,0) << 1) | bM; /* source S reg; M is bit 0 */
15167      UInt dD    = INSN(15,12) | (INSN(22,22) << 4);
15168      UInt syned = (insn28 >> 7) & 1; /* 1 = signed, 0 = unsigned source */
15169      if (syned) {
15170         // FSITOD: reinterpret fM's bits as I32, convert as signed
15171         putDReg(dD, unop(Iop_I32StoF64,
15172                          unop(Iop_ReinterpF32asI32, getFReg(fM))),
15173                 condT);
15174         DIP("fsitod%s d%u, s%u\n", nCC(conq), dD, fM);
15175      } else {
15176         // FUITOD: reinterpret fM's bits as I32, convert as unsigned
15177         putDReg(dD, unop(Iop_I32UtoF64,
15178                          unop(Iop_ReinterpF32asI32, getFReg(fM))),
15179                 condT);
15180         DIP("fuitod%s d%u, s%u\n", nCC(conq), dD, fM);
15181      }
15182      goto decode_success_vfp;
15183   }
15184
15185   // FTO{S,U}ID fD, dM: F64 in dM -> 32-bit integer stored in S reg fD
15186   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15187       && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15188       && BITS4(1,0,1,1) == INSN(11,8)
15189       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15190      UInt   bD    = (insn28 >> 22) & 1;
15191      UInt   fD    = (INSN(15,12) << 1) | bD; /* dest S reg; D is bit 0 */
15192      UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
15193      UInt   bZ    = (insn28 >> 7) & 1;  /* 1 = force round-to-zero */
15194      UInt   syned = (insn28 >> 16) & 1; /* 1 = signed result */
15195      IRTemp rmode = newTemp(Ity_I32);
15196      assign(rmode, bZ ? mkU32(Irrm_ZERO)
15197                       : mkexpr(mk_get_IR_rounding_mode()));
15198      if (syned) {
15199         // FTOSID: result bits are written to fD via ReinterpI32asF32
15200         putFReg(fD, unop(Iop_ReinterpI32asF32,
15201                          binop(Iop_F64toI32S, mkexpr(rmode),
15202                                getDReg(dM))),
15203                 condT);
15204         DIP("ftosi%sd%s s%u, d%u\n", bZ ? "z" : "",
15205             nCC(conq), fD, dM);
15206      } else {
15207         // FTOUID: as above, but with the unsigned conversion
15208         putFReg(fD, unop(Iop_ReinterpI32asF32,
15209                          binop(Iop_F64toI32U, mkexpr(rmode),
15210                                getDReg(dM))),
15211                 condT);
15212         DIP("ftoui%sd%s s%u, d%u\n", bZ ? "z" : "",
15213             nCC(conq), fD, dM);
15214      }
15215      goto decode_success_vfp;
15216   }
15217
15218   /* ----------------------------------------------------------- */
15219   /* -- VFP instructions -- single precision                  -- */
15220   /* ----------------------------------------------------------- */
15221
15222   /* --------------------- fldms, fstms --------------------- */
15223   /*
15224                                 31   27   23   19 15 11   7   0
15225                                         P UDWL
15226      C4-98, C5-26   1  FSTMS    cond 1100 1x00 Rn Fd 1010 offset
15227      C4-98, C5-28   2  FSTMIAS  cond 1100 1x10 Rn Fd 1010 offset
15228      C4-98, C5-30   3  FSTMDBS  cond 1101 0x10 Rn Fd 1010 offset
15229
15230      C4-40, C5-26   1  FLDMS    cond 1100 1x01 Rn Fd 1010 offset
15231      C4-40, C5-26   2  FLDMIAS  cond 1100 1x11 Rn Fd 1010 offset
15232      C4-40, C5-26   3  FLDMDBS  cond 1101 0x11 Rn Fd 1010 offset
15233
15234      Regs transferred: F(Fd:D) .. F(Fd:D + offset - 1)
15235      offset must not imply a reg > 31
15236      IA/DB: Rn is changed by (4 x # regs transferred)
15237
15238      case coding:
15239         1  at-Rn   (access at Rn)
15240         2  ia-Rn   (access at Rn, then Rn += 4n)
15241         3  db-Rn   (Rn -= 4n,     then access at Rn)
15242   */
15243   if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
15244       && INSN(11,8) == BITS4(1,0,1,0)) { /* multiple S-reg load/store */
15245      UInt bP      = (insn28 >> 24) & 1;
15246      UInt bU      = (insn28 >> 23) & 1;
15247      UInt bW      = (insn28 >> 21) & 1;
15248      UInt bL      = (insn28 >> 20) & 1; /* 1 = load, 0 = store */
15249      UInt bD      = (insn28 >> 22) & 1; /* low bit of first S reg */
15250      UInt offset  = (insn28 >> 0) & 0xFF;
15251      UInt rN      = INSN(19,16);
15252      UInt fD      = (INSN(15,12) << 1) | bD;
15253      UInt nRegs   = offset; /* one S register per offset unit */
15254      UInt summary = 0;      /* case coding 1..3, set just below */
15255      Int  i;
15256
15257      /**/ if (bP == 0 && bU == 1 && bW == 0) {
15258         summary = 1;
15259      }
15260      else if (bP == 0 && bU == 1 && bW == 1) {
15261         summary = 2;
15262      }
15263      else if (bP == 1 && bU == 0 && bW == 1) {
15264         summary = 3;
15265      }
15266      else goto after_vfp_fldms_fstms; /* other P/U/W: not handled here */
15267
15268      /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
15269      if (rN == 15 && (summary == 2 || summary == 3 || isT))
15270         goto after_vfp_fldms_fstms;
15271
15272      /* offset must specify at least one register */
15273      if (offset < 1)
15274         goto after_vfp_fldms_fstms;
15275
15276      /* can't transfer regs after S31 */
15277      if (fD + nRegs - 1 >= 32)
15278         goto after_vfp_fldms_fstms;
15279
15280      /* Now, we can't do a conditional load or store, since that very
15281         likely will generate an exception.  So we have to take a side
15282         exit at this point if the condition is false. */
15283      if (condT != IRTemp_INVALID) {
15284         if (isT)
15285            mk_skip_over_T32_if_cond_is_false( condT );
15286         else
15287            mk_skip_over_A32_if_cond_is_false( condT );
15288         condT = IRTemp_INVALID; /* everything below is emitted unguarded */
15289      }
15290      /* Ok, now we're unconditional.  Do the load or store. */
15291
15292      /* get the old Rn value */
15293      IRTemp rnT = newTemp(Ity_I32);
15294      assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
15295                           rN == 15));
15296
15297      /* make a new value for Rn, post-insn */
15298      IRTemp rnTnew = IRTemp_INVALID;
15299      if (summary == 2 || summary == 3) {
15300         rnTnew = newTemp(Ity_I32);
15301         assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
15302                              mkexpr(rnT),
15303                              mkU32(4 * nRegs))); /* 4 bytes per S reg */
15304      }
15305
15306      /* decide on the base transfer address */
15307      IRTemp taT = newTemp(Ity_I32);
15308      assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
15309
15310      /* update Rn if necessary -- in case 3, we're moving it down, so
15311         update before any memory reference, in order to keep Memcheck
15312         and V's stack-extending logic (on linux) happy */
15313      if (summary == 3) {
15314         if (isT)
15315            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
15316         else
15317            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
15318      }
15319
15320      /* generate the transfers: register fD+i at address taT + 4*i */
15321      for (i = 0; i < nRegs; i++) {
15322         IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(4*i));
15323         if (bL) {
15324            putFReg(fD + i, loadLE(Ity_F32, addr), IRTemp_INVALID);
15325         } else {
15326            storeLE(addr, getFReg(fD + i));
15327         }
15328      }
15329
15330      /* update Rn if necessary -- in case 2, we're moving it up, so
15331         update after any memory reference, in order to keep Memcheck
15332         and V's stack-extending logic (on linux) happy */
15333      if (summary == 2) {
15334         if (isT)
15335            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
15336         else
15337            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
15338      }
15339
15340      const HChar* nm = bL==1 ? "ld" : "st";
15341      switch (summary) {
15342         case 1:  DIP("f%sms%s r%u, {s%u-s%u}\n",
15343                      nm, nCC(conq), rN, fD, fD + nRegs - 1);
15344                  break;
15345         case 2:  DIP("f%smias%s r%u!, {s%u-s%u}\n",
15346                      nm, nCC(conq), rN, fD, fD + nRegs - 1);
15347                  break;
15348         case 3:  DIP("f%smdbs%s r%u!, {s%u-s%u}\n",
15349                      nm, nCC(conq), rN, fD, fD + nRegs - 1);
15350                  break;
15351         default: vassert(0); /* summary was set to 1, 2 or 3 above */
15352      }
15353
15354      goto decode_success_vfp;
15355      /* FIXME alignment constraints? */
15356   }
15357
15358  after_vfp_fldms_fstms:
15359
15360   /* --------------------- fmsr, fmrs --------------------- */
15361   if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
15362       && BITS4(1,0,1,0) == INSN(11,8)
15363       && BITS4(0,0,0,0) == INSN(3,0)
15364       && BITS4(0,0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
15365      UInt rD  = INSN(15,12);              /* integer register */
15366      UInt b7  = (insn28 >> 7) & 1;
15367      UInt fN  = (INSN(19,16) << 1) | b7;  /* S register; bit 7 is bit 0 */
15368      UInt b20 = (insn28 >> 20) & 1;       /* 1 = fmrs, 0 = fmsr */
15369      if (rD == 15) {
15370         /* fall through */
15371         /* Let's assume that no sane person would want to do
15372            floating-point transfers to or from the program counter,
15373            and simply decline to decode the instruction.  The ARM ARM
15374            doesn't seem to explicitly disallow this case, though. */
15375      } else {
15376         if (b20) {
15377            /* fmrs: copy the raw bits of S reg fN into integer reg rD */
15378            IRExpr* res = unop(Iop_ReinterpF32asI32, getFReg(fN));
15379            if (isT)
15380               putIRegT(rD, res, condT);
15381            else
15382               putIRegA(rD, res, condT, Ijk_Boring);
15383            DIP("fmrs%s r%u, s%u\n", nCC(conq), rD, fN);
15384         } else {
15385            putFReg(fN, unop(Iop_ReinterpI32asF32,
15386                             isT ? getIRegT(rD) : getIRegA(rD)),
15387                        condT);
15388            DIP("fmsr%s s%u, r%u\n", nCC(conq), fN, rD);
15389         }
15390         goto decode_success_vfp;
15391      }
15392      /* fall through */
15393   }
15393
15394   /* --------------------- f{ld,st}s --------------------- */
15395   // FLDS, FSTS: load/store one S register at Rn +/- offset
15396   if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
15397       && BITS4(1,0,1,0) == INSN(11,8)) {
15398      UInt bD     = (insn28 >> 22) & 1;
15399      UInt fD     = (INSN(15,12) << 1) | bD;
15400      UInt rN     = INSN(19,16);
15401      UInt offset = (insn28 & 0xFF) << 2;   /* low 8 bits, times 4 */
15402      UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
15403      UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
15404      /* make unconditional */
15405      if (condT != IRTemp_INVALID) {
15406         if (isT)
15407            mk_skip_over_T32_if_cond_is_false( condT );
15408         else
15409            mk_skip_over_A32_if_cond_is_false( condT );
15410         condT = IRTemp_INVALID;
15411      }
15412      IRTemp ea = newTemp(Ity_I32); /* address of the transfer */
15413      assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
15414                       align4if(isT ? getIRegT(rN) : getIRegA(rN),
15415                                rN == 15),
15416                       mkU32(offset)));
15417      if (bL) {
15418         putFReg(fD, loadLE(Ity_F32,mkexpr(ea)), IRTemp_INVALID);
15419      } else {
15420         storeLE(mkexpr(ea), getFReg(fD));
15421      }
15422      DIP("f%ss%s s%u, [r%u, %c#%u]\n",
15423          bL ? "ld" : "st", nCC(conq), fD, rN,
15424          bU ? '+' : '-', offset);
15425      goto decode_success_vfp;
15426   }
15427
15428   /* --------------------- dp insns (F) --------------------- */
15429   if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
15430       && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
15431       && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
15432      UInt    bM  = (insn28 >> 5) & 1;
15433      UInt    bD  = (insn28 >> 22) & 1;
15434      UInt    bN  = (insn28 >> 7) & 1;
15435      UInt    fM  = (INSN(3,0) << 1) | bM;   /* argR */
15436      UInt    fD  = (INSN(15,12) << 1) | bD; /* dst/acc */
15437      UInt    fN  = (INSN(19,16) << 1) | bN; /* argL */
15438      UInt    bP  = (insn28 >> 23) & 1;
15439      UInt    bQ  = (insn28 >> 21) & 1;
15440      UInt    bR  = (insn28 >> 20) & 1;
15441      UInt    bS  = (insn28 >> 6) & 1;
15442      UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS; /* P:Q:R:S */
15443      IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
15444      switch (opc) {
15445         case BITS4(0,0,0,0): /* MAC: d + n * m */
15446            putFReg(fD, triop(Iop_AddF32, rm,
15447                              getFReg(fD),
15448                              triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
15449                        condT);
15450            DIP("fmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15451            goto decode_success_vfp;
15452         case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
15453            putFReg(fD, triop(Iop_AddF32, rm,
15454                              getFReg(fD),
15455                              unop(Iop_NegF32,
15456                                   triop(Iop_MulF32, rm, getFReg(fN),
15457                                                         getFReg(fM)))),
15458                        condT);
15459            DIP("fnmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15460            goto decode_success_vfp;
15461         case BITS4(0,0,1,0): /* MSC: - d + n * m */
15462            putFReg(fD, triop(Iop_AddF32, rm,
15463                              unop(Iop_NegF32, getFReg(fD)),
15464                              triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
15465                        condT);
15466            DIP("fmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15467            goto decode_success_vfp;
15468         case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
15469            putFReg(fD, triop(Iop_AddF32, rm,
15470                              unop(Iop_NegF32, getFReg(fD)),
15471                              unop(Iop_NegF32,
15472                                   triop(Iop_MulF32, rm,
15473                                                     getFReg(fN),
15474                                                    getFReg(fM)))),
15475                        condT);
15476            DIP("fnmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15477            goto decode_success_vfp;
15478         case BITS4(0,1,0,0): /* MUL: n * m */
15479            putFReg(fD, triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM)),
15480                        condT);
15481            DIP("fmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15482            goto decode_success_vfp;
15483         case BITS4(0,1,0,1): /* NMUL: - n * m */
15484            putFReg(fD, unop(Iop_NegF32,
15485                             triop(Iop_MulF32, rm, getFReg(fN),
15486                                                   getFReg(fM))),
15487                    condT);
15488            DIP("fnmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15489            goto decode_success_vfp;
15490         case BITS4(0,1,1,0): /* ADD: n + m */
15491            putFReg(fD, triop(Iop_AddF32, rm, getFReg(fN), getFReg(fM)),
15492                        condT);
15493            DIP("fadds%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15494            goto decode_success_vfp;
15495         case BITS4(0,1,1,1): /* SUB: n - m */
15496            putFReg(fD, triop(Iop_SubF32, rm, getFReg(fN), getFReg(fM)),
15497                        condT);
15498            DIP("fsubs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15499            goto decode_success_vfp;
15500         case BITS4(1,0,0,0): /* DIV: n / m */
15501            putFReg(fD, triop(Iop_DivF32, rm, getFReg(fN), getFReg(fM)),
15502                        condT);
15503            DIP("fdivs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15504            goto decode_success_vfp;
15505         case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
15506            /* XXXROUNDINGFIXME look up ARM reference for fused
15507               multiply-add rounding.  Built here as -d + (n * m). */
15508            putFReg(fD, triop(Iop_AddF32, rm,
15509                              unop(Iop_NegF32, getFReg(fD)),
15510                              triop(Iop_MulF32, rm,
15511                                                getFReg(fN),
15512                                                getFReg(fM))),
15513                        condT);
15514            DIP("vfnmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15515            goto decode_success_vfp;
15516         case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
15517            /* XXXROUNDINGFIXME look up ARM reference for fused
15518               multiply-add rounding.  Built here as -d + (-n * m). */
15519            putFReg(fD, triop(Iop_AddF32, rm,
15520                              unop(Iop_NegF32, getFReg(fD)),
15521                              triop(Iop_MulF32, rm,
15522                                                unop(Iop_NegF32, getFReg(fN)),
15523                                                getFReg(fM))),
15524                        condT);
15525            DIP("vfnmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15526            goto decode_success_vfp;
15527         case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
15528            /* XXXROUNDINGFIXME look up ARM reference for fused
15529               multiply-add rounding.  Built from separate Mul and Add. */
15530            putFReg(fD, triop(Iop_AddF32, rm,
15531                              getFReg(fD),
15532                              triop(Iop_MulF32, rm, getFReg(fN),
15533                                                    getFReg(fM))),
15534                        condT);
15535            DIP("vfmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15536            goto decode_success_vfp;
15537         case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
15538            /* XXXROUNDINGFIXME look up ARM reference for fused
15539               multiply-add rounding.  Built from separate Mul and Add. */
15540            putFReg(fD, triop(Iop_AddF32, rm,
15541                              getFReg(fD),
15542                              triop(Iop_MulF32, rm,
15543                                    unop(Iop_NegF32, getFReg(fN)),
15544                                    getFReg(fM))),
15545                        condT);
15546            DIP("vfmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15547            goto decode_success_vfp;
15548         default:
15549            break; /* remaining opc values are not decoded here */
15550      }
15551   }
15552
15553   /* --------------------- compares (S) --------------------- */
15554   /*          31   27   23   19   15 11   7    3
15555                 28   24   20   16 12    8    4    0
15556      FCMPS    cond 1110 1D11 0100 Fd 1010 01M0 Fm
15557      FCMPES   cond 1110 1D11 0100 Fd 1010 11M0 Fm
15558      FCMPZS   cond 1110 1D11 0101 Fd 1010 0100 0000
15559      FCMPZES  cond 1110 1D11 0101 Fd 1010 1100 0000
15560                                 Z         N
15561
15562      Z=0 Compare Fd:D vs Fm:M     and set FPSCR 31:28 accordingly
15563      Z=1 Compare Fd:D vs zero
15564
15565      N=1 generates Invalid Operation exn if either arg is any kind of NaN
15566      N=0 generates Invalid Operation exn if either arg is a signalling NaN
15567      (Not that we pay any attention to N here)
15568   */
15569   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15570       && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15571       && BITS4(1,0,1,0) == INSN(11,8)
15572       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) { /* FCMP{Z}{E}S */
15573      UInt bZ = (insn28 >> 16) & 1; /* Z: 1 = compare against zero */
15574      UInt bN = (insn28 >> 7) & 1;  /* N: only affects the printed name */
15575      UInt bD = (insn28 >> 22) & 1;
15576      UInt bM = (insn28 >> 5) & 1;
15577      UInt fD = (INSN(15,12) << 1) | bD; /* left operand, S reg */
15578      UInt fM = (INSN(3,0) << 1) | bM;   /* right operand, S reg */
15579      if (bZ && (INSN(3,0) != 0 || (INSN(7,4) & 3) != 0)) {
15580         /* Z=1 with nonzero bits 5:0: does not decode; fall through */
15581      } else {
15582         IRTemp argL = newTemp(Ity_F64);
15583         IRTemp argR = newTemp(Ity_F64);
15584         IRTemp irRes = newTemp(Ity_I32);
15585
15586         assign(argL, unop(Iop_F32toF64, getFReg(fD))); /* compare as F64 */
15587         assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0))
15588                         : unop(Iop_F32toF64, getFReg(fM)));
15589         assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
15590
15591         IRTemp nzcv     = IRTemp_INVALID; /* set by the conversion below */
15592         IRTemp oldFPSCR = newTemp(Ity_I32);
15593         IRTemp newFPSCR = newTemp(Ity_I32);
15594
15595         /* This is where the fun starts.  We have to convert 'irRes'
15596            from an IR-convention return result (IRCmpF64Result) to an
15597            ARM-encoded (N,Z,C,V) group.  The final result is in the
15598            bottom 4 bits of 'nzcv'. */
15599         /* Map compare result from IR to ARM(nzcv) */
15600         /*
15601            FP cmp result | IR   | ARM(nzcv)
15602            --------------------------------
15603            UN              0x45   0011
15604            LT              0x01   1000
15605            GT              0x00   0010
15606            EQ              0x40   0110
15607         */
15608         nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
15609
15610         /* And update FPSCR accordingly: bits 31:28 are replaced by nzcv */
15611         assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
15612         assign(newFPSCR,
15613                binop(Iop_Or32,
15614                      binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
15615                      binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
15616
15617         putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
15618
15619         if (bZ) {
15620            DIP("fcmpz%ss%s s%u\n", bN ? "e" : "", nCC(conq), fD);
15621         } else {
15622            DIP("fcmp%ss%s s%u, s%u\n", bN ? "e" : "",
15623                nCC(conq), fD, fM);
15624         }
15625         goto decode_success_vfp;
15626      }
15627      /* fall through: the rejected Z=1 encoding is left to later cases */
15628   }
15629
15630   /* --------------------- unary (S) --------------------- */
15631   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15632       && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15633       && BITS4(1,0,1,0) == INSN(11,8)
15634       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15635      UInt bD = (insn28 >> 22) & 1;
15636      UInt bM = (insn28 >> 5) & 1;
15637      UInt fD  = (INSN(15,12) << 1) | bD; /* destination S reg */
15638      UInt fM  = (INSN(3,0) << 1) | bM;   /* source S reg */
15639      UInt b16 = (insn28 >> 16) & 1; /* b16:b7 select the operation: */
15640      UInt b7  = (insn28 >> 7) & 1;  /* 00 copy, 01 abs, 10 neg, 11 sqrt */
15641      /**/ if (b16 == 0 && b7 == 0) {
15642         // FCPYS: fD = fM
15643         putFReg(fD, getFReg(fM), condT);
15644         DIP("fcpys%s s%u, s%u\n", nCC(conq), fD, fM);
15645         goto decode_success_vfp;
15646      }
15647      else if (b16 == 0 && b7 == 1) {
15648         // FABSS: fD = AbsF32(fM)
15649         putFReg(fD, unop(Iop_AbsF32, getFReg(fM)), condT);
15650         DIP("fabss%s s%u, s%u\n", nCC(conq), fD, fM);
15651         goto decode_success_vfp;
15652      }
15653      else if (b16 == 1 && b7 == 0) {
15654         // FNEGS: fD = NegF32(fM)
15655         putFReg(fD, unop(Iop_NegF32, getFReg(fM)), condT);
15656         DIP("fnegs%s s%u, s%u\n", nCC(conq), fD, fM);
15657         goto decode_success_vfp;
15658      }
15659      else if (b16 == 1 && b7 == 1) {
15660         // FSQRTS: fD = SqrtF32(fM), using the placeholder rounding mode
15661         IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
15662         putFReg(fD, binop(Iop_SqrtF32, rm, getFReg(fM)), condT);
15663         DIP("fsqrts%s s%u, s%u\n", nCC(conq), fD, fM);
15664         goto decode_success_vfp;
15665      }
15666      else
15667         vassert(0); /* all four b16:b7 values are handled above */
15668
15669      /* fall through */
15670   }
15671
15672   /* ----------------- I <-> S conversions ----------------- */
15673
15674   // F{S,U}ITOS fD, fM: 32-bit integer held in S reg fM -> F32 in fD
15675   /* These are more complex than FSITOD/FUITOD.  In the D cases, a 32
15676      bit int will always fit within the 53 bit mantissa, so there's
15677      no possibility of a loss of precision, but that's obviously not
15678      the case here.  Hence this case possibly requires rounding, and
15679      so it drags in the current rounding mode. */
15680   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15681       && BITS4(1,0,0,0) == INSN(19,16)
15682       && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0)) /* bit 8 ignored */
15683       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15684      UInt bM    = (insn28 >> 5) & 1;
15685      UInt bD    = (insn28 >> 22) & 1;
15686      UInt fM    = (INSN(3,0) << 1) | bM;   /* source S reg */
15687      UInt fD    = (INSN(15,12) << 1) | bD; /* destination S reg */
15688      UInt syned = (insn28 >> 7) & 1; /* 1 = signed, 0 = unsigned source */
15689      IRTemp rmode = newTemp(Ity_I32);
15690      assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
15691      if (syned) {
15692         // FSITOS: I32 -> F64 (signed), then F64 -> F32 under rmode
15693         putFReg(fD, binop(Iop_F64toF32,
15694                           mkexpr(rmode),
15695                           unop(Iop_I32StoF64,
15696                                unop(Iop_ReinterpF32asI32, getFReg(fM)))),
15697                 condT);
15698         DIP("fsitos%s s%u, s%u\n", nCC(conq), fD, fM);
15699      } else {
15700         // FUITOS: I32 -> F64 (unsigned), then F64 -> F32 under rmode
15701         putFReg(fD, binop(Iop_F64toF32,
15702                           mkexpr(rmode),
15703                           unop(Iop_I32UtoF64,
15704                                unop(Iop_ReinterpF32asI32, getFReg(fM)))),
15705                 condT);
15706         DIP("fuitos%s s%u, s%u\n", nCC(conq), fD, fM);
15707      }
15708      goto decode_success_vfp;
15709   }
15710
15711   // FTO{S,U}IS fD, fM: F32 in fM -> 32-bit integer stored in S reg fD
15712   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15713       && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15714       && BITS4(1,0,1,0) == INSN(11,8)
15715       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15716      UInt   bM    = (insn28 >> 5) & 1;
15717      UInt   bD    = (insn28 >> 22) & 1;
15718      UInt   fD    = (INSN(15,12) << 1) | bD; /* destination S reg */
15719      UInt   fM    = (INSN(3,0) << 1) | bM;   /* source S reg */
15720      UInt   bZ    = (insn28 >> 7) & 1;  /* 1 = force round-to-zero */
15721      UInt   syned = (insn28 >> 16) & 1; /* 1 = signed result */
15722      IRTemp rmode = newTemp(Ity_I32);
15723      assign(rmode, bZ ? mkU32(Irrm_ZERO)
15724                       : mkexpr(mk_get_IR_rounding_mode()));
15725      if (syned) {
15726         // FTOSIS: widen fM to F64, convert to signed I32, store bits in fD
15727         putFReg(fD, unop(Iop_ReinterpI32asF32,
15728                          binop(Iop_F64toI32S, mkexpr(rmode),
15729                                unop(Iop_F32toF64, getFReg(fM)))),
15730                 condT);
15731         DIP("ftosi%ss%s s%u, s%u\n", bZ ? "z" : "", /* source is an S reg */
15732             nCC(conq), fD, fM);
15733         goto decode_success_vfp;
15734      } else {
15735         // FTOUIS: as above, but with the unsigned conversion
15736         putFReg(fD, unop(Iop_ReinterpI32asF32,
15737                          binop(Iop_F64toI32U, mkexpr(rmode),
15738                                unop(Iop_F32toF64, getFReg(fM)))),
15739                 condT);
15740         DIP("ftoui%ss%s s%u, s%u\n", bZ ? "z" : "", /* source is an S reg */
15741             nCC(conq), fD, fM);
15742         goto decode_success_vfp;
15743      }
15744   }
15745
15746   /* ----------------- S <-> D conversions ----------------- */
15747
15748   // FCVTDS: convert single (S reg fM) to double (D reg dD)
15749   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15750       && BITS4(0,1,1,1) == INSN(19,16)
15751       && BITS4(1,0,1,0) == INSN(11,8)
15752       && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
15753      UInt dD = INSN(15,12) | (INSN(22,22) << 4); /* bit 22 is reg bit 4 */
15754      UInt bM = (insn28 >> 5) & 1;
15755      UInt fM = (INSN(3,0) << 1) | bM;            /* bit 5 is reg bit 0 */
15756      putDReg(dD, unop(Iop_F32toF64, getFReg(fM)), condT);
15757      DIP("fcvtds%s d%u, s%u\n", nCC(conq), dD, fM);
15758      goto decode_success_vfp;
15759   }
15760
15761   // FCVTSD: convert double (D reg dM) to single (S reg fD)
15762   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15763       && BITS4(0,1,1,1) == INSN(19,16)
15764       && BITS4(1,0,1,1) == INSN(11,8)
15765       && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
15766      UInt   bD    = (insn28 >> 22) & 1;
15767      UInt   fD    = (INSN(15,12) << 1) | bD;       /* bit 22 is reg bit 0 */
15768      UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);  /* bit 5 is reg bit 4 */
15769      IRTemp rmode = newTemp(Ity_I32);
15770      assign(rmode, mkexpr(mk_get_IR_rounding_mode())); /* for narrowing */
15771      putFReg(fD, binop(Iop_F64toF32, mkexpr(rmode), getDReg(dM)),
15772                  condT);
15773      DIP("fcvtsd%s s%u, d%u\n", nCC(conq), fD, dM);
15774      goto decode_success_vfp;
15775   }
15776
15777   /* --------------- VCVT fixed<->floating, VFP --------------- */
15778   /*          31   27   23   19   15 11   7    3
15779                 28   24   20   16 12    8    4    0
15780
15781               cond 1110 1D11 1p1U Vd 101f x1i0 imm4
15782
15783      VCVT<c>.<Td>.F64 <Dd>, <Dd>, #fbits
15784      VCVT<c>.<Td>.F32 <Sd>, <Sd>, #fbits
15785      VCVT<c>.F64.<Td> <Dd>, <Dd>, #fbits
15786      VCVT<c>.F32.<Td> <Sd>, <Sd>, #fbits
15787      are of this form.  We only handle a subset of the cases though.
15788   */
15789   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15790       && BITS4(1,0,1,0) == (INSN(19,16) & BITS4(1,0,1,0))
15791       && BITS3(1,0,1) == INSN(11,9)
15792       && BITS3(1,0,0) == (INSN(6,4) & BITS3(1,0,1))) {
15793      UInt bD        = INSN(22,22);
15794      UInt bOP       = INSN(18,18);
15795      UInt bU        = INSN(16,16);
15796      UInt Vd        = INSN(15,12);
15797      UInt bSF       = INSN(8,8);
15798      UInt bSX       = INSN(7,7);
15799      UInt bI        = INSN(5,5);
15800      UInt imm4      = INSN(3,0);
15801      Bool to_fixed  = bOP == 1;
15802      Bool dp_op     = bSF == 1;
15803      Bool unsyned   = bU == 1;
15804      UInt size      = bSX == 0 ? 16 : 32;
15805      Int  frac_bits = size - ((imm4 << 1) | bI);
15806      UInt d         = dp_op  ? ((bD << 4) | Vd)  : ((Vd << 1) | bD);
15807
15808      IRExpr* rm     = mkU32(Irrm_NEAREST);
15809      IRTemp  scale  = newTemp(Ity_F64);
15810      assign(scale, unop(Iop_I32UtoF64, mkU32( ((UInt)1) << (frac_bits-1) )));
15811
15812      if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && !dp_op
15813                                            && size == 32) {
15814         /* VCVT.F32.{S,U}32 S[d], S[d], #frac_bits */
15815         /* This generates really horrible code.  We could potentially
15816            do much better. */
15817         IRTemp rmode = newTemp(Ity_I32);
15818         assign(rmode, mkU32(Irrm_NEAREST)); // per the spec
15819         IRTemp src32 = newTemp(Ity_I32);
15820         assign(src32,  unop(Iop_ReinterpF32asI32, getFReg(d)));
15821         IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
15822                                mkexpr(src32 ) );
15823         IRExpr* resF64 = triop(Iop_DivF64,
15824                                rm, as_F64,
15825                                triop(Iop_AddF64, rm, mkexpr(scale),
15826                                                      mkexpr(scale)));
15827         IRExpr* resF32 = binop(Iop_F64toF32, mkexpr(rmode), resF64);
15828         putFReg(d, resF32, condT);
15829         DIP("vcvt.f32.%c32, s%u, s%u, #%d\n",
15830             unsyned ? 'u' : 's', d, d, frac_bits);
15831         goto decode_success_vfp;
15832      }
15833      if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && dp_op
15834                                            && size == 32) {
15835         /* VCVT.F64.{S,U}32 D[d], D[d], #frac_bits */
15836         /* This generates really horrible code.  We could potentially
15837            do much better. */
15838         IRTemp src32 = newTemp(Ity_I32);
15839         assign(src32, unop(Iop_64to32, getDRegI64(d)));
15840         IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
15841                                mkexpr(src32 ) );
15842         IRExpr* resF64 = triop(Iop_DivF64,
15843                                rm, as_F64,
15844                                triop(Iop_AddF64, rm, mkexpr(scale),
15845                                                      mkexpr(scale)));
15846         putDReg(d, resF64, condT);
15847         DIP("vcvt.f64.%c32, d%u, d%u, #%d\n",
15848             unsyned ? 'u' : 's', d, d, frac_bits);
15849         goto decode_success_vfp;
15850      }
15851      if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && dp_op
15852                                            && size == 32) {
15853         /* VCVT.{S,U}32.F64 D[d], D[d], #frac_bits */
15854         IRTemp srcF64 = newTemp(Ity_F64);
15855         assign(srcF64, getDReg(d));
15856         IRTemp scaledF64 = newTemp(Ity_F64);
15857         assign(scaledF64, triop(Iop_MulF64,
15858                                 rm, mkexpr(srcF64),
15859                                 triop(Iop_AddF64, rm, mkexpr(scale),
15860                                                       mkexpr(scale))));
15861         IRTemp rmode = newTemp(Ity_I32);
15862         assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
15863         IRTemp asI32 = newTemp(Ity_I32);
15864         assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
15865                             mkexpr(rmode), mkexpr(scaledF64)));
15866         putDRegI64(d, unop(unsyned ? Iop_32Uto64 : Iop_32Sto64,
15867                            mkexpr(asI32)), condT);
15868
15869         DIP("vcvt.%c32.f64, d%u, d%u, #%d\n",
15870             unsyned ? 'u' : 's', d, d, frac_bits);
15871         goto decode_success_vfp;
15872      }
15873      if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && !dp_op
15874                                            && size == 32) {
15875         /* VCVT.{S,U}32.F32 S[d], S[d], #frac_bits */
15876         IRTemp srcF32 = newTemp(Ity_F32);
15877         assign(srcF32, getFReg(d));
15878         IRTemp scaledF64 = newTemp(Ity_F64);
15879         assign(scaledF64, triop(Iop_MulF64,
15880                                 rm, unop(Iop_F32toF64, mkexpr(srcF32)),
15881                                 triop(Iop_AddF64, rm, mkexpr(scale),
15882                                                       mkexpr(scale))));
15883         IRTemp rmode = newTemp(Ity_I32);
15884         assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
15885         IRTemp asI32 = newTemp(Ity_I32);
15886         assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
15887                             mkexpr(rmode), mkexpr(scaledF64)));
15888         putFReg(d, unop(Iop_ReinterpI32asF32, mkexpr(asI32)), condT);
15889         DIP("vcvt.%c32.f32, d%u, d%u, #%d\n",
15890             unsyned ? 'u' : 's', d, d, frac_bits);
15891         goto decode_success_vfp;
15892      }
15893      /* fall through */
15894   }
15895
15896   /* FAILURE */
15897   return False;
15898
15899  decode_success_vfp:
15900   /* Check that any accepted insn really is a CP10 or CP11 insn, iow,
15901      assert that we aren't accepting, in this fn, insns that actually
15902      should be handled somewhere else. */
15903   vassert(INSN(11,9) == BITS3(1,0,1)); // 11:8 = 1010 or 1011
15904   return True;
15905
15906#  undef INSN
15907}
15908
15909
15910/*------------------------------------------------------------*/
15911/*--- Instructions in NV (never) space                     ---*/
15912/*------------------------------------------------------------*/
15913
15914/* ARM only */
15915/* Translate a NV space instruction.  If successful, returns True and
15916   *dres may or may not be updated.  If failure, returns False and
15917   doesn't change *dres nor create any IR.
15918
15919   Note that all NEON instructions (in ARM mode) up to and including
15920   ARMv7, but not later, are handled through here, since they are all
15921   in NV space.
15922*/
15923static Bool decode_NV_instruction_ARMv7_and_below
15924                                 ( /*MOD*/DisResult* dres,
15925                                    const VexArchInfo* archinfo,
15926                                    UInt insn )
15927{
15928#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
15929#  define INSN_COND          SLICE_UInt(insn, 31, 28)
15930
15931   HChar dis_buf[128];
15932
15933   // Should only be called for NV instructions
15934   vassert(BITS4(1,1,1,1) == INSN_COND);
15935
15936   /* ------------------------ pld{w} ------------------------ */
15937   if (BITS8(0,1,0,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
15938       && BITS4(1,1,1,1) == INSN(15,12)) {
15939      UInt rN    = INSN(19,16);
15940      UInt imm12 = INSN(11,0);
15941      UInt bU    = INSN(23,23);
15942      UInt bR    = INSN(22,22);
15943      DIP("pld%c [r%u, #%c%u]\n", bR ? ' ' : 'w', rN, bU ? '+' : '-', imm12);
15944      return True;
15945   }
15946
15947   if (BITS8(0,1,1,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
15948       && BITS4(1,1,1,1) == INSN(15,12)
15949       && 0 == INSN(4,4)) {
15950      UInt rN   = INSN(19,16);
15951      UInt rM   = INSN(3,0);
15952      UInt imm5 = INSN(11,7);
15953      UInt sh2  = INSN(6,5);
15954      UInt bU   = INSN(23,23);
15955      UInt bR   = INSN(22,22);
15956      if (rM != 15 && (rN != 15 || bR)) {
15957         IRExpr* eaE = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
15958                                                       sh2, imm5, dis_buf);
15959         IRTemp eaT = newTemp(Ity_I32);
15960         /* Bind eaE to a temp merely for debugging-vex purposes, so we
15961            can check it's a plausible decoding.  It will get removed
15962            by iropt a little later on. */
15963         vassert(eaE);
15964         assign(eaT, eaE);
15965         DIP("pld%c %s\n", bR ? ' ' : 'w', dis_buf);
15966         return True;
15967      }
15968      /* fall through */
15969   }
15970
15971   /* ------------------------ pli ------------------------ */
15972   if (BITS8(0,1,0,0, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
15973       && BITS4(1,1,1,1) == INSN(15,12)) {
15974      UInt rN    = INSN(19,16);
15975      UInt imm12 = INSN(11,0);
15976      UInt bU    = INSN(23,23);
15977      DIP("pli [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
15978      return True;
15979   }
15980
15981   /* --------------------- Interworking branches --------------------- */
15982
15983   // BLX (1), viz, unconditional branch and link to R15+simm24
15984   // and set CPSR.T = 1, that is, switch to Thumb mode
15985   if (INSN(31,25) == BITS7(1,1,1,1,1,0,1)) {
15986      UInt bitH   = INSN(24,24);
15987      UInt uimm24 = INSN(23,0);   uimm24 <<= 8;
15988      Int  simm24 = (Int)uimm24;  simm24 >>= 8;
15989      simm24 = (((UInt)simm24) << 2) + (bitH << 1);
15990      /* Now this is a bit tricky.  Since we're decoding an ARM insn,
15991         it is implies that CPSR.T == 0.  Hence the current insn's
15992         address is guaranteed to be of the form X--(30)--X00.  So, no
15993         need to mask any bits off it.  But need to set the lowest bit
15994         to 1 to denote we're in Thumb mode after this, since
15995         guest_R15T has CPSR.T as the lowest bit.  And we can't chase
15996         into the call, so end the block at this point. */
15997      UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
15998      putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
15999                    IRTemp_INVALID/*because AL*/, Ijk_Boring );
16000      llPutIReg(15, mkU32(dst));
16001      dres->jk_StopHere = Ijk_Call;
16002      dres->whatNext    = Dis_StopHere;
16003      DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1);
16004      return True;
16005   }
16006
16007   /* ------------------- v7 barrier insns ------------------- */
16008   switch (insn) {
16009      case 0xF57FF06F: /* ISB */
16010         stmt( IRStmt_MBE(Imbe_Fence) );
16011         DIP("ISB\n");
16012         return True;
16013      case 0xF57FF04F: /* DSB sy */
16014      case 0xF57FF04E: /* DSB st */
16015      case 0xF57FF04B: /* DSB ish */
16016      case 0xF57FF04A: /* DSB ishst */
16017      case 0xF57FF047: /* DSB nsh */
16018      case 0xF57FF046: /* DSB nshst */
16019      case 0xF57FF043: /* DSB osh */
16020      case 0xF57FF042: /* DSB oshst */
16021         stmt( IRStmt_MBE(Imbe_Fence) );
16022         DIP("DSB\n");
16023         return True;
16024      case 0xF57FF05F: /* DMB sy */
16025      case 0xF57FF05E: /* DMB st */
16026      case 0xF57FF05B: /* DMB ish */
16027      case 0xF57FF05A: /* DMB ishst */
16028      case 0xF57FF057: /* DMB nsh */
16029      case 0xF57FF056: /* DMB nshst */
16030      case 0xF57FF053: /* DMB osh */
16031      case 0xF57FF052: /* DMB oshst */
16032         stmt( IRStmt_MBE(Imbe_Fence) );
16033         DIP("DMB\n");
16034         return True;
16035      default:
16036         break;
16037   }
16038
16039   /* ------------------- CLREX ------------------ */
16040   if (insn == 0xF57FF01F) {
16041      /* AFAICS, this simply cancels a (all?) reservations made by a
16042         (any?) preceding LDREX(es).  Arrange to hand it through to
16043         the back end. */
16044      stmt( IRStmt_MBE(Imbe_CancelReservation) );
16045      DIP("clrex\n");
16046      return True;
16047   }
16048
16049   /* ------------------- NEON ------------------- */
16050   if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
16051      Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
16052                        dres, insn, IRTemp_INVALID/*unconditional*/,
16053                        False/*!isT*/
16054                     );
16055      if (ok_neon)
16056         return True;
16057   }
16058
16059   // unrecognised
16060   return False;
16061
16062#  undef INSN_COND
16063#  undef INSN
16064}
16065
16066
16067/*------------------------------------------------------------*/
16068/*--- Disassemble a single ARM instruction                 ---*/
16069/*------------------------------------------------------------*/
16070
16071/* Disassemble a single ARM instruction into IR.  The instruction is
16072   located in host memory at guest_instr, and has (decoded) guest IP
16073   of guest_R15_curr_instr_notENC, which will have been set before the
16074   call here. */
16075
16076static
16077DisResult disInstr_ARM_WRK (
16078             Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
16079             Bool         resteerCisOk,
16080             void*        callback_opaque,
16081             const UChar* guest_instr,
16082             const VexArchInfo* archinfo,
16083             const VexAbiInfo*  abiinfo,
16084             Bool         sigill_diag
16085          )
16086{
16087   // A macro to fish bits out of 'insn'.
16088#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
16089#  define INSN_COND          SLICE_UInt(insn, 31, 28)
16090
16091   DisResult dres;
16092   UInt      insn;
16093   IRTemp    condT; /* :: Ity_I32 */
16094   UInt      summary;
16095   HChar     dis_buf[128];  // big enough to hold LDMIA etc text
16096
16097   /* Set result defaults. */
16098   dres.whatNext    = Dis_Continue;
16099   dres.len         = 4;
16100   dres.continueAt  = 0;
16101   dres.jk_StopHere = Ijk_INVALID;
16102   dres.hint        = Dis_HintNone;
16103
16104   /* Set default actions for post-insn handling of writes to r15, if
16105      required. */
16106   r15written = False;
16107   r15guard   = IRTemp_INVALID; /* unconditional */
16108   r15kind    = Ijk_Boring;
16109
16110   /* At least this is simple on ARM: insns are all 4 bytes long, and
16111      4-aligned.  So just fish the whole thing out of memory right now
16112      and have done. */
16113   insn = getUIntLittleEndianly( guest_instr );
16114
16115   if (0) vex_printf("insn: 0x%x\n", insn);
16116
16117   DIP("\t(arm) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
16118
16119   vassert(0 == (guest_R15_curr_instr_notENC & 3));
16120
16121   /* ----------------------------------------------------------- */
16122
16123   /* Spot "Special" instructions (see comment at top of file). */
16124   {
16125      const UChar* code = guest_instr;
16126      /* Spot the 16-byte preamble:
16127
16128         e1a0c1ec  mov r12, r12, ROR #3
16129         e1a0c6ec  mov r12, r12, ROR #13
16130         e1a0ceec  mov r12, r12, ROR #29
16131         e1a0c9ec  mov r12, r12, ROR #19
16132      */
16133      UInt word1 = 0xE1A0C1EC;
16134      UInt word2 = 0xE1A0C6EC;
16135      UInt word3 = 0xE1A0CEEC;
16136      UInt word4 = 0xE1A0C9EC;
16137      if (getUIntLittleEndianly(code+ 0) == word1 &&
16138          getUIntLittleEndianly(code+ 4) == word2 &&
16139          getUIntLittleEndianly(code+ 8) == word3 &&
16140          getUIntLittleEndianly(code+12) == word4) {
16141         /* Got a "Special" instruction preamble.  Which one is it? */
16142         if (getUIntLittleEndianly(code+16) == 0xE18AA00A
16143                                               /* orr r10,r10,r10 */) {
16144            /* R3 = client_request ( R4 ) */
16145            DIP("r3 = client_request ( %%r4 )\n");
16146            llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
16147            dres.jk_StopHere = Ijk_ClientReq;
16148            dres.whatNext    = Dis_StopHere;
16149            goto decode_success;
16150         }
16151         else
16152         if (getUIntLittleEndianly(code+16) == 0xE18BB00B
16153                                               /* orr r11,r11,r11 */) {
16154            /* R3 = guest_NRADDR */
16155            DIP("r3 = guest_NRADDR\n");
16156            dres.len = 20;
16157            llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
16158            goto decode_success;
16159         }
16160         else
16161         if (getUIntLittleEndianly(code+16) == 0xE18CC00C
16162                                               /* orr r12,r12,r12 */) {
16163            /*  branch-and-link-to-noredir R4 */
16164            DIP("branch-and-link-to-noredir r4\n");
16165            llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
16166            llPutIReg(15, llGetIReg(4));
16167            dres.jk_StopHere = Ijk_NoRedir;
16168            dres.whatNext    = Dis_StopHere;
16169            goto decode_success;
16170         }
16171         else
16172         if (getUIntLittleEndianly(code+16) == 0xE1899009
16173                                               /* orr r9,r9,r9 */) {
16174            /* IR injection */
16175            DIP("IR injection\n");
16176            vex_inject_ir(irsb, Iend_LE);
16177            // Invalidate the current insn. The reason is that the IRop we're
16178            // injecting here can change. In which case the translation has to
16179            // be redone. For ease of handling, we simply invalidate all the
16180            // time.
16181            stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
16182            stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
16183            llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
16184            dres.whatNext    = Dis_StopHere;
16185            dres.jk_StopHere = Ijk_InvalICache;
16186            goto decode_success;
16187         }
         /* We don't know what it is.  Reload insn so decode_failure
            can print the insn following the Special-insn preamble. */
16190         insn = getUIntLittleEndianly(code+16);
16191         goto decode_failure;
16192         /*NOTREACHED*/
16193      }
16194
16195   }
16196
16197   /* ----------------------------------------------------------- */
16198
16199   /* Main ARM instruction decoder starts here. */
16200
16201   /* Deal with the condition.  Strategy is to merely generate a
16202      condition temporary at this point (or IRTemp_INVALID, meaning
16203      unconditional).  We leave it to lower-level instruction decoders
16204      to decide whether they can generate straight-line code, or
16205      whether they must generate a side exit before the instruction.
16206      condT :: Ity_I32 and is always either zero or one. */
16207   condT = IRTemp_INVALID;
16208   switch ( (ARMCondcode)INSN_COND ) {
16209      case ARMCondNV: {
16210         // Illegal instruction prior to v5 (see ARM ARM A3-5), but
16211         // some cases are acceptable
16212         Bool ok
16213            = decode_NV_instruction_ARMv7_and_below(&dres, archinfo, insn);
16214         if (ok)
16215            goto decode_success;
16216         else
16217            goto after_v7_decoder;
16218      }
16219      case ARMCondAL: // Always executed
16220         break;
16221      case ARMCondEQ: case ARMCondNE: case ARMCondHS: case ARMCondLO:
16222      case ARMCondMI: case ARMCondPL: case ARMCondVS: case ARMCondVC:
16223      case ARMCondHI: case ARMCondLS: case ARMCondGE: case ARMCondLT:
16224      case ARMCondGT: case ARMCondLE:
16225         condT = newTemp(Ity_I32);
16226         assign( condT, mk_armg_calculate_condition( INSN_COND ));
16227         break;
16228   }
16229
16230   /* ----------------------------------------------------------- */
16231   /* -- ARMv5 integer instructions                            -- */
16232   /* ----------------------------------------------------------- */
16233
16234   /* ---------------- Data processing ops ------------------- */
16235
16236   if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0))
16237       && !(INSN(25,25) == 0 && INSN(7,7) == 1 && INSN(4,4) == 1)) {
16238      IRTemp  shop = IRTemp_INVALID; /* shifter operand */
16239      IRTemp  shco = IRTemp_INVALID; /* shifter carry out */
16240      UInt    rD   = (insn >> 12) & 0xF; /* 15:12 */
16241      UInt    rN   = (insn >> 16) & 0xF; /* 19:16 */
16242      UInt    bitS = (insn >> 20) & 1; /* 20:20 */
16243      IRTemp  rNt  = IRTemp_INVALID;
16244      IRTemp  res  = IRTemp_INVALID;
16245      IRTemp  oldV = IRTemp_INVALID;
16246      IRTemp  oldC = IRTemp_INVALID;
16247      const HChar*  name = NULL;
16248      IROp    op   = Iop_INVALID;
16249      Bool    ok;
16250
16251      switch (INSN(24,21)) {
16252
16253         /* --------- ADD, SUB, AND, OR --------- */
16254         case BITS4(0,1,0,0): /* ADD:  Rd = Rn + shifter_operand */
16255            name = "add"; op = Iop_Add32; goto rd_eq_rn_op_SO;
16256         case BITS4(0,0,1,0): /* SUB:  Rd = Rn - shifter_operand */
16257            name = "sub"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
16258         case BITS4(0,0,1,1): /* RSB:  Rd = shifter_operand - Rn */
16259            name = "rsb"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
16260         case BITS4(0,0,0,0): /* AND:  Rd = Rn & shifter_operand */
16261            name = "and"; op = Iop_And32; goto rd_eq_rn_op_SO;
16262         case BITS4(1,1,0,0): /* OR:   Rd = Rn | shifter_operand */
16263            name = "orr"; op = Iop_Or32; goto rd_eq_rn_op_SO;
16264         case BITS4(0,0,0,1): /* EOR:  Rd = Rn ^ shifter_operand */
16265            name = "eor"; op = Iop_Xor32; goto rd_eq_rn_op_SO;
16266         case BITS4(1,1,1,0): /* BIC:  Rd = Rn & ~shifter_operand */
16267            name = "bic"; op = Iop_And32; goto rd_eq_rn_op_SO;
16268         rd_eq_rn_op_SO: {
16269            Bool isRSB = False;
16270            Bool isBIC = False;
16271            switch (INSN(24,21)) {
16272               case BITS4(0,0,1,1):
16273                  vassert(op == Iop_Sub32); isRSB = True; break;
16274               case BITS4(1,1,1,0):
16275                  vassert(op == Iop_And32); isBIC = True; break;
16276               default:
16277                  break;
16278            }
16279            rNt = newTemp(Ity_I32);
16280            assign(rNt, getIRegA(rN));
16281            ok = mk_shifter_operand(
16282                    INSN(25,25), INSN(11,0),
16283                    &shop, bitS ? &shco : NULL, dis_buf
16284                 );
16285            if (!ok)
16286               break;
16287            res = newTemp(Ity_I32);
16288            // compute the main result
16289            if (isRSB) {
16290               // reverse-subtract: shifter_operand - Rn
16291               vassert(op == Iop_Sub32);
16292               assign(res, binop(op, mkexpr(shop), mkexpr(rNt)) );
16293            } else if (isBIC) {
               // bic: Rn & ~shifter_operand
16295               vassert(op == Iop_And32);
16296               assign(res, binop(op, mkexpr(rNt),
16297                                     unop(Iop_Not32, mkexpr(shop))) );
16298            } else {
16299               // normal: Rn op shifter_operand
16300               assign(res, binop(op, mkexpr(rNt), mkexpr(shop)) );
16301            }
16302            // but don't commit it until after we've finished
16303            // all necessary reads from the guest state
16304            if (bitS
16305                && (op == Iop_And32 || op == Iop_Or32 || op == Iop_Xor32)) {
16306               oldV = newTemp(Ity_I32);
16307               assign( oldV, mk_armg_calculate_flag_v() );
16308            }
16309            // can't safely read guest state after here
16310            // now safe to put the main result
16311            putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
16312            // XXXX!! not safe to read any guest state after
16313            // this point (I think the code below doesn't do that).
16314            if (!bitS)
16315               vassert(shco == IRTemp_INVALID);
16316            /* Update the flags thunk if necessary */
16317            if (bitS) {
16318               vassert(shco != IRTemp_INVALID);
16319               switch (op) {
16320                  case Iop_Add32:
16321                     setFlags_D1_D2( ARMG_CC_OP_ADD, rNt, shop, condT );
16322                     break;
16323                  case Iop_Sub32:
16324                     if (isRSB) {
16325                        setFlags_D1_D2( ARMG_CC_OP_SUB, shop, rNt, condT );
16326                     } else {
16327                        setFlags_D1_D2( ARMG_CC_OP_SUB, rNt, shop, condT );
16328                     }
16329                     break;
16330                  case Iop_And32: /* BIC and AND set the flags the same */
16331                  case Iop_Or32:
16332                  case Iop_Xor32:
16333                     // oldV has been read just above
16334                     setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
16335                                        res, shco, oldV, condT );
16336                     break;
16337                  default:
16338                     vassert(0);
16339               }
16340            }
16341            DIP("%s%s%s r%u, r%u, %s\n",
16342                name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
16343            goto decode_success;
16344         }
16345
16346         /* --------- MOV, MVN --------- */
16347         case BITS4(1,1,0,1):   /* MOV: Rd = shifter_operand */
16348         case BITS4(1,1,1,1): { /* MVN: Rd = not(shifter_operand) */
16349            Bool isMVN = INSN(24,21) == BITS4(1,1,1,1);
16350            IRTemp jk = Ijk_Boring;
16351            if (rN != 0)
16352               break; /* rN must be zero */
16353            ok = mk_shifter_operand(
16354                    INSN(25,25), INSN(11,0),
16355                    &shop, bitS ? &shco : NULL, dis_buf
16356                 );
16357            if (!ok)
16358               break;
16359            res = newTemp(Ity_I32);
16360            assign( res, isMVN ? unop(Iop_Not32, mkexpr(shop))
16361                               : mkexpr(shop) );
16362            if (bitS) {
16363               vassert(shco != IRTemp_INVALID);
16364               oldV = newTemp(Ity_I32);
16365               assign( oldV, mk_armg_calculate_flag_v() );
16366            } else {
16367               vassert(shco == IRTemp_INVALID);
16368            }
16369            /* According to the Cortex A8 TRM Sec. 5.2.1, MOV PC, r14 is a
16370                return for purposes of branch prediction. */
16371            if (!isMVN && INSN(11,0) == 14) {
16372              jk = Ijk_Ret;
16373            }
16374            // can't safely read guest state after here
16375            putIRegA( rD, mkexpr(res), condT, jk );
16376            /* Update the flags thunk if necessary */
16377            if (bitS) {
16378               setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
16379                                  res, shco, oldV, condT );
16380            }
16381            DIP("%s%s%s r%u, %s\n",
16382                isMVN ? "mvn" : "mov",
16383                nCC(INSN_COND), bitS ? "s" : "", rD, dis_buf );
16384            goto decode_success;
16385         }
16386
16387         /* --------- CMP --------- */
16388         case BITS4(1,0,1,0):   /* CMP:  (void) Rn - shifter_operand */
16389         case BITS4(1,0,1,1): { /* CMN:  (void) Rn + shifter_operand */
16390            Bool isCMN = INSN(24,21) == BITS4(1,0,1,1);
16391            if (rD != 0)
16392               break; /* rD must be zero */
16393            if (bitS == 0)
16394               break; /* if S (bit 20) is not set, it's not CMP/CMN */
16395            rNt = newTemp(Ity_I32);
16396            assign(rNt, getIRegA(rN));
16397            ok = mk_shifter_operand(
16398                    INSN(25,25), INSN(11,0),
16399                    &shop, NULL, dis_buf
16400                 );
16401            if (!ok)
16402               break;
16403            // can't safely read guest state after here
16404            /* Update the flags thunk. */
16405            setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
16406                            rNt, shop, condT );
16407            DIP("%s%s r%u, %s\n",
16408                isCMN ? "cmn" : "cmp",
16409                nCC(INSN_COND), rN, dis_buf );
16410            goto decode_success;
16411         }
16412
16413         /* --------- TST --------- */
16414         case BITS4(1,0,0,0):   /* TST:  (void) Rn & shifter_operand */
16415         case BITS4(1,0,0,1): { /* TEQ:  (void) Rn ^ shifter_operand */
16416            Bool isTEQ = INSN(24,21) == BITS4(1,0,0,1);
16417            if (rD != 0)
16418               break; /* rD must be zero */
16419            if (bitS == 0)
16420               break; /* if S (bit 20) is not set, it's not TST/TEQ */
16421            rNt = newTemp(Ity_I32);
16422            assign(rNt, getIRegA(rN));
16423            ok = mk_shifter_operand(
16424                    INSN(25,25), INSN(11,0),
16425                    &shop, &shco, dis_buf
16426                 );
16427            if (!ok)
16428               break;
16429            /* Update the flags thunk. */
16430            res = newTemp(Ity_I32);
16431            assign( res, binop(isTEQ ? Iop_Xor32 : Iop_And32,
16432                               mkexpr(rNt), mkexpr(shop)) );
16433            oldV = newTemp(Ity_I32);
16434            assign( oldV, mk_armg_calculate_flag_v() );
16435            // can't safely read guest state after here
16436            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
16437                               res, shco, oldV, condT );
16438            DIP("%s%s r%u, %s\n",
16439                isTEQ ? "teq" : "tst",
16440                nCC(INSN_COND), rN, dis_buf );
16441            goto decode_success;
16442         }
16443
16444         /* --------- ADC, SBC, RSC --------- */
16445         case BITS4(0,1,0,1): /* ADC:  Rd = Rn + shifter_operand + oldC */
16446            name = "adc"; goto rd_eq_rn_op_SO_op_oldC;
16447         case BITS4(0,1,1,0): /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
16448            name = "sbc"; goto rd_eq_rn_op_SO_op_oldC;
16449         case BITS4(0,1,1,1): /* RSC:  Rd = shifter_operand - Rn - (oldC ^ 1) */
16450            name = "rsc"; goto rd_eq_rn_op_SO_op_oldC;
16451         rd_eq_rn_op_SO_op_oldC: {
16452            // FIXME: shco isn't used for anything.  Get rid of it.
16453            rNt = newTemp(Ity_I32);
16454            assign(rNt, getIRegA(rN));
16455            ok = mk_shifter_operand(
16456                    INSN(25,25), INSN(11,0),
16457                    &shop, bitS ? &shco : NULL, dis_buf
16458                 );
16459            if (!ok)
16460               break;
16461            oldC = newTemp(Ity_I32);
16462            assign( oldC, mk_armg_calculate_flag_c() );
16463            res = newTemp(Ity_I32);
16464            // compute the main result
16465            switch (INSN(24,21)) {
16466               case BITS4(0,1,0,1): /* ADC */
16467                  assign(res,
16468                         binop(Iop_Add32,
16469                               binop(Iop_Add32, mkexpr(rNt), mkexpr(shop)),
16470                               mkexpr(oldC) ));
16471                  break;
16472               case BITS4(0,1,1,0): /* SBC */
16473                  assign(res,
16474                         binop(Iop_Sub32,
16475                               binop(Iop_Sub32, mkexpr(rNt), mkexpr(shop)),
16476                               binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
16477                  break;
16478               case BITS4(0,1,1,1): /* RSC */
16479                  assign(res,
16480                         binop(Iop_Sub32,
16481                               binop(Iop_Sub32, mkexpr(shop), mkexpr(rNt)),
16482                               binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
16483                  break;
16484               default:
16485                  vassert(0);
16486            }
16487            // but don't commit it until after we've finished
16488            // all necessary reads from the guest state
16489            // now safe to put the main result
16490            putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
16491            // XXXX!! not safe to read any guest state after
16492            // this point (I think the code below doesn't do that).
16493            if (!bitS)
16494               vassert(shco == IRTemp_INVALID);
16495            /* Update the flags thunk if necessary */
16496            if (bitS) {
16497               vassert(shco != IRTemp_INVALID);
16498               switch (INSN(24,21)) {
16499                  case BITS4(0,1,0,1): /* ADC */
16500                     setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
16501                                        rNt, shop, oldC, condT );
16502                     break;
16503                  case BITS4(0,1,1,0): /* SBC */
16504                     setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
16505                                        rNt, shop, oldC, condT );
16506                     break;
16507                  case BITS4(0,1,1,1): /* RSC */
16508                     setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
16509                                        shop, rNt, oldC, condT );
16510                     break;
16511                  default:
16512                     vassert(0);
16513               }
16514            }
16515            DIP("%s%s%s r%u, r%u, %s\n",
16516                name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
16517            goto decode_success;
16518         }
16519
16520         default:
16521            vassert(0);
16522      }
16523   } /* if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0)) */
16524
16525   /* --------------------- Load/store (ubyte & word) -------- */
16526   // LDR STR LDRB STRB
16527   /*                 31   27   23   19 15 11    6   4 3  # highest bit
16528                        28   24   20 16 12
16529      A5-20   1 | 16  cond 0101 UB0L Rn Rd imm12
      A5-22   1 | 32  cond 0111 UB0L Rn Rd imm5  sh2 0 Rm
16531      A5-24   2 | 16  cond 0101 UB1L Rn Rd imm12
16532      A5-26   2 | 32  cond 0111 UB1L Rn Rd imm5  sh2 0 Rm
16533      A5-28   3 | 16  cond 0100 UB0L Rn Rd imm12
16534      A5-32   3 | 32  cond 0110 UB0L Rn Rd imm5  sh2 0 Rm
16535   */
16536   /* case coding:
16537             1   at-ea               (access at ea)
16538             2   at-ea-then-upd      (access at ea, then Rn = ea)
16539             3   at-Rn-then-upd      (access at Rn, then Rn = ea)
16540      ea coding
16541             16  Rn +/- imm12
16542             32  Rn +/- Rm sh2 imm5
16543   */
16544   /* Quickly skip over all of this for hopefully most instructions */
16545   if ((INSN(27,24) & BITS4(1,1,0,0)) != BITS4(0,1,0,0))
16546      goto after_load_store_ubyte_or_word;
16547
16548   summary = 0;
16549
16550   /**/ if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 0) {
16551      summary = 1 | 16;
16552   }
16553   else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 0
16554                                          && INSN(4,4) == 0) {
16555      summary = 1 | 32;
16556   }
16557   else if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 1) {
16558      summary = 2 | 16;
16559   }
16560   else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 1
16561                                          && INSN(4,4) == 0) {
16562      summary = 2 | 32;
16563   }
16564   else if (INSN(27,24) == BITS4(0,1,0,0) && INSN(21,21) == 0) {
16565      summary = 3 | 16;
16566   }
16567   else if (INSN(27,24) == BITS4(0,1,1,0) && INSN(21,21) == 0
16568                                          && INSN(4,4) == 0) {
16569      summary = 3 | 32;
16570   }
16571   else goto after_load_store_ubyte_or_word;
16572
16573   { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
16574     UInt rD = (insn >> 12) & 0xF; /* 15:12 */
16575     UInt rM = (insn >> 0)  & 0xF; /*  3:0  */
16576     UInt bU = (insn >> 23) & 1;      /* 23 */
16577     UInt bB = (insn >> 22) & 1;      /* 22 */
16578     UInt bL = (insn >> 20) & 1;      /* 20 */
16579     UInt imm12 = (insn >> 0) & 0xFFF; /* 11:0 */
16580     UInt imm5  = (insn >> 7) & 0x1F;  /* 11:7 */
16581     UInt sh2   = (insn >> 5) & 3;     /* 6:5 */
16582
16583     /* Skip some invalid cases, which would lead to two competing
16584        updates to the same register, or which are otherwise
16585        disallowed by the spec. */
16586     switch (summary) {
16587        case 1 | 16:
16588           break;
16589        case 1 | 32:
16590           if (rM == 15) goto after_load_store_ubyte_or_word;
16591           break;
16592        case 2 | 16: case 3 | 16:
16593           if (rN == 15) goto after_load_store_ubyte_or_word;
16594           if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
16595           break;
16596        case 2 | 32: case 3 | 32:
16597           if (rM == 15) goto after_load_store_ubyte_or_word;
16598           if (rN == 15) goto after_load_store_ubyte_or_word;
16599           if (rN == rM) goto after_load_store_ubyte_or_word;
16600           if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
16601           break;
16602        default:
16603           vassert(0);
16604     }
16605
16606     /* compute the effective address.  Bind it to a tmp since we
16607        may need to use it twice. */
16608     IRExpr* eaE = NULL;
16609     switch (summary & 0xF0) {
16610        case 16:
16611           eaE = mk_EA_reg_plusminus_imm12( rN, bU, imm12, dis_buf );
16612           break;
16613        case 32:
16614           eaE = mk_EA_reg_plusminus_shifted_reg( rN, bU, rM, sh2, imm5,
16615                                                  dis_buf );
16616           break;
16617     }
16618     vassert(eaE);
16619     IRTemp eaT = newTemp(Ity_I32);
16620     assign(eaT, eaE);
16621
16622     /* get the old Rn value */
16623     IRTemp rnT = newTemp(Ity_I32);
16624     assign(rnT, getIRegA(rN));
16625
16626     /* decide on the transfer address */
16627     IRTemp taT = IRTemp_INVALID;
16628     switch (summary & 0x0F) {
16629        case 1: case 2: taT = eaT; break;
16630        case 3:         taT = rnT; break;
16631     }
16632     vassert(taT != IRTemp_INVALID);
16633
16634     if (bL == 0) {
16635       /* Store.  If necessary, update the base register before the
16636          store itself, so that the common idiom of "str rX, [sp,
16637          #-4]!" (store rX at sp-4, then do new sp = sp-4, a.k.a "push
16638          rX") doesn't cause Memcheck to complain that the access is
16639          below the stack pointer.  Also, not updating sp before the
16640          store confuses Valgrind's dynamic stack-extending logic.  So
16641          do it before the store.  Hence we need to snarf the store
16642          data before doing the basereg update. */
16643
16644        /* get hold of the data to be stored */
16645        IRTemp rDt = newTemp(Ity_I32);
16646        assign(rDt, getIRegA(rD));
16647
16648        /* Update Rn if necessary. */
16649        switch (summary & 0x0F) {
16650           case 2: case 3:
16651              putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16652              break;
16653        }
16654
16655        /* generate the transfer */
16656        if (bB == 0) { // word store
16657           storeGuardedLE( mkexpr(taT), mkexpr(rDt), condT );
16658        } else { // byte store
16659           vassert(bB == 1);
16660           storeGuardedLE( mkexpr(taT), unop(Iop_32to8, mkexpr(rDt)), condT );
16661        }
16662
16663     } else {
16664        /* Load */
16665        vassert(bL == 1);
16666
16667        /* generate the transfer */
16668        if (bB == 0) { // word load
16669           IRTemp jk = Ijk_Boring;
16670           /* According to the Cortex A8 TRM Sec. 5.2.1, LDR(1) with r13 as the
16671               base register and PC as the destination register is a return for
16672               purposes of branch prediction.
16673              The ARM ARM Sec. C9.10.1 further specifies that it must use a
16674               post-increment by immediate addressing mode to be counted in
16675               event 0x0E (Procedure return).*/
16676           if (rN == 13 && summary == (3 | 16) && bB == 0) {
16677              jk = Ijk_Ret;
16678           }
16679           IRTemp tD = newTemp(Ity_I32);
16680           loadGuardedLE( tD, ILGop_Ident32,
16681                          mkexpr(taT), llGetIReg(rD), condT );
16682           /* "rD == 15 ? condT : IRTemp_INVALID": simply
16683              IRTemp_INVALID would be correct in all cases here, and
16684              for the non-r15 case it generates better code, by
16685              avoiding two tests of the cond (since it is already
16686              tested by loadGuardedLE).  However, the logic at the end
16687              of this function, that deals with writes to r15, has an
16688              optimisation which depends on seeing whether or not the
16689              write is conditional.  Hence in this particular case we
16690              let it "see" the guard condition. */
16691           putIRegA( rD, mkexpr(tD),
16692                     rD == 15 ? condT : IRTemp_INVALID, jk );
16693        } else { // byte load
16694           vassert(bB == 1);
16695           IRTemp tD = newTemp(Ity_I32);
16696           loadGuardedLE( tD, ILGop_8Uto32, mkexpr(taT), llGetIReg(rD), condT );
16697           /* No point in similar 3rd arg complexity here, since we
16698              can't sanely write anything to r15 like this. */
16699           putIRegA( rD, mkexpr(tD), IRTemp_INVALID, Ijk_Boring );
16700        }
16701
16702        /* Update Rn if necessary. */
16703        switch (summary & 0x0F) {
16704           case 2: case 3:
16705              // should be assured by logic above:
16706              if (bL == 1)
16707                 vassert(rD != rN); /* since we just wrote rD */
16708              putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16709              break;
16710        }
16711     }
16712
16713     switch (summary & 0x0F) {
16714        case 1:  DIP("%sr%s%s r%u, %s\n",
16715                     bL == 0 ? "st" : "ld",
16716                     bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
16717                 break;
16718        case 2:  DIP("%sr%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
16719                     bL == 0 ? "st" : "ld",
16720                     bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
16721                 break;
16722        case 3:  DIP("%sr%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
16723                     bL == 0 ? "st" : "ld",
16724                     bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
16725                 break;
16726        default: vassert(0);
16727     }
16728
16729     /* XXX deal with alignment constraints */
16730
16731     goto decode_success;
16732
16733     /* Complications:
16734
16735        For all loads: if the Amode specifies base register
16736        writeback, and the same register is specified for Rd and Rn,
16737        the results are UNPREDICTABLE.
16738
16739        For all loads and stores: if R15 is written, branch to
16740        that address afterwards.
16741
16742        STRB: straightforward
16743        LDRB: loaded data is zero extended
16744        STR:  lowest 2 bits of address are ignored
16745        LDR:  if the lowest 2 bits of the address are nonzero
16746              then the loaded value is rotated right by 8 * the lowest 2 bits
16747     */
16748   }
16749
16750  after_load_store_ubyte_or_word:
16751
16752   /* --------------------- Load/store (sbyte & hword) -------- */
16753   // LDRH LDRSH STRH LDRSB
16754   /*                 31   27   23   19 15 11   7    3     # highest bit
16755                        28   24   20 16 12    8    4    0
16756      A5-36   1 | 16  cond 0001 U10L Rn Rd im4h 1SH1 im4l
16757      A5-38   1 | 32  cond 0001 U00L Rn Rd 0000 1SH1 Rm
16758      A5-40   2 | 16  cond 0001 U11L Rn Rd im4h 1SH1 im4l
16759      A5-42   2 | 32  cond 0001 U01L Rn Rd 0000 1SH1 Rm
16760      A5-44   3 | 16  cond 0000 U10L Rn Rd im4h 1SH1 im4l
16761      A5-46   3 | 32  cond 0000 U00L Rn Rd 0000 1SH1 Rm
16762   */
16763   /* case coding:
16764             1   at-ea               (access at ea)
16765             2   at-ea-then-upd      (access at ea, then Rn = ea)
16766             3   at-Rn-then-upd      (access at Rn, then Rn = ea)
16767      ea coding
16768             16  Rn +/- imm8
16769             32  Rn +/- Rm
16770   */
16771   /* Quickly skip over all of this for hopefully most instructions */
16772   if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
16773      goto after_load_store_sbyte_or_hword;
16774
16775   /* Check the "1SH1" thing. */
16776   if ((INSN(7,4) & BITS4(1,0,0,1)) != BITS4(1,0,0,1))
16777      goto after_load_store_sbyte_or_hword;
16778
16779   summary = 0;
16780
16781   /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,0)) {
16782      summary = 1 | 16;
16783   }
16784   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,0)) {
16785      summary = 1 | 32;
16786   }
16787   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,1)) {
16788      summary = 2 | 16;
16789   }
16790   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,1)) {
16791      summary = 2 | 32;
16792   }
16793   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(1,0)) {
16794      summary = 3 | 16;
16795   }
16796   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(0,0)) {
16797      summary = 3 | 32;
16798   }
16799   else goto after_load_store_sbyte_or_hword;
16800
16801   { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
16802     UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
16803     UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
16804     UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
16805     UInt bL   = (insn >> 20) & 1;   /* 20 L=1 load, L=0 store */
16806     UInt bH   = (insn >> 5) & 1;    /* H=1 halfword, H=0 byte */
16807     UInt bS   = (insn >> 6) & 1;    /* S=1 signed, S=0 unsigned */
16808     UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
16809
16810     /* Skip combinations that are either meaningless or already
16811        handled by main word-or-unsigned-byte load-store
16812        instructions. */
16813     if (bS == 0 && bH == 0) /* "unsigned byte" */
16814        goto after_load_store_sbyte_or_hword;
16815     if (bS == 1 && bL == 0) /* "signed store" */
16816        goto after_load_store_sbyte_or_hword;
16817
16818     /* Require 11:8 == 0 for Rn +/- Rm cases */
16819     if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
16820        goto after_load_store_sbyte_or_hword;
16821
16822     /* Skip some invalid cases, which would lead to two competing
16823        updates to the same register, or which are otherwise
16824        disallowed by the spec. */
16825     switch (summary) {
16826        case 1 | 16:
16827           break;
16828        case 1 | 32:
16829           if (rM == 15) goto after_load_store_sbyte_or_hword;
16830           break;
16831        case 2 | 16: case 3 | 16:
16832           if (rN == 15) goto after_load_store_sbyte_or_hword;
16833           if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
16834           break;
16835        case 2 | 32: case 3 | 32:
16836           if (rM == 15) goto after_load_store_sbyte_or_hword;
16837           if (rN == 15) goto after_load_store_sbyte_or_hword;
16838           if (rN == rM) goto after_load_store_sbyte_or_hword;
16839           if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
16840           break;
16841        default:
16842           vassert(0);
16843     }
16844
16845     /* If this is a branch, make it unconditional at this point.
16846        Doing conditional branches in-line is too complex (for now).
16847        Note that you'd have to be insane to use any of these loads to
16848        do a branch, since they only load 16 bits at most, but we
16849        handle it just in case. */
16850     if (bL == 1 && rD == 15 && condT != IRTemp_INVALID) {
16851        // go uncond
16852        mk_skip_over_A32_if_cond_is_false( condT );
16853        condT = IRTemp_INVALID;
16854        // now uncond
16855     }
16856
16857     /* compute the effective address.  Bind it to a tmp since we
16858        may need to use it twice. */
16859     IRExpr* eaE = NULL;
16860     switch (summary & 0xF0) {
16861        case 16:
16862           eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
16863           break;
16864        case 32:
16865           eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
16866           break;
16867     }
16868     vassert(eaE);
16869     IRTemp eaT = newTemp(Ity_I32);
16870     assign(eaT, eaE);
16871
16872     /* get the old Rn value */
16873     IRTemp rnT = newTemp(Ity_I32);
16874     assign(rnT, getIRegA(rN));
16875
16876     /* decide on the transfer address */
16877     IRTemp taT = IRTemp_INVALID;
16878     switch (summary & 0x0F) {
16879        case 1: case 2: taT = eaT; break;
16880        case 3:         taT = rnT; break;
16881     }
16882     vassert(taT != IRTemp_INVALID);
16883
16884     /* ll previous value of rD, for dealing with conditional loads */
16885     IRTemp llOldRd = newTemp(Ity_I32);
16886     assign(llOldRd, llGetIReg(rD));
16887
16888     /* halfword store  H 1  L 0  S 0
16889        uhalf load      H 1  L 1  S 0
16890        shalf load      H 1  L 1  S 1
16891        sbyte load      H 0  L 1  S 1
16892     */
16893     const HChar* name = NULL;
16894     /* generate the transfer */
16895     /**/ if (bH == 1 && bL == 0 && bS == 0) { // halfword store
16896        storeGuardedLE( mkexpr(taT),
16897                        unop(Iop_32to16, getIRegA(rD)), condT );
16898        name = "strh";
16899     }
16900     else if (bH == 1 && bL == 1 && bS == 0) { // uhalf load
16901        IRTemp newRd = newTemp(Ity_I32);
16902        loadGuardedLE( newRd, ILGop_16Uto32,
16903                       mkexpr(taT), mkexpr(llOldRd), condT );
16904        putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
16905        name = "ldrh";
16906     }
16907     else if (bH == 1 && bL == 1 && bS == 1) { // shalf load
16908        IRTemp newRd = newTemp(Ity_I32);
16909        loadGuardedLE( newRd, ILGop_16Sto32,
16910                       mkexpr(taT), mkexpr(llOldRd), condT );
16911        putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
16912        name = "ldrsh";
16913     }
16914     else if (bH == 0 && bL == 1 && bS == 1) { // sbyte load
16915        IRTemp newRd = newTemp(Ity_I32);
16916        loadGuardedLE( newRd, ILGop_8Sto32,
16917                       mkexpr(taT), mkexpr(llOldRd), condT );
16918        putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
16919        name = "ldrsb";
16920     }
16921     else
16922        vassert(0); // should be assured by logic above
16923
16924     /* Update Rn if necessary. */
16925     switch (summary & 0x0F) {
16926        case 2: case 3:
16927           // should be assured by logic above:
16928           if (bL == 1)
16929              vassert(rD != rN); /* since we just wrote rD */
16930           putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16931           break;
16932     }
16933
16934     switch (summary & 0x0F) {
16935        case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
16936                 break;
16937        case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
16938                     name, nCC(INSN_COND), rD, dis_buf);
16939                 break;
16940        case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
16941                     name, nCC(INSN_COND), rD, dis_buf);
16942                 break;
16943        default: vassert(0);
16944     }
16945
16946     /* XXX deal with alignment constraints */
16947
16948     goto decode_success;
16949
16950     /* Complications:
16951
16952        For all loads: if the Amode specifies base register
16953        writeback, and the same register is specified for Rd and Rn,
16954        the results are UNPREDICTABLE.
16955
16956        For all loads and stores: if R15 is written, branch to
16957        that address afterwards.
16958
16959        Misaligned halfword stores => Unpredictable
16960        Misaligned halfword loads  => Unpredictable
16961     */
16962   }
16963
16964  after_load_store_sbyte_or_hword:
16965
16966   /* --------------------- Load/store multiple -------------- */
16967   // LD/STMIA LD/STMIB LD/STMDA LD/STMDB
16968   // Remarkably complex and difficult to get right
16969   // match 27:20 as 100XX0WL
16970   if (BITS8(1,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,1,0,0))) {
16971      // A5-50 LD/STMIA  cond 1000 10WL Rn RegList
16972      // A5-51 LD/STMIB  cond 1001 10WL Rn RegList
16973      // A5-53 LD/STMDA  cond 1000 00WL Rn RegList
16974      // A5-53 LD/STMDB  cond 1001 00WL Rn RegList
16975      //                   28   24   20 16       0
16976
16977      UInt bINC    = (insn >> 23) & 1;
16978      UInt bBEFORE = (insn >> 24) & 1;
16979
16980      UInt bL      = (insn >> 20) & 1;  /* load=1, store=0 */
16981      UInt bW      = (insn >> 21) & 1;  /* Rn wback=1, no wback=0 */
16982      UInt rN      = (insn >> 16) & 0xF;
16983      UInt regList = insn & 0xFFFF;
16984      /* Skip some invalid cases, which would lead to two competing
16985         updates to the same register, or which are otherwise
16986         disallowed by the spec.  Note the test above has required
16987         that S == 0, since that looks like a kernel-mode only thing.
16988         Done by forcing the real pattern, viz 100XXSWL to actually be
16989         100XX0WL. */
16990      if (rN == 15) goto after_load_store_multiple;
16991      // reglist can't be empty
16992      if (regList == 0) goto after_load_store_multiple;
16993      // if requested to writeback Rn, and this is a load instruction,
16994      // then Rn can't appear in RegList, since we'd have two competing
16995      // new values for Rn.  We do however accept this case for store
16996      // instructions.
16997      if (bW == 1 && bL == 1 && ((1 << rN) & regList) > 0)
16998         goto after_load_store_multiple;
16999
17000      /* Now, we can't do a conditional load or store, since that very
17001         likely will generate an exception.  So we have to take a side
17002         exit at this point if the condition is false. */
17003      if (condT != IRTemp_INVALID) {
17004         mk_skip_over_A32_if_cond_is_false( condT );
17005         condT = IRTemp_INVALID;
17006      }
17007
17008      /* Ok, now we're unconditional.  Generate the IR. */
17009      mk_ldm_stm( True/*arm*/, rN, bINC, bBEFORE, bW, bL, regList );
17010
17011      DIP("%sm%c%c%s r%u%s, {0x%04x}\n",
17012          bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
17013          nCC(INSN_COND),
17014          rN, bW ? "!" : "", regList);
17015
17016      goto decode_success;
17017   }
17018
17019  after_load_store_multiple:
17020
17021   /* --------------------- Control flow --------------------- */
17022   // B, BL (Branch, or Branch-and-Link, to immediate offset)
17023   //
17024   if (BITS8(1,0,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))) {
17025      UInt link   = (insn >> 24) & 1;
17026      UInt uimm24 = insn & ((1<<24)-1);  uimm24 <<= 8;
17027      Int  simm24 = (Int)uimm24;         simm24 >>= 8;
17028      UInt dst    = guest_R15_curr_instr_notENC + 8 + (((UInt)simm24) << 2);
17029      IRJumpKind jk = link ? Ijk_Call : Ijk_Boring;
17030      if (link) {
17031         putIRegA(14, mkU32(guest_R15_curr_instr_notENC + 4),
17032                      condT, Ijk_Boring);
17033      }
17034      if (condT == IRTemp_INVALID) {
17035         /* unconditional transfer to 'dst'.  See if we can simply
17036            continue tracing at the destination. */
17037         if (resteerOkFn( callback_opaque, dst )) {
17038            /* yes */
17039            dres.whatNext   = Dis_ResteerU;
17040            dres.continueAt = dst;
17041         } else {
17042            /* no; terminate the SB at this point. */
17043            llPutIReg(15, mkU32(dst));
17044            dres.jk_StopHere = jk;
17045            dres.whatNext    = Dis_StopHere;
17046         }
17047         DIP("b%s 0x%x\n", link ? "l" : "", dst);
17048      } else {
17049         /* conditional transfer to 'dst' */
17050         const HChar* comment = "";
17051
17052         /* First see if we can do some speculative chasing into one
17053            arm or the other.  Be conservative and only chase if
17054            !link, that is, this is a normal conditional branch to a
17055            known destination. */
17056         if (!link
17057             && resteerCisOk
17058             && vex_control.guest_chase_cond
17059             && dst < guest_R15_curr_instr_notENC
17060             && resteerOkFn( callback_opaque, dst) ) {
17061            /* Speculation: assume this backward branch is taken.  So
17062               we need to emit a side-exit to the insn following this
17063               one, on the negation of the condition, and continue at
17064               the branch target address (dst). */
17065            stmt( IRStmt_Exit( unop(Iop_Not1,
17066                                    unop(Iop_32to1, mkexpr(condT))),
17067                               Ijk_Boring,
17068                               IRConst_U32(guest_R15_curr_instr_notENC+4),
17069                               OFFB_R15T ));
17070            dres.whatNext   = Dis_ResteerC;
17071            dres.continueAt = (Addr32)dst;
17072            comment = "(assumed taken)";
17073         }
17074         else
17075         if (!link
17076             && resteerCisOk
17077             && vex_control.guest_chase_cond
17078             && dst >= guest_R15_curr_instr_notENC
17079             && resteerOkFn( callback_opaque,
17080                             guest_R15_curr_instr_notENC+4) ) {
17081            /* Speculation: assume this forward branch is not taken.
17082               So we need to emit a side-exit to dst (the dest) and
17083               continue disassembling at the insn immediately
17084               following this one. */
17085            stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
17086                               Ijk_Boring,
17087                               IRConst_U32(dst),
17088                               OFFB_R15T ));
17089            dres.whatNext   = Dis_ResteerC;
17090            dres.continueAt = guest_R15_curr_instr_notENC+4;
17091            comment = "(assumed not taken)";
17092         }
17093         else {
17094            /* Conservative default translation - end the block at
17095               this point. */
17096            stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
17097                               jk, IRConst_U32(dst), OFFB_R15T ));
17098            llPutIReg(15, mkU32(guest_R15_curr_instr_notENC + 4));
17099            dres.jk_StopHere = Ijk_Boring;
17100            dres.whatNext    = Dis_StopHere;
17101         }
17102         DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND),
17103             dst, comment);
17104      }
17105      goto decode_success;
17106   }
17107
   // BX, BLX (Branch-and-exchange, or Branch-with-Link-and-exchange,
   // to an address held in a register)
   // NB: interworking branch
17110   if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
17111       && INSN(19,12) == BITS8(1,1,1,1,1,1,1,1)
17112       && (INSN(11,4) == BITS8(1,1,1,1,0,0,1,1)
17113           || INSN(11,4) == BITS8(1,1,1,1,0,0,0,1))) {
17114      IRTemp  dst = newTemp(Ity_I32);
17115      UInt    link = (INSN(11,4) >> 1) & 1;
17116      UInt    rM   = INSN(3,0);
17117      // we don't decode the case (link && rM == 15), as that's
17118      // Unpredictable.
17119      if (!(link && rM == 15)) {
17120         if (condT != IRTemp_INVALID) {
17121            mk_skip_over_A32_if_cond_is_false( condT );
17122         }
17123         // rM contains an interworking address exactly as we require
17124         // (with continuation CPSR.T in bit 0), so we can use it
17125         // as-is, with no masking.
17126         assign( dst, getIRegA(rM) );
17127         if (link) {
17128            putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
17129                      IRTemp_INVALID/*because AL*/, Ijk_Boring );
17130         }
17131         llPutIReg(15, mkexpr(dst));
17132         dres.jk_StopHere = link ? Ijk_Call
17133                                 : (rM == 14 ? Ijk_Ret : Ijk_Boring);
17134         dres.whatNext    = Dis_StopHere;
17135         if (condT == IRTemp_INVALID) {
17136            DIP("b%sx r%u\n", link ? "l" : "", rM);
17137         } else {
17138            DIP("b%sx%s r%u\n", link ? "l" : "", nCC(INSN_COND), rM);
17139         }
17140         goto decode_success;
17141      }
17142      /* else: (link && rM == 15): just fall through */
17143   }
17144
17145   /* --- NB: ARM interworking branches are in NV space, hence
17146      are handled elsewhere by decode_NV_instruction_ARMv7_and_below.
17147      ---
17148   */
17149
17150   /* --------------------- Clz --------------------- */
17151   // CLZ
17152   if (INSN(27,20) == BITS8(0,0,0,1,0,1,1,0)
17153       && INSN(19,16) == BITS4(1,1,1,1)
17154       && INSN(11,4) == BITS8(1,1,1,1,0,0,0,1)) {
17155      UInt rD = INSN(15,12);
17156      UInt rM = INSN(3,0);
17157      IRTemp arg = newTemp(Ity_I32);
17158      IRTemp res = newTemp(Ity_I32);
17159      assign(arg, getIRegA(rM));
17160      assign(res, IRExpr_ITE(
17161                     binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
17162                     mkU32(32),
17163                     unop(Iop_Clz32, mkexpr(arg))
17164            ));
17165      putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17166      DIP("clz%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
17167      goto decode_success;
17168   }
17169
17170   /* --------------------- Mul etc --------------------- */
17171   // MUL
17172   if (BITS8(0,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
17173       && INSN(15,12) == BITS4(0,0,0,0)
17174       && INSN(7,4) == BITS4(1,0,0,1)) {
17175      UInt bitS = (insn >> 20) & 1; /* 20:20 */
17176      UInt rD = INSN(19,16);
17177      UInt rS = INSN(11,8);
17178      UInt rM = INSN(3,0);
17179      if (rD == 15 || rM == 15 || rS == 15) {
17180         /* Unpredictable; don't decode; fall through */
17181      } else {
17182         IRTemp argL = newTemp(Ity_I32);
17183         IRTemp argR = newTemp(Ity_I32);
17184         IRTemp res  = newTemp(Ity_I32);
17185         IRTemp oldC = IRTemp_INVALID;
17186         IRTemp oldV = IRTemp_INVALID;
17187         assign( argL, getIRegA(rM));
17188         assign( argR, getIRegA(rS));
17189         assign( res, binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) );
17190         if (bitS) {
17191            oldC = newTemp(Ity_I32);
17192            assign(oldC, mk_armg_calculate_flag_c());
17193            oldV = newTemp(Ity_I32);
17194            assign(oldV, mk_armg_calculate_flag_v());
17195         }
17196         // now update guest state
17197         putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
17198         if (bitS) {
17199            IRTemp pair = newTemp(Ity_I32);
17200            assign( pair, binop(Iop_Or32,
17201                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17202                                mkexpr(oldV)) );
17203            setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
17204         }
17205         DIP("mul%c%s r%u, r%u, r%u\n",
17206             bitS ? 's' : ' ', nCC(INSN_COND), rD, rM, rS);
17207         goto decode_success;
17208      }
17209      /* fall through */
17210   }
17211
17212   /* --------------------- Integer Divides --------------------- */
17213   // SDIV
17214   if (BITS8(0,1,1,1,0,0,0,1) == INSN(27,20)
17215       && INSN(15,12) == BITS4(1,1,1,1)
17216       && INSN(7,4) == BITS4(0,0,0,1)) {
17217      UInt rD = INSN(19,16);
17218      UInt rM = INSN(11,8);
17219      UInt rN = INSN(3,0);
17220      if (rD == 15 || rM == 15 || rN == 15) {
17221         /* Unpredictable; don't decode; fall through */
17222      } else {
17223         IRTemp res  = newTemp(Ity_I32);
17224         IRTemp argL = newTemp(Ity_I32);
17225         IRTemp argR = newTemp(Ity_I32);
17226         assign(argL, getIRegA(rN));
17227         assign(argR, getIRegA(rM));
17228         assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
17229         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17230         DIP("sdiv r%u, r%u, r%u\n", rD, rN, rM);
17231         goto decode_success;
17232      }
17233    }
17234
17235   // UDIV
17236   if (BITS8(0,1,1,1,0,0,1,1) == INSN(27,20)
17237       && INSN(15,12) == BITS4(1,1,1,1)
17238       && INSN(7,4) == BITS4(0,0,0,1)) {
17239      UInt rD = INSN(19,16);
17240      UInt rM = INSN(11,8);
17241      UInt rN = INSN(3,0);
17242      if (rD == 15 || rM == 15 || rN == 15) {
17243         /* Unpredictable; don't decode; fall through */
17244      } else {
17245         IRTemp res  = newTemp(Ity_I32);
17246         IRTemp argL = newTemp(Ity_I32);
17247         IRTemp argR = newTemp(Ity_I32);
17248         assign(argL, getIRegA(rN));
17249         assign(argR, getIRegA(rM));
17250         assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
17251         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17252         DIP("udiv r%u, r%u, r%u\n", rD, rN, rM);
17253         goto decode_success;
17254      }
17255   }
17256
17257   // MLA, MLS
17258   if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17259       && INSN(7,4) == BITS4(1,0,0,1)) {
17260      UInt bitS  = (insn >> 20) & 1; /* 20:20 */
17261      UInt isMLS = (insn >> 22) & 1; /* 22:22 */
17262      UInt rD = INSN(19,16);
17263      UInt rN = INSN(15,12);
17264      UInt rS = INSN(11,8);
17265      UInt rM = INSN(3,0);
17266      if (bitS == 1 && isMLS == 1) {
17267         /* This isn't allowed (MLS that sets flags).  don't decode;
17268            fall through */
17269      }
17270      else
17271      if (rD == 15 || rM == 15 || rS == 15 || rN == 15) {
17272         /* Unpredictable; don't decode; fall through */
17273      } else {
17274         IRTemp argL = newTemp(Ity_I32);
17275         IRTemp argR = newTemp(Ity_I32);
17276         IRTemp argP = newTemp(Ity_I32);
17277         IRTemp res  = newTemp(Ity_I32);
17278         IRTemp oldC = IRTemp_INVALID;
17279         IRTemp oldV = IRTemp_INVALID;
17280         assign( argL, getIRegA(rM));
17281         assign( argR, getIRegA(rS));
17282         assign( argP, getIRegA(rN));
17283         assign( res, binop(isMLS ? Iop_Sub32 : Iop_Add32,
17284                            mkexpr(argP),
17285                            binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) ));
17286         if (bitS) {
17287            vassert(!isMLS); // guaranteed above
17288            oldC = newTemp(Ity_I32);
17289            assign(oldC, mk_armg_calculate_flag_c());
17290            oldV = newTemp(Ity_I32);
17291            assign(oldV, mk_armg_calculate_flag_v());
17292         }
17293         // now update guest state
17294         putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
17295         if (bitS) {
17296            IRTemp pair = newTemp(Ity_I32);
17297            assign( pair, binop(Iop_Or32,
17298                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17299                                mkexpr(oldV)) );
17300            setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
17301         }
17302         DIP("ml%c%c%s r%u, r%u, r%u, r%u\n",
17303             isMLS ? 's' : 'a', bitS ? 's' : ' ',
17304             nCC(INSN_COND), rD, rM, rS, rN);
17305         goto decode_success;
17306      }
17307      /* fall through */
17308   }
17309
17310   // SMULL, UMULL
17311   if (BITS8(0,0,0,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17312       && INSN(7,4) == BITS4(1,0,0,1)) {
17313      UInt bitS = (insn >> 20) & 1; /* 20:20 */
17314      UInt rDhi = INSN(19,16);
17315      UInt rDlo = INSN(15,12);
17316      UInt rS   = INSN(11,8);
17317      UInt rM   = INSN(3,0);
17318      UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
17319      if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
17320         /* Unpredictable; don't decode; fall through */
17321      } else {
17322         IRTemp argL  = newTemp(Ity_I32);
17323         IRTemp argR  = newTemp(Ity_I32);
17324         IRTemp res   = newTemp(Ity_I64);
17325         IRTemp resHi = newTemp(Ity_I32);
17326         IRTemp resLo = newTemp(Ity_I32);
17327         IRTemp oldC  = IRTemp_INVALID;
17328         IRTemp oldV  = IRTemp_INVALID;
17329         IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
17330         assign( argL, getIRegA(rM));
17331         assign( argR, getIRegA(rS));
17332         assign( res, binop(mulOp, mkexpr(argL), mkexpr(argR)) );
17333         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17334         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17335         if (bitS) {
17336            oldC = newTemp(Ity_I32);
17337            assign(oldC, mk_armg_calculate_flag_c());
17338            oldV = newTemp(Ity_I32);
17339            assign(oldV, mk_armg_calculate_flag_v());
17340         }
17341         // now update guest state
17342         putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
17343         putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
17344         if (bitS) {
17345            IRTemp pair = newTemp(Ity_I32);
17346            assign( pair, binop(Iop_Or32,
17347                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17348                                mkexpr(oldV)) );
17349            setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
17350         }
17351         DIP("%cmull%c%s r%u, r%u, r%u, r%u\n",
17352             isS ? 's' : 'u', bitS ? 's' : ' ',
17353             nCC(INSN_COND), rDlo, rDhi, rM, rS);
17354         goto decode_success;
17355      }
17356      /* fall through */
17357   }
17358
17359   // SMLAL, UMLAL
17360   if (BITS8(0,0,0,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17361       && INSN(7,4) == BITS4(1,0,0,1)) {
17362      UInt bitS = (insn >> 20) & 1; /* 20:20 */
17363      UInt rDhi = INSN(19,16);
17364      UInt rDlo = INSN(15,12);
17365      UInt rS   = INSN(11,8);
17366      UInt rM   = INSN(3,0);
17367      UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
17368      if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
17369         /* Unpredictable; don't decode; fall through */
17370      } else {
17371         IRTemp argL  = newTemp(Ity_I32);
17372         IRTemp argR  = newTemp(Ity_I32);
17373         IRTemp old   = newTemp(Ity_I64);
17374         IRTemp res   = newTemp(Ity_I64);
17375         IRTemp resHi = newTemp(Ity_I32);
17376         IRTemp resLo = newTemp(Ity_I32);
17377         IRTemp oldC  = IRTemp_INVALID;
17378         IRTemp oldV  = IRTemp_INVALID;
17379         IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
17380         assign( argL, getIRegA(rM));
17381         assign( argR, getIRegA(rS));
17382         assign( old, binop(Iop_32HLto64, getIRegA(rDhi), getIRegA(rDlo)) );
17383         assign( res, binop(Iop_Add64,
17384                            mkexpr(old),
17385                            binop(mulOp, mkexpr(argL), mkexpr(argR))) );
17386         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17387         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17388         if (bitS) {
17389            oldC = newTemp(Ity_I32);
17390            assign(oldC, mk_armg_calculate_flag_c());
17391            oldV = newTemp(Ity_I32);
17392            assign(oldV, mk_armg_calculate_flag_v());
17393         }
17394         // now update guest state
17395         putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
17396         putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
17397         if (bitS) {
17398            IRTemp pair = newTemp(Ity_I32);
17399            assign( pair, binop(Iop_Or32,
17400                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17401                                mkexpr(oldV)) );
17402            setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
17403         }
17404         DIP("%cmlal%c%s r%u, r%u, r%u, r%u\n",
17405             isS ? 's' : 'u', bitS ? 's' : ' ', nCC(INSN_COND),
17406             rDlo, rDhi, rM, rS);
17407         goto decode_success;
17408      }
17409      /* fall through */
17410   }
17411
17412   // UMAAL
17413   if (BITS8(0,0,0,0,0,1,0,0) == INSN(27,20) && INSN(7,4) == BITS4(1,0,0,1)) {
17414      UInt rDhi = INSN(19,16);
17415      UInt rDlo = INSN(15,12);
17416      UInt rM   = INSN(11,8);
17417      UInt rN   = INSN(3,0);
17418      if (rDlo == 15 || rDhi == 15 || rN == 15 || rM == 15 || rDhi == rDlo)  {
17419         /* Unpredictable; don't decode; fall through */
17420      } else {
17421         IRTemp argN   = newTemp(Ity_I32);
17422         IRTemp argM   = newTemp(Ity_I32);
17423         IRTemp argDhi = newTemp(Ity_I32);
17424         IRTemp argDlo = newTemp(Ity_I32);
17425         IRTemp res    = newTemp(Ity_I64);
17426         IRTemp resHi  = newTemp(Ity_I32);
17427         IRTemp resLo  = newTemp(Ity_I32);
17428         assign( argN,   getIRegA(rN) );
17429         assign( argM,   getIRegA(rM) );
17430         assign( argDhi, getIRegA(rDhi) );
17431         assign( argDlo, getIRegA(rDlo) );
17432         assign( res,
17433                 binop(Iop_Add64,
17434                       binop(Iop_Add64,
17435                             binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
17436                             unop(Iop_32Uto64, mkexpr(argDhi))),
17437                       unop(Iop_32Uto64, mkexpr(argDlo))) );
17438         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17439         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17440         // now update guest state
17441         putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
17442         putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
17443         DIP("umaal %s r%u, r%u, r%u, r%u\n",
17444             nCC(INSN_COND), rDlo, rDhi, rN, rM);
17445         goto decode_success;
17446      }
17447      /* fall through */
17448   }
17449
17450   /* --------------------- Msr etc --------------------- */
17451
17452   // MSR apsr, #imm
17453   if (INSN(27,20) == BITS8(0,0,1,1,0,0,1,0)
17454       && INSN(17,12) == BITS6(0,0,1,1,1,1)) {
17455      UInt write_ge    = INSN(18,18);
17456      UInt write_nzcvq = INSN(19,19);
17457      if (write_nzcvq || write_ge) {
17458         UInt   imm = (INSN(11,0) >> 0) & 0xFF;
17459         UInt   rot = 2 * ((INSN(11,0) >> 8) & 0xF);
17460         IRTemp immT = newTemp(Ity_I32);
17461         vassert(rot <= 30);
17462         imm = ROR32(imm, rot);
17463         assign(immT, mkU32(imm));
17464         desynthesise_APSR( write_nzcvq, write_ge, immT, condT );
17465         DIP("msr%s cpsr%s%sf, #0x%08x\n", nCC(INSN_COND),
17466             write_nzcvq ? "f" : "", write_ge ? "g" : "", imm);
17467         goto decode_success;
17468      }
17469      /* fall through */
17470   }
17471
17472   // MSR apsr, reg
17473   if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
17474       && INSN(17,12) == BITS6(0,0,1,1,1,1)
17475       && INSN(11,4) == BITS8(0,0,0,0,0,0,0,0)) {
17476      UInt rN          = INSN(3,0);
17477      UInt write_ge    = INSN(18,18);
17478      UInt write_nzcvq = INSN(19,19);
17479      if (rN != 15 && (write_nzcvq || write_ge)) {
17480         IRTemp rNt = newTemp(Ity_I32);
17481         assign(rNt, getIRegA(rN));
17482         desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
17483         DIP("msr%s cpsr_%s%s, r%u\n", nCC(INSN_COND),
17484             write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
17485         goto decode_success;
17486      }
17487      /* fall through */
17488   }
17489
17490   // MRS rD, cpsr
17491   if ((insn & 0x0FFF0FFF) == 0x010F0000) {
17492      UInt rD   = INSN(15,12);
17493      if (rD != 15) {
17494         IRTemp apsr = synthesise_APSR();
17495         putIRegA( rD, mkexpr(apsr), condT, Ijk_Boring );
17496         DIP("mrs%s r%u, cpsr\n", nCC(INSN_COND), rD);
17497         goto decode_success;
17498      }
17499      /* fall through */
17500   }
17501
17502   /* --------------------- Svc --------------------- */
17503   if (BITS8(1,1,1,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))) {
17504      UInt imm24 = (insn >> 0) & 0xFFFFFF;
17505      if (imm24 == 0) {
17506         /* A syscall.  We can't do this conditionally, hence: */
17507         if (condT != IRTemp_INVALID) {
17508            mk_skip_over_A32_if_cond_is_false( condT );
17509         }
17510         // AL after here
17511         llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 4 ));
17512         dres.jk_StopHere = Ijk_Sys_syscall;
17513         dres.whatNext    = Dis_StopHere;
17514         DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
17515         goto decode_success;
17516      }
17517      /* fall through */
17518   }
17519
17520   /* ------------------------ swp ------------------------ */
17521
17522   // SWP, SWPB
17523   if (BITS8(0,0,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
17524       && BITS4(0,0,0,0) == INSN(11,8)
17525       && BITS4(1,0,0,1) == INSN(7,4)) {
17526      UInt   rN   = INSN(19,16);
17527      UInt   rD   = INSN(15,12);
17528      UInt   rM   = INSN(3,0);
17529      IRTemp tRn  = newTemp(Ity_I32);
17530      IRTemp tNew = newTemp(Ity_I32);
17531      IRTemp tOld = IRTemp_INVALID;
17532      IRTemp tSC1 = newTemp(Ity_I1);
17533      UInt   isB  = (insn >> 22) & 1;
17534
17535      if (rD == 15 || rN == 15 || rM == 15 || rN == rM || rN == rD) {
17536         /* undecodable; fall through */
17537      } else {
17538         /* make unconditional */
17539         if (condT != IRTemp_INVALID) {
17540            mk_skip_over_A32_if_cond_is_false( condT );
17541            condT = IRTemp_INVALID;
17542         }
17543         /* Ok, now we're unconditional.  Generate a LL-SC loop. */
17544         assign(tRn, getIRegA(rN));
17545         assign(tNew, getIRegA(rM));
17546         if (isB) {
17547            /* swpb */
17548            tOld = newTemp(Ity_I8);
17549            stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
17550                              NULL/*=>isLL*/) );
17551            stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
17552                              unop(Iop_32to8, mkexpr(tNew))) );
17553         } else {
17554            /* swp */
17555            tOld = newTemp(Ity_I32);
17556            stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
17557                              NULL/*=>isLL*/) );
17558            stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
17559                              mkexpr(tNew)) );
17560         }
17561         stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
17562                           /*Ijk_NoRedir*/Ijk_Boring,
17563                           IRConst_U32(guest_R15_curr_instr_notENC),
17564                           OFFB_R15T ));
17565         putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
17566                      IRTemp_INVALID, Ijk_Boring);
17567         DIP("swp%s%s r%u, r%u, [r%u]\n",
17568             isB ? "b" : "", nCC(INSN_COND), rD, rM, rN);
17569         goto decode_success;
17570      }
17571      /* fall through */
17572   }
17573
17574   /* ----------------------------------------------------------- */
17575   /* -- ARMv6 instructions                                    -- */
17576   /* ----------------------------------------------------------- */
17577
17578   /* ------------------- {ldr,str}ex{,b,h,d} ------------------- */
17579
17580   // LDREXD, LDREX, LDREXH, LDREXB
17581   if (0x01900F9F == (insn & 0x0F900FFF)) {
17582      UInt   rT    = INSN(15,12);
17583      UInt   rN    = INSN(19,16);
17584      IRType ty    = Ity_INVALID;
17585      IROp   widen = Iop_INVALID;
17586      const HChar* nm = NULL;
17587      Bool   valid = True;
17588      switch (INSN(22,21)) {
17589         case 0: nm = "";  ty = Ity_I32; break;
17590         case 1: nm = "d"; ty = Ity_I64; break;
17591         case 2: nm = "b"; ty = Ity_I8;  widen = Iop_8Uto32; break;
17592         case 3: nm = "h"; ty = Ity_I16; widen = Iop_16Uto32; break;
17593         default: vassert(0);
17594      }
17595      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
17596         if (rT == 15 || rN == 15)
17597            valid = False;
17598      } else {
17599         vassert(ty == Ity_I64);
17600         if ((rT & 1) == 1 || rT == 14 || rN == 15)
17601            valid = False;
17602      }
17603      if (valid) {
17604         IRTemp res;
17605         /* make unconditional */
17606         if (condT != IRTemp_INVALID) {
17607           mk_skip_over_A32_if_cond_is_false( condT );
17608           condT = IRTemp_INVALID;
17609         }
17610         /* Ok, now we're unconditional.  Do the load. */
17611         res = newTemp(ty);
17612         // FIXME: assumes little-endian guest
17613         stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
17614                           NULL/*this is a load*/) );
17615         if (ty == Ity_I64) {
17616            // FIXME: assumes little-endian guest
17617            putIRegA(rT+0, unop(Iop_64to32, mkexpr(res)),
17618                           IRTemp_INVALID, Ijk_Boring);
17619            putIRegA(rT+1, unop(Iop_64HIto32, mkexpr(res)),
17620                           IRTemp_INVALID, Ijk_Boring);
17621            DIP("ldrex%s%s r%u, r%u, [r%u]\n",
17622                nm, nCC(INSN_COND), rT+0, rT+1, rN);
17623         } else {
17624            putIRegA(rT, widen == Iop_INVALID
17625                            ? mkexpr(res) : unop(widen, mkexpr(res)),
17626                     IRTemp_INVALID, Ijk_Boring);
17627            DIP("ldrex%s%s r%u, [r%u]\n", nm, nCC(INSN_COND), rT, rN);
17628         }
17629         goto decode_success;
17630      }
17631      /* undecodable; fall through */
17632   }
17633
17634   // STREXD, STREX, STREXH, STREXB
17635   if (0x01800F90 == (insn & 0x0F900FF0)) {
17636      UInt   rT     = INSN(3,0);
17637      UInt   rN     = INSN(19,16);
17638      UInt   rD     = INSN(15,12);
17639      IRType ty     = Ity_INVALID;
17640      IROp   narrow = Iop_INVALID;
17641      const HChar* nm = NULL;
17642      Bool   valid  = True;
17643      switch (INSN(22,21)) {
17644         case 0: nm = "";  ty = Ity_I32; break;
17645         case 1: nm = "d"; ty = Ity_I64; break;
17646         case 2: nm = "b"; ty = Ity_I8;  narrow = Iop_32to8; break;
17647         case 3: nm = "h"; ty = Ity_I16; narrow = Iop_32to16; break;
17648         default: vassert(0);
17649      }
17650      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
17651         if (rD == 15 || rN == 15 || rT == 15
17652             || rD == rN || rD == rT)
17653            valid = False;
17654      } else {
17655         vassert(ty == Ity_I64);
17656         if (rD == 15 || (rT & 1) == 1 || rT == 14 || rN == 15
17657             || rD == rN || rD == rT || rD == rT+1)
17658            valid = False;
17659      }
17660      if (valid) {
17661         IRTemp resSC1, resSC32, data;
17662         /* make unconditional */
17663         if (condT != IRTemp_INVALID) {
17664            mk_skip_over_A32_if_cond_is_false( condT );
17665            condT = IRTemp_INVALID;
17666         }
17667         /* Ok, now we're unconditional.  Do the store. */
17668         data = newTemp(ty);
17669         assign(data,
17670                ty == Ity_I64
17671                   // FIXME: assumes little-endian guest
17672                   ? binop(Iop_32HLto64, getIRegA(rT+1), getIRegA(rT+0))
17673                   : narrow == Iop_INVALID
17674                      ? getIRegA(rT)
17675                      : unop(narrow, getIRegA(rT)));
17676         resSC1 = newTemp(Ity_I1);
17677         // FIXME: assumes little-endian guest
17678         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
17679
17680         /* Set rD to 1 on failure, 0 on success.  Currently we have
17681            resSC1 == 0 on failure, 1 on success. */
17682         resSC32 = newTemp(Ity_I32);
17683         assign(resSC32,
17684                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
17685
17686         putIRegA(rD, mkexpr(resSC32),
17687                      IRTemp_INVALID, Ijk_Boring);
17688         if (ty == Ity_I64) {
17689            DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
17690                nm, nCC(INSN_COND), rD, rT, rT+1, rN);
17691         } else {
17692            DIP("strex%s%s r%u, r%u, [r%u]\n",
17693                nm, nCC(INSN_COND), rD, rT, rN);
17694         }
17695         goto decode_success;
17696      }
17697      /* fall through */
17698   }
17699
17700   /* --------------------- movw, movt --------------------- */
17701   if (0x03000000 == (insn & 0x0FF00000)
17702       || 0x03400000 == (insn & 0x0FF00000)) /* pray for CSE */ {
17703      UInt rD    = INSN(15,12);
17704      UInt imm16 = (insn & 0xFFF) | ((insn >> 4) & 0x0000F000);
17705      UInt isT   = (insn >> 22) & 1;
17706      if (rD == 15) {
17707         /* forget it */
17708      } else {
17709         if (isT) {
17710            putIRegA(rD,
17711                     binop(Iop_Or32,
17712                           binop(Iop_And32, getIRegA(rD), mkU32(0xFFFF)),
17713                           mkU32(imm16 << 16)),
17714                     condT, Ijk_Boring);
17715            DIP("movt%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
17716            goto decode_success;
17717         } else {
17718            putIRegA(rD, mkU32(imm16), condT, Ijk_Boring);
17719            DIP("movw%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
17720            goto decode_success;
17721         }
17722      }
17723      /* fall through */
17724   }
17725
17726   /* ----------- uxtb, sxtb, uxth, sxth, uxtb16, sxtb16 ----------- */
17727   /* FIXME: this is an exact duplicate of the Thumb version.  They
17728      should be commoned up. */
17729   if (BITS8(0,1,1,0,1, 0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,0))
17730       && BITS4(1,1,1,1) == INSN(19,16)
17731       && BITS4(0,1,1,1) == INSN(7,4)
17732       && BITS4(0,0, 0,0) == (INSN(11,8) & BITS4(0,0,1,1))) {
17733      UInt subopc = INSN(27,20) & BITS8(0,0,0,0,0, 1,1,1);
17734      if (subopc != BITS4(0,0,0,1) && subopc != BITS4(0,1,0,1)) {
17735         Int    rot  = (INSN(11,8) >> 2) & 3;
17736         UInt   rM   = INSN(3,0);
17737         UInt   rD   = INSN(15,12);
17738         IRTemp srcT = newTemp(Ity_I32);
17739         IRTemp rotT = newTemp(Ity_I32);
17740         IRTemp dstT = newTemp(Ity_I32);
17741         const HChar* nm = "???";
17742         assign(srcT, getIRegA(rM));
17743         assign(rotT, genROR32(srcT, 8 * rot)); /* 0, 8, 16 or 24 only */
17744         switch (subopc) {
17745            case BITS4(0,1,1,0): // UXTB
17746               assign(dstT, unop(Iop_8Uto32, unop(Iop_32to8, mkexpr(rotT))));
17747               nm = "uxtb";
17748               break;
17749            case BITS4(0,0,1,0): // SXTB
17750               assign(dstT, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rotT))));
17751               nm = "sxtb";
17752               break;
17753            case BITS4(0,1,1,1): // UXTH
17754               assign(dstT, unop(Iop_16Uto32, unop(Iop_32to16, mkexpr(rotT))));
17755               nm = "uxth";
17756               break;
17757            case BITS4(0,0,1,1): // SXTH
17758               assign(dstT, unop(Iop_16Sto32, unop(Iop_32to16, mkexpr(rotT))));
17759               nm = "sxth";
17760               break;
17761            case BITS4(0,1,0,0): // UXTB16
17762               assign(dstT, binop(Iop_And32, mkexpr(rotT), mkU32(0x00FF00FF)));
17763               nm = "uxtb16";
17764               break;
17765            case BITS4(0,0,0,0): { // SXTB16
17766               IRTemp lo32 = newTemp(Ity_I32);
17767               IRTemp hi32 = newTemp(Ity_I32);
17768               assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
17769               assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
17770               assign(
17771                  dstT,
17772                  binop(Iop_Or32,
17773                        binop(Iop_And32,
17774                              unop(Iop_8Sto32,
17775                                   unop(Iop_32to8, mkexpr(lo32))),
17776                              mkU32(0xFFFF)),
17777                        binop(Iop_Shl32,
17778                              unop(Iop_8Sto32,
17779                                   unop(Iop_32to8, mkexpr(hi32))),
17780                              mkU8(16))
17781               ));
17782               nm = "sxtb16";
17783               break;
17784            }
17785            default:
17786               vassert(0); // guarded by "if" above
17787         }
17788         putIRegA(rD, mkexpr(dstT), condT, Ijk_Boring);
17789         DIP("%s%s r%u, r%u, ROR #%d\n", nm, nCC(INSN_COND), rD, rM, rot);
17790         goto decode_success;
17791      }
17792      /* fall through */
17793   }
17794
17795   /* ------------------- bfi, bfc ------------------- */
17796   if (BITS8(0,1,1,1,1,1,0, 0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
17797       && BITS4(0, 0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
17798      UInt rD  = INSN(15,12);
17799      UInt rN  = INSN(3,0);
17800      UInt msb = (insn >> 16) & 0x1F; /* 20:16 */
17801      UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
17802      if (rD == 15 || msb < lsb) {
17803         /* undecodable; fall through */
17804      } else {
17805         IRTemp src    = newTemp(Ity_I32);
17806         IRTemp olddst = newTemp(Ity_I32);
17807         IRTemp newdst = newTemp(Ity_I32);
17808         UInt   mask   = ((UInt)1) << (msb - lsb);
17809         mask = (mask - 1) + mask;
17810         vassert(mask != 0); // guaranteed by "msb < lsb" check above
17811         mask <<= lsb;
17812
17813         assign(src, rN == 15 ? mkU32(0) : getIRegA(rN));
17814         assign(olddst, getIRegA(rD));
17815         assign(newdst,
17816                binop(Iop_Or32,
17817                   binop(Iop_And32,
17818                         binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
17819                         mkU32(mask)),
17820                   binop(Iop_And32,
17821                         mkexpr(olddst),
17822                         mkU32(~mask)))
17823               );
17824
17825         putIRegA(rD, mkexpr(newdst), condT, Ijk_Boring);
17826
17827         if (rN == 15) {
17828            DIP("bfc%s r%u, #%u, #%u\n",
17829                nCC(INSN_COND), rD, lsb, msb-lsb+1);
17830         } else {
17831            DIP("bfi%s r%u, r%u, #%u, #%u\n",
17832                nCC(INSN_COND), rD, rN, lsb, msb-lsb+1);
17833         }
17834         goto decode_success;
17835      }
17836      /* fall through */
17837   }
17838
17839   /* ------------------- {u,s}bfx ------------------- */
17840   if (BITS8(0,1,1,1,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17841       && BITS4(0,1,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
17842      UInt rD  = INSN(15,12);
17843      UInt rN  = INSN(3,0);
17844      UInt wm1 = (insn >> 16) & 0x1F; /* 20:16 */
17845      UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
17846      UInt msb = lsb + wm1;
17847      UInt isU = (insn >> 22) & 1;    /* 22:22 */
17848      if (rD == 15 || rN == 15 || msb >= 32) {
17849         /* undecodable; fall through */
17850      } else {
17851         IRTemp src  = newTemp(Ity_I32);
17852         IRTemp tmp  = newTemp(Ity_I32);
17853         IRTemp res  = newTemp(Ity_I32);
17854         UInt   mask = ((1 << wm1) - 1) + (1 << wm1);
17855         vassert(msb >= 0 && msb <= 31);
17856         vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
17857
17858         assign(src, getIRegA(rN));
17859         assign(tmp, binop(Iop_And32,
17860                           binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
17861                           mkU32(mask)));
17862         assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
17863                           binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
17864                           mkU8(31-wm1)));
17865
17866         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17867
17868         DIP("%s%s r%u, r%u, #%u, #%u\n",
17869             isU ? "ubfx" : "sbfx",
17870             nCC(INSN_COND), rD, rN, lsb, wm1 + 1);
17871         goto decode_success;
17872      }
17873      /* fall through */
17874   }
17875
17876   /* --------------------- Load/store doubleword ------------- */
17877   // LDRD STRD
17878   /*                 31   27   23   19 15 11   7    3     # highest bit
17879                        28   24   20 16 12    8    4    0
17880      A5-36   1 | 16  cond 0001 U100 Rn Rd im4h 11S1 im4l
17881      A5-38   1 | 32  cond 0001 U000 Rn Rd 0000 11S1 Rm
17882      A5-40   2 | 16  cond 0001 U110 Rn Rd im4h 11S1 im4l
17883      A5-42   2 | 32  cond 0001 U010 Rn Rd 0000 11S1 Rm
17884      A5-44   3 | 16  cond 0000 U100 Rn Rd im4h 11S1 im4l
17885      A5-46   3 | 32  cond 0000 U000 Rn Rd 0000 11S1 Rm
17886   */
17887   /* case coding:
17888             1   at-ea               (access at ea)
17889             2   at-ea-then-upd      (access at ea, then Rn = ea)
17890             3   at-Rn-then-upd      (access at Rn, then Rn = ea)
17891      ea coding
17892             16  Rn +/- imm8
17893             32  Rn +/- Rm
17894   */
17895   /* Quickly skip over all of this for hopefully most instructions */
17896   if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
17897      goto after_load_store_doubleword;
17898
17899   /* Check the "11S1" thing. */
17900   if ((INSN(7,4) & BITS4(1,1,0,1)) != BITS4(1,1,0,1))
17901      goto after_load_store_doubleword;
17902
17903   summary = 0;
17904
17905   /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,0,0)) {
17906      summary = 1 | 16;
17907   }
17908   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,0,0)) {
17909      summary = 1 | 32;
17910   }
17911   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,1,0)) {
17912      summary = 2 | 16;
17913   }
17914   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,1,0)) {
17915      summary = 2 | 32;
17916   }
17917   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(1,0,0)) {
17918      summary = 3 | 16;
17919   }
17920   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(0,0,0)) {
17921      summary = 3 | 32;
17922   }
17923   else goto after_load_store_doubleword;
17924
   /* Translate an A32 doubleword load/store (ldrd/strd).  'summary'
      is computed by the code preceding this block.  As used below,
      its high nibble selects the addressing form (16 = Rn +/- imm8,
      32 = Rn +/- Rm) and its low nibble the indexing mode
      (1 = transfer at EA, no writeback; 2 = transfer at EA, then
      Rn = EA; 3 = transfer at Rn, then Rn = EA).  Any encoding that
      is rejected jumps to after_load_store_doubleword so that later
      decoders can try it. */
   { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
     UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
     UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
     UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
     UInt bS   = (insn >> 5) & 1;    /* S=1 store, S=0 load */
     UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */

     /* Require rD to be an even numbered register */
     if ((rD & 1) != 0)
        goto after_load_store_doubleword;

     /* Require 11:8 == 0 for Rn +/- Rm cases */
     if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
        goto after_load_store_doubleword;

     /* Skip some invalid cases, which would lead to two competing
        updates to the same register, or which are otherwise
        disallowed by the spec. */
     switch (summary) {
        /* offset-only, immediate form: nothing further to reject */
        case 1 | 16:
           break;
        /* offset-only, register form: Rm must not be the PC */
        case 1 | 32:
           if (rM == 15) goto after_load_store_doubleword;
           break;
        /* writeback, immediate form: Rn must not be the PC, and a
           load must not also target Rn */
        case 2 | 16: case 3 | 16:
           if (rN == 15) goto after_load_store_doubleword;
           if (bS == 0 && (rN == rD || rN == rD+1))
              goto after_load_store_doubleword;
           break;
        /* writeback, register form: as above, plus Rm != PC and
           Rn != Rm */
        case 2 | 32: case 3 | 32:
           if (rM == 15) goto after_load_store_doubleword;
           if (rN == 15) goto after_load_store_doubleword;
           if (rN == rM) goto after_load_store_doubleword;
           if (bS == 0 && (rN == rD || rN == rD+1))
              goto after_load_store_doubleword;
           break;
        /* any other value of summary is treated as a decoder bug */
        default:
           vassert(0);
     }

     /* If this is a branch, make it unconditional at this point.
        Doing conditional branches in-line is too complex (for
        now). */
     vassert((rD & 1) == 0); /* from tests above */
     /* A load whose second destination register is r15 is the
        "branch" case referred to above. */
     if (bS == 0 && rD+1 == 15 && condT != IRTemp_INVALID) {
        // go uncond
        mk_skip_over_A32_if_cond_is_false( condT );
        condT = IRTemp_INVALID;
        // now uncond
     }

     /* compute the effective address.  Bind it to a tmp since we
        may need to use it twice. */
     IRExpr* eaE = NULL;
     switch (summary & 0xF0) {
        case 16:
           eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
           break;
        case 32:
           eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
           break;
     }
     vassert(eaE);
     IRTemp eaT = newTemp(Ity_I32);
     assign(eaT, eaE);

     /* get the old Rn value */
     IRTemp rnT = newTemp(Ity_I32);
     assign(rnT, getIRegA(rN));

     /* decide on the transfer address */
     IRTemp taT = IRTemp_INVALID;
     switch (summary & 0x0F) {
        case 1: case 2: taT = eaT; break;
        case 3:         taT = rnT; break;
     }
     vassert(taT != IRTemp_INVALID);

     /* XXX deal with alignment constraints */
     /* XXX: but the A8 doesn't seem to trap for misaligned loads, so,
        ignore alignment issues for the time being. */

     /* For almost all cases, we do the writeback after the transfers.
        However, that leaves the stack "uncovered" in cases like:
           strd    rD, [sp, #-8]
           strd    rD, [sp, #-16]
        In which case, do the writeback to SP now, instead of later.
        This is bad in that it makes the insn non-restartable if the
        accesses fault, but at least keeps Memcheck happy. */
     Bool writeback_already_done = False;
     /* Only stores of the form [sp, #-8]! / [sp, #-16]! whose data
        registers are not sp itself take the early-writeback path. */
     if (bS == 1 /*store*/ && summary == (2 | 16)
         && rN == 13 && rN != rD && rN != rD+1
         && bU == 0/*minus*/
         && (imm8 == 8 || imm8 == 16)) {
        putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
        writeback_already_done = True;
     }

     /* doubleword store  S 1
        doubleword load   S 0
     */
     const HChar* name = NULL;
     /* generate the transfers */
     if (bS == 1) { // doubleword store
        /* rD goes to the transfer address, rD+1 to the word after it */
        storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(0)),
                        getIRegA(rD+0), condT );
        storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(4)),
                        getIRegA(rD+1), condT );
        name = "strd";
     } else { // doubleword load
        /* The current register values are captured first and handed
           to the guarded loads as their alternative operand; the
           register writes themselves are then unconditional.
           NOTE(review): presumably the guarded load yields the old
           value when condT is false -- confirm against loadGuardedLE. */
        IRTemp oldRd0 = newTemp(Ity_I32);
        IRTemp oldRd1 = newTemp(Ity_I32);
        assign(oldRd0, llGetIReg(rD+0));
        assign(oldRd1, llGetIReg(rD+1));
        IRTemp newRd0 = newTemp(Ity_I32);
        IRTemp newRd1 = newTemp(Ity_I32);
        loadGuardedLE( newRd0, ILGop_Ident32,
                       binop(Iop_Add32, mkexpr(taT), mkU32(0)),
                       mkexpr(oldRd0), condT );
        putIRegA( rD+0, mkexpr(newRd0), IRTemp_INVALID, Ijk_Boring );
        loadGuardedLE( newRd1, ILGop_Ident32,
                       binop(Iop_Add32, mkexpr(taT), mkU32(4)),
                       mkexpr(oldRd1), condT );
        putIRegA( rD+1, mkexpr(newRd1), IRTemp_INVALID, Ijk_Boring );
        name = "ldrd";
     }

     /* Update Rn if necessary. */
     switch (summary & 0x0F) {
        case 2: case 3:
           // should be assured by logic above:
           vassert(rN != 15); /* from checks above */
           if (bS == 0) {
              vassert(rD+0 != rN); /* since we just wrote rD+0 */
              vassert(rD+1 != rN); /* since we just wrote rD+1 */
           }
           /* skip if the SP special case above already wrote Rn */
           if (!writeback_already_done)
              putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
           break;
     }

     /* Emit the disassembly text; dis_buf was filled in by the
        mk_EA_* helper used above. */
     switch (summary & 0x0F) {
        case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
                 break;
        case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
                     name, nCC(INSN_COND), rD, dis_buf);
                 break;
        case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
                     name, nCC(INSN_COND), rD, dis_buf);
                 break;
        default: vassert(0);
     }

     goto decode_success;
   }
18080
18081  after_load_store_doubleword:
18082
18083   /* ------------------- {s,u}xtab ------------- */
18084   if (BITS8(0,1,1,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
18085       && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
18086       && BITS4(0,1,1,1) == INSN(7,4)) {
18087      UInt rN  = INSN(19,16);
18088      UInt rD  = INSN(15,12);
18089      UInt rM  = INSN(3,0);
18090      UInt rot = (insn >> 10) & 3;
18091      UInt isU = INSN(22,22);
18092      if (rN == 15/*it's {S,U}XTB*/ || rD == 15 || rM == 15) {
18093         /* undecodable; fall through */
18094      } else {
18095         IRTemp srcL = newTemp(Ity_I32);
18096         IRTemp srcR = newTemp(Ity_I32);
18097         IRTemp res  = newTemp(Ity_I32);
18098         assign(srcR, getIRegA(rM));
18099         assign(srcL, getIRegA(rN));
18100         assign(res,  binop(Iop_Add32,
18101                            mkexpr(srcL),
18102                            unop(isU ? Iop_8Uto32 : Iop_8Sto32,
18103                                 unop(Iop_32to8,
18104                                      genROR32(srcR, 8 * rot)))));
18105         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18106         DIP("%cxtab%s r%u, r%u, r%u, ror #%u\n",
18107             isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
18108         goto decode_success;
18109      }
18110      /* fall through */
18111   }
18112
18113   /* ------------------- {s,u}xtah ------------- */
18114   if (BITS8(0,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
18115       && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
18116       && BITS4(0,1,1,1) == INSN(7,4)) {
18117      UInt rN  = INSN(19,16);
18118      UInt rD  = INSN(15,12);
18119      UInt rM  = INSN(3,0);
18120      UInt rot = (insn >> 10) & 3;
18121      UInt isU = INSN(22,22);
18122      if (rN == 15/*it's {S,U}XTH*/ || rD == 15 || rM == 15) {
18123         /* undecodable; fall through */
18124      } else {
18125         IRTemp srcL = newTemp(Ity_I32);
18126         IRTemp srcR = newTemp(Ity_I32);
18127         IRTemp res  = newTemp(Ity_I32);
18128         assign(srcR, getIRegA(rM));
18129         assign(srcL, getIRegA(rN));
18130         assign(res,  binop(Iop_Add32,
18131                            mkexpr(srcL),
18132                            unop(isU ? Iop_16Uto32 : Iop_16Sto32,
18133                                 unop(Iop_32to16,
18134                                      genROR32(srcR, 8 * rot)))));
18135         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18136
18137         DIP("%cxtah%s r%u, r%u, r%u, ror #%u\n",
18138             isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
18139         goto decode_success;
18140      }
18141      /* fall through */
18142   }
18143
18144   /* ------------------- rev16, rev ------------------ */
18145   if (INSN(27,16) == 0x6BF
18146       && (INSN(11,4) == 0xFB/*rev16*/ || INSN(11,4) == 0xF3/*rev*/)) {
18147      Bool isREV = INSN(11,4) == 0xF3;
18148      UInt rM    = INSN(3,0);
18149      UInt rD    = INSN(15,12);
18150      if (rM != 15 && rD != 15) {
18151         IRTemp rMt = newTemp(Ity_I32);
18152         assign(rMt, getIRegA(rM));
18153         IRTemp res = isREV ? gen_REV(rMt) : gen_REV16(rMt);
18154         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18155         DIP("rev%s%s r%u, r%u\n", isREV ? "" : "16",
18156             nCC(INSN_COND), rD, rM);
18157         goto decode_success;
18158      }
18159   }
18160
18161   /* ------------------- revsh ----------------------- */
18162   if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xFB) {
18163      UInt rM = INSN(3,0);
18164      UInt rD = INSN(15,12);
18165      if (rM != 15 && rD != 15) {
18166         IRTemp irt_rM  = newTemp(Ity_I32);
18167         IRTemp irt_hi  = newTemp(Ity_I32);
18168         IRTemp irt_low = newTemp(Ity_I32);
18169         IRTemp irt_res = newTemp(Ity_I32);
18170         assign(irt_rM, getIRegA(rM));
18171         assign(irt_hi,
18172                binop(Iop_Sar32,
18173                      binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
18174                      mkU8(16)
18175                )
18176         );
18177         assign(irt_low,
18178                binop(Iop_And32,
18179                      binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
18180                      mkU32(0xFF)
18181                )
18182         );
18183         assign(irt_res,
18184                binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
18185         );
18186         putIRegA(rD, mkexpr(irt_res), condT, Ijk_Boring);
18187         DIP("revsh%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
18188         goto decode_success;
18189      }
18190   }
18191
18192   /* ------------------- rbit ------------------ */
18193   if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xF3) {
18194      UInt rD = INSN(15,12);
18195      UInt rM = INSN(3,0);
18196      if (rD != 15 && rM != 15) {
18197         IRTemp arg = newTemp(Ity_I32);
18198         assign(arg, getIRegA(rM));
18199         IRTemp res = gen_BITREV(arg);
18200         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18201         DIP("rbit r%u, r%u\n", rD, rM);
18202         goto decode_success;
18203      }
18204   }
18205
18206   /* ------------------- smmul ------------------ */
18207   if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
18208       && INSN(15,12) == BITS4(1,1,1,1)
18209       && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
18210      UInt bitR = INSN(5,5);
18211      UInt rD = INSN(19,16);
18212      UInt rM = INSN(11,8);
18213      UInt rN = INSN(3,0);
18214      if (rD != 15 && rM != 15 && rN != 15) {
18215         IRExpr* res
18216         = unop(Iop_64HIto32,
18217                binop(Iop_Add64,
18218                      binop(Iop_MullS32, getIRegA(rN), getIRegA(rM)),
18219                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
18220         putIRegA(rD, res, condT, Ijk_Boring);
18221         DIP("smmul%s%s r%u, r%u, r%u\n",
18222             nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM);
18223         goto decode_success;
18224      }
18225   }
18226
18227   /* ------------------- smmla ------------------ */
18228   if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
18229       && INSN(15,12) != BITS4(1,1,1,1)
18230       && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
18231      UInt bitR = INSN(5,5);
18232      UInt rD = INSN(19,16);
18233      UInt rA = INSN(15,12);
18234      UInt rM = INSN(11,8);
18235      UInt rN = INSN(3,0);
18236      if (rD != 15 && rM != 15 && rN != 15) {
18237         IRExpr* res
18238         = unop(Iop_64HIto32,
18239                binop(Iop_Add64,
18240                      binop(Iop_Add64,
18241                            binop(Iop_32HLto64, getIRegA(rA), mkU32(0)),
18242                            binop(Iop_MullS32, getIRegA(rN), getIRegA(rM))),
18243                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
18244         putIRegA(rD, res, condT, Ijk_Boring);
18245         DIP("smmla%s%s r%u, r%u, r%u, r%u\n",
18246             nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM, rA);
18247         goto decode_success;
18248      }
18249   }
18250
18251   /* -------------- (A1) LDRT reg+/-#imm12 -------------- */
18252   /* Load Register Unprivileged:
18253      ldrt<c> Rt, [Rn] {, #+/-imm12}
18254   */
18255   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,1) ) {
18256      UInt rT     = INSN(15,12);
18257      UInt rN     = INSN(19,16);
18258      UInt imm12  = INSN(11,0);
18259      UInt bU     = INSN(23,23);
18260      Bool valid  = True;
18261      if (rT == 15 || rN == 15 || rN == rT) valid = False;
18262      if (valid) {
18263         IRTemp newRt = newTemp(Ity_I32);
18264         loadGuardedLE( newRt,
18265                        ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
18266         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18267         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18268                             getIRegA(rN), mkU32(imm12));
18269         putIRegA(rN, erN, condT, Ijk_Boring);
18270         DIP("ldrt%s r%u, [r%u], #%c%u\n",
18271             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18272         goto decode_success;
18273      }
18274   }
18275
18276   /* -------------- (A2) LDRT reg+/-reg with shift -------------- */
18277   /* Load Register Unprivileged:
18278      ldrt<c> Rt, [Rn], +/-Rm{, shift}
18279   */
18280   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,1)
18281        && INSN(4,4) == 0 ) {
18282      UInt rT     = INSN(15,12);
18283      UInt rN     = INSN(19,16);
18284      UInt rM     = INSN(3,0);
18285      UInt imm5   = INSN(11,7);
18286      UInt bU     = INSN(23,23);
18287      UInt type   = INSN(6,5);
18288      Bool valid  = True;
18289      if (rT == 15 || rN == 15 || rN == rT || rM == 15
18290          /* || (ArchVersion() < 6 && rM == rN) */)
18291         valid = False;
18292      if (valid) {
18293         IRTemp newRt = newTemp(Ity_I32);
18294         loadGuardedLE( newRt,
18295                        ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
18296         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18297         // dis_buf generated is slightly bogus, in fact.
18298         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18299                                                       type, imm5, dis_buf);
18300         putIRegA(rN, erN, condT, Ijk_Boring);
18301         DIP("ldrt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18302         goto decode_success;
18303      }
18304   }
18305
18306   /* -------------- (A1) LDRBT reg+/-#imm12 -------------- */
18307   /* Load Register Byte Unprivileged:
18308      ldrbt<c> Rt, [Rn], #+/-imm12
18309   */
18310   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,1) ) {
18311      UInt rT     = INSN(15,12);
18312      UInt rN     = INSN(19,16);
18313      UInt imm12  = INSN(11,0);
18314      UInt bU     = INSN(23,23);
18315      Bool valid  = True;
18316      if (rT == 15 || rN == 15 || rN == rT) valid = False;
18317      if (valid) {
18318         IRTemp newRt = newTemp(Ity_I32);
18319         loadGuardedLE( newRt,
18320                        ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
18321         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18322         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18323                             getIRegA(rN), mkU32(imm12));
18324         putIRegA(rN, erN, condT, Ijk_Boring);
18325         DIP("ldrbt%s r%u, [r%u], #%c%u\n",
18326             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18327         goto decode_success;
18328      }
18329   }
18330
18331   /* -------------- (A2) LDRBT reg+/-reg with shift -------------- */
18332   /* Load Register Byte Unprivileged:
18333      ldrbt<c> Rt, [Rn], +/-Rm{, shift}
18334   */
18335   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,1)
18336        && INSN(4,4) == 0 ) {
18337      UInt rT     = INSN(15,12);
18338      UInt rN     = INSN(19,16);
18339      UInt rM     = INSN(3,0);
18340      UInt imm5   = INSN(11,7);
18341      UInt bU     = INSN(23,23);
18342      UInt type   = INSN(6,5);
18343      Bool valid  = True;
18344      if (rT == 15 || rN == 15 || rN == rT || rM == 15
18345          /* || (ArchVersion() < 6 && rM == rN) */)
18346         valid = False;
18347      if (valid) {
18348         IRTemp newRt = newTemp(Ity_I32);
18349         loadGuardedLE( newRt,
18350                        ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
18351         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18352         // dis_buf generated is slightly bogus, in fact.
18353         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18354                                                       type, imm5, dis_buf);
18355         putIRegA(rN, erN, condT, Ijk_Boring);
18356         DIP("ldrbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18357         goto decode_success;
18358      }
18359   }
18360
18361   /* -------------- (A1) LDRHT reg+#imm8 -------------- */
18362   /* Load Register Halfword Unprivileged:
18363      ldrht<c> Rt, [Rn] {, #+/-imm8}
18364   */
18365   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
18366       && INSN(7,4) == BITS4(1,0,1,1) ) {
18367      UInt rT    = INSN(15,12);
18368      UInt rN    = INSN(19,16);
18369      UInt bU    = INSN(23,23);
18370      UInt imm4H = INSN(11,8);
18371      UInt imm4L = INSN(3,0);
18372      UInt imm8  = (imm4H << 4) | imm4L;
18373      Bool valid = True;
18374      if (rT == 15 || rN == 15 || rN == rT)
18375         valid = False;
18376      if (valid) {
18377         IRTemp newRt = newTemp(Ity_I32);
18378         loadGuardedLE( newRt,
18379                        ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
18380         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18381         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18382                             getIRegA(rN), mkU32(imm8));
18383         putIRegA(rN, erN, condT, Ijk_Boring);
18384         DIP("ldrht%s r%u, [r%u], #%c%u\n",
18385             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18386         goto decode_success;
18387      }
18388   }
18389
18390   /* -------------- (A2) LDRHT reg+/-reg -------------- */
18391   /* Load Register Halfword Unprivileged:
18392      ldrht<c> Rt, [Rn], +/-Rm
18393   */
18394   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
18395       && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
18396      UInt rT    = INSN(15,12);
18397      UInt rN    = INSN(19,16);
18398      UInt rM    = INSN(3,0);
18399      UInt bU    = INSN(23,23);
18400      Bool valid = True;
18401      if (rT == 15 || rN == 15 || rN == rT || rM == 15)
18402         valid = False;
18403      if (valid) {
18404         IRTemp newRt = newTemp(Ity_I32);
18405         loadGuardedLE( newRt,
18406                        ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
18407         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18408         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18409                             getIRegA(rN), getIRegA(rM));
18410         putIRegA(rN, erN, condT, Ijk_Boring);
18411         DIP("ldrht%s r%u, [r%u], %cr%u\n",
18412             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18413         goto decode_success;
18414      }
18415   }
18416
18417   /* -------------- (A1) LDRSHT reg+#imm8 -------------- */
18418   /* Load Register Signed Halfword Unprivileged:
18419      ldrsht<c> Rt, [Rn] {, #+/-imm8}
18420   */
18421   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
18422       && INSN(7,4) == BITS4(1,1,1,1)) {
18423      UInt rT    = INSN(15,12);
18424      UInt rN    = INSN(19,16);
18425      UInt bU    = INSN(23,23);
18426      UInt imm4H = INSN(11,8);
18427      UInt imm4L = INSN(3,0);
18428      UInt imm8  = (imm4H << 4) | imm4L;
18429      Bool valid = True;
18430      if (rN == 15 || rT == 15 || rN == rT)
18431         valid = False;
18432      if (valid) {
18433         IRTemp newRt = newTemp(Ity_I32);
18434         loadGuardedLE( newRt,
18435                        ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
18436         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18437         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18438                             getIRegA(rN), mkU32(imm8));
18439         putIRegA(rN, erN, condT, Ijk_Boring);
18440         DIP("ldrsht%s r%u, [r%u], #%c%u\n",
18441             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18442         goto decode_success;
18443      }
18444   }
18445
18446   /* -------------- (A2) LDRSHT reg+/-reg -------------- */
18447   /* Load Register Signed Halfword Unprivileged:
18448      ldrsht<c> Rt, [Rn], +/-Rm
18449   */
18450   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
18451       && INSN(11,4) == BITS8(0,0,0,0,1,1,1,1)) {
18452      UInt rT    = INSN(15,12);
18453      UInt rN    = INSN(19,16);
18454      UInt rM    = INSN(3,0);
18455      UInt bU    = INSN(23,23);
18456      Bool valid = True;
18457      if (rN == 15 || rT == 15 || rN == rT || rM == 15)
18458         valid = False;
18459      if (valid) {
18460         IRTemp newRt = newTemp(Ity_I32);
18461         loadGuardedLE( newRt,
18462                        ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
18463         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18464         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18465                             getIRegA(rN), getIRegA(rM));
18466         putIRegA(rN, erN, condT, Ijk_Boring);
18467         DIP("ldrsht%s r%u, [r%u], %cr%u\n",
18468             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18469         goto decode_success;
18470      }
18471   }
18472
18473   /* -------------- (A1) LDRSBT reg+#imm8 -------------- */
18474   /* Load Register Signed Byte Unprivileged:
18475      ldrsbt<c> Rt, [Rn] {, #+/-imm8}
18476   */
18477   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
18478       && INSN(7,4) == BITS4(1,1,0,1)) {
18479      UInt rT    = INSN(15,12);
18480      UInt rN    = INSN(19,16);
18481      UInt bU    = INSN(23,23);
18482      UInt imm4H = INSN(11,8);
18483      UInt imm4L = INSN(3,0);
18484      UInt imm8  = (imm4H << 4) | imm4L;
18485      Bool valid = True;
18486      if (rT == 15 || rN == 15 || rN == rT)
18487         valid = False;
18488      if (valid) {
18489         IRTemp newRt = newTemp(Ity_I32);
18490         loadGuardedLE( newRt,
18491                        ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
18492         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18493         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18494                             getIRegA(rN), mkU32(imm8));
18495         putIRegA(rN, erN, condT, Ijk_Boring);
18496         DIP("ldrsbt%s r%u, [r%u], #%c%u\n",
18497             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18498         goto decode_success;
18499      }
18500   }
18501
18502   /* -------------- (A2) LDRSBT reg+/-reg -------------- */
18503   /* Load Register Signed Byte Unprivileged:
18504      ldrsbt<c> Rt, [Rn], +/-Rm
18505   */
18506   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
18507       && INSN(11,4) == BITS8(0,0,0,0,1,1,0,1)) {
18508      UInt rT    = INSN(15,12);
18509      UInt rN    = INSN(19,16);
18510      UInt bU    = INSN(23,23);
18511      UInt rM    = INSN(3,0);
18512      Bool valid = True;
18513      if (rT == 15 || rN == 15 || rN == rT || rM == 15)
18514         valid = False;
18515      if (valid) {
18516         IRTemp newRt = newTemp(Ity_I32);
18517         loadGuardedLE( newRt,
18518                        ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
18519         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18520         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18521                             getIRegA(rN), getIRegA(rM));
18522         putIRegA(rN, erN, condT, Ijk_Boring);
18523         DIP("ldrsbt%s r%u, [r%u], %cr%u\n",
18524             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18525         goto decode_success;
18526      }
18527   }
18528
18529   /* -------------- (A1) STRBT reg+#imm12 -------------- */
18530   /* Store Register Byte Unprivileged:
18531      strbt<c> Rt, [Rn], #+/-imm12
18532   */
18533   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,0) ) {
18534      UInt rT     = INSN(15,12);
18535      UInt rN     = INSN(19,16);
18536      UInt imm12  = INSN(11,0);
18537      UInt bU     = INSN(23,23);
18538      Bool valid = True;
18539      if (rT == 15 || rN == 15 || rN == rT) valid = False;
18540      if (valid) {
18541         IRExpr* address = getIRegA(rN);
18542         IRExpr* data = unop(Iop_32to8, getIRegA(rT));
18543         storeGuardedLE( address, data, condT);
18544         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18545                               getIRegA(rN), mkU32(imm12));
18546         putIRegA(rN, newRn, condT, Ijk_Boring);
18547         DIP("strbt%s r%u, [r%u], #%c%u\n",
18548             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18549         goto decode_success;
18550      }
18551   }
18552
18553   /* -------------- (A2) STRBT reg+/-reg -------------- */
18554   /* Store Register Byte Unprivileged:
18555      strbt<c> Rt, [Rn], +/-Rm{, shift}
18556   */
18557   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,0)
18558       && INSN(4,4) == 0) {
18559      UInt rT     = INSN(15,12);
18560      UInt rN     = INSN(19,16);
18561      UInt imm5   = INSN(11,7);
18562      UInt type   = INSN(6,5);
18563      UInt rM     = INSN(3,0);
18564      UInt bU     = INSN(23,23);
18565      Bool valid  = True;
18566      if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
18567      if (valid) {
18568         IRExpr* address = getIRegA(rN);
18569         IRExpr* data = unop(Iop_32to8, getIRegA(rT));
18570         storeGuardedLE( address, data, condT);
18571         // dis_buf generated is slightly bogus, in fact.
18572         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18573                                                       type, imm5, dis_buf);
18574         putIRegA(rN, erN, condT, Ijk_Boring);
18575         DIP("strbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18576         goto decode_success;
18577      }
18578   }
18579
18580   /* -------------- (A1) STRHT reg+#imm8 -------------- */
18581   /* Store Register Halfword Unprivileged:
18582      strht<c> Rt, [Rn], #+/-imm8
18583   */
18584   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,0)
18585       && INSN(7,4) == BITS4(1,0,1,1) ) {
18586      UInt rT    = INSN(15,12);
18587      UInt rN    = INSN(19,16);
18588      UInt imm4H = INSN(11,8);
18589      UInt imm4L = INSN(3,0);
18590      UInt imm8  = (imm4H << 4) | imm4L;
18591      UInt bU    = INSN(23,23);
18592      Bool valid = True;
18593      if (rT == 15 || rN == 15 || rN == rT) valid = False;
18594      if (valid) {
18595         IRExpr* address = getIRegA(rN);
18596         IRExpr* data = unop(Iop_32to16, getIRegA(rT));
18597         storeGuardedLE( address, data, condT);
18598         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18599                               getIRegA(rN), mkU32(imm8));
18600         putIRegA(rN, newRn, condT, Ijk_Boring);
18601         DIP("strht%s r%u, [r%u], #%c%u\n",
18602             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18603         goto decode_success;
18604      }
18605   }
18606
18607   /* -------------- (A2) STRHT reg+reg -------------- */
18608   /* Store Register Halfword Unprivileged:
18609      strht<c> Rt, [Rn], +/-Rm
18610   */
18611   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,0)
18612       && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
18613      UInt rT    = INSN(15,12);
18614      UInt rN    = INSN(19,16);
18615      UInt rM    = INSN(3,0);
18616      UInt bU    = INSN(23,23);
18617      Bool valid = True;
18618      if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
18619      if (valid) {
18620         IRExpr* address = getIRegA(rN);
18621         IRExpr* data = unop(Iop_32to16, getIRegA(rT));
18622         storeGuardedLE( address, data, condT);
18623         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18624                               getIRegA(rN), getIRegA(rM));
18625         putIRegA(rN, newRn, condT, Ijk_Boring);
18626         DIP("strht%s r%u, [r%u], %cr%u\n",
18627             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18628         goto decode_success;
18629      }
18630   }
18631
18632   /* -------------- (A1) STRT reg+imm12 -------------- */
18633   /* Store Register Unprivileged:
18634      strt<c> Rt, [Rn], #+/-imm12
18635   */
18636   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,0) ) {
18637      UInt rT    = INSN(15,12);
18638      UInt rN    = INSN(19,16);
18639      UInt imm12 = INSN(11,0);
18640      UInt bU    = INSN(23,23);
18641      Bool valid = True;
18642      if (rN == 15 || rN == rT) valid = False;
18643      if (valid) {
18644         IRExpr* address = getIRegA(rN);
18645         storeGuardedLE( address, getIRegA(rT), condT);
18646         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18647                               getIRegA(rN), mkU32(imm12));
18648         putIRegA(rN, newRn, condT, Ijk_Boring);
18649         DIP("strt%s r%u, [r%u], %c%u\n",
18650             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18651         goto decode_success;
18652      }
18653   }
18654
18655   /* -------------- (A2) STRT reg+reg -------------- */
18656   /* Store Register Unprivileged:
18657      strt<c> Rt, [Rn], +/-Rm{, shift}
18658   */
18659   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,0)
18660       && INSN(4,4) == 0 ) {
18661      UInt rT    = INSN(15,12);
18662      UInt rN    = INSN(19,16);
18663      UInt rM    = INSN(3,0);
18664      UInt type  = INSN(6,5);
18665      UInt imm5  = INSN(11,7);
18666      UInt bU    = INSN(23,23);
18667      Bool valid = True;
18668      if (rN == 15 || rN == rT || rM == 15) valid = False;
18669      /* FIXME We didn't do:
18670         if ArchVersion() < 6 && rM == rN then UNPREDICTABLE */
18671      if (valid) {
18672         storeGuardedLE( getIRegA(rN), getIRegA(rT), condT);
18673         // dis_buf generated is slightly bogus, in fact.
18674         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18675                                                       type, imm5, dis_buf);
18676         putIRegA(rN, erN, condT, Ijk_Boring);
18677         DIP("strt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18678         goto decode_success;
18679      }
18680   }
18681
18682   /* ----------------------------------------------------------- */
18683   /* -- ARMv7 instructions                                    -- */
18684   /* ----------------------------------------------------------- */
18685
18686   /* -------------- read CP15 TPIDRURO register ------------- */
18687   /* mrc     p15, 0, r0,  c13, c0, 3  up to
18688      mrc     p15, 0, r14, c13, c0, 3
18689   */
18690   /* I don't know whether this is really v7-only.  But anyway, we
18691      have to support it since arm-linux uses TPIDRURO as a thread
18692      state register. */
18693   if (0x0E1D0F70 == (insn & 0x0FFF0FFF)) {
18694      UInt rD = INSN(15,12);
18695      if (rD <= 14) {
18696         /* skip r15, that's too stupid to handle */
18697         putIRegA(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32),
18698                      condT, Ijk_Boring);
18699         DIP("mrc%s p15,0, r%u, c13, c0, 3\n", nCC(INSN_COND), rD);
18700         goto decode_success;
18701      }
18702      /* fall through */
18703   }
18704
18705   /* -------------- read CP15 PMUSRENR register ------------- */
18706   /* mrc     p15, 0, r0,  c9, c14, 0  up to
18707      mrc     p15, 0, r14, c9, c14, 0
18708   */
18709   /* A program reading this register is really asking "which
18710      performance monitoring registes are available in user space?
18711      The simple answer here is to return zero, meaning "none".  See
18712      #345984. */
18713   if (0x0E190F1E == (insn & 0x0FFF0FFF)) {
18714      UInt rD = INSN(15,12);
18715      if (rD <= 14) {
18716         /* skip r15, that's too stupid to handle */
18717         putIRegA(rD, mkU32(0), condT, Ijk_Boring);
18718         DIP("mrc%s p15,0, r%u, c9, c14, 0\n", nCC(INSN_COND), rD);
18719         goto decode_success;
18720      }
18721      /* fall through */
18722   }
18723
   /* Handle various kinds of barriers.  This is rather indiscriminate
      in the sense that they are all turned into an IR Fence, which
      means we don't know which they are, so the back end has to
      re-emit them all when it comes across an IR Fence.
   */
18729   /* v6 */ /* mcr 15, 0, rT, c7, c10, 5 */
18730   if (0xEE070FBA == (insn & 0xFFFF0FFF)) {
18731      UInt rT = INSN(15,12);
18732      if (rT <= 14) {
18733         /* mcr 15, 0, rT, c7, c10, 5 (v6) equiv to DMB (v7).  Data
18734            Memory Barrier -- ensures ordering of memory accesses. */
18735         stmt( IRStmt_MBE(Imbe_Fence) );
18736         DIP("mcr 15, 0, r%u, c7, c10, 5 (data memory barrier)\n", rT);
18737         goto decode_success;
18738      }
18739      /* fall through */
18740   }
18741   /* other flavours of barrier */
18742   switch (insn) {
18743      case 0xEE070F9A: /* v6 */
18744         /* mcr 15, 0, r0, c7, c10, 4 (v6) equiv to DSB (v7).  Data
18745            Synch Barrier -- ensures completion of memory accesses. */
18746         stmt( IRStmt_MBE(Imbe_Fence) );
18747         DIP("mcr 15, 0, r0, c7, c10, 4 (data synch barrier)\n");
18748         goto decode_success;
18749      case 0xEE070F95: /* v6 */
18750         /* mcr 15, 0, r0, c7, c5, 4 (v6) equiv to ISB (v7).
18751            Instruction Synchronisation Barrier (or Flush Prefetch
18752            Buffer) -- a pipe flush, I think.  I suspect we could
18753            ignore those, but to be on the safe side emit a fence
18754            anyway. */
18755         stmt( IRStmt_MBE(Imbe_Fence) );
18756         DIP("mcr 15, 0, r0, c7, c5, 4 (insn synch barrier)\n");
18757         goto decode_success;
18758      default:
18759         break;
18760   }
18761
18762   /* ----------------------------------------------------------- */
18763   /* -- Hints                                                 -- */
18764   /* ----------------------------------------------------------- */
18765
18766   switch (insn & 0x0FFFFFFF) {
18767      /* ------------------- NOP ------------------ */
18768      case 0x0320F000:
18769         DIP("nop%s\n", nCC(INSN_COND));
18770         goto decode_success;
18771      /* ------------------- YIELD ------------------ */
18772      case 0x0320F001:
18773         /* Continue after conditionally yielding. */
18774         DIP("yield%s\n", nCC(INSN_COND));
18775         stmt( IRStmt_Exit( unop(Iop_32to1,
18776                                 condT == IRTemp_INVALID
18777                                    ? mkU32(1) : mkexpr(condT)),
18778                            Ijk_Yield,
18779                            IRConst_U32(guest_R15_curr_instr_notENC + 4),
18780                            OFFB_R15T ));
18781         goto decode_success;
18782      default:
18783         break;
18784   }
18785
18786   /* ----------------------------------------------------------- */
18787   /* -- VFP (CP 10, CP 11) instructions (in ARM mode)         -- */
18788   /* ----------------------------------------------------------- */
18789
18790   if (INSN_COND != ARMCondNV) {
18791      Bool ok_vfp = decode_CP10_CP11_instruction (
18792                       &dres, INSN(27,0), condT, INSN_COND,
18793                       False/*!isT*/
18794                    );
18795      if (ok_vfp)
18796         goto decode_success;
18797   }
18798
18799   /* ----------------------------------------------------------- */
18800   /* -- NEON instructions (in ARM mode)                       -- */
18801   /* ----------------------------------------------------------- */
18802
18803   /* These are all in NV space, and so are taken care of (far) above,
18804      by a call from this function to
18805      decode_NV_instruction_ARMv7_and_below(). */
18806
18807   /* ----------------------------------------------------------- */
18808   /* -- v6 media instructions (in ARM mode)                   -- */
18809   /* ----------------------------------------------------------- */
18810
18811   { Bool ok_v6m = decode_V6MEDIA_instruction(
18812                       &dres, INSN(27,0), condT, INSN_COND,
18813                       False/*!isT*/
18814                   );
18815     if (ok_v6m)
18816        goto decode_success;
18817   }
18818
18819   /* ----------------------------------------------------------- */
18820   /* -- v8 instructions (in ARM mode)                         -- */
18821   /* ----------------------------------------------------------- */
18822
18823  after_v7_decoder:
18824
18825   /* If we get here, it means that all attempts to decode the
18826      instruction as ARMv7 or earlier have failed.  So, if we're doing
18827      ARMv8 or later, here is the point to try for it. */
18828
18829   if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
18830      Bool ok_v8
18831         = decode_V8_instruction( &dres, insn, condT, False/*!isT*/,
18832                                  IRTemp_INVALID, IRTemp_INVALID );
18833      if (ok_v8)
18834         goto decode_success;
18835   }
18836
18837   /* ----------------------------------------------------------- */
18838   /* -- Undecodable                                           -- */
18839   /* ----------------------------------------------------------- */
18840
18841   goto decode_failure;
18842   /*NOTREACHED*/
18843
18844  decode_failure:
18845   /* All decode failures end up here. */
18846   if (sigill_diag) {
18847      vex_printf("disInstr(arm): unhandled instruction: "
18848                 "0x%x\n", insn);
18849      vex_printf("                 cond=%d(0x%x) 27:20=%d(0x%02x) "
18850                                   "4:4=%d "
18851                                   "3:0=%d(0x%x)\n",
18852                 (Int)INSN_COND, (UInt)INSN_COND,
18853                 (Int)INSN(27,20), (UInt)INSN(27,20),
18854                 (Int)INSN(4,4),
18855                 (Int)INSN(3,0), (UInt)INSN(3,0) );
18856   }
18857
   /* Tell the dispatcher that this insn cannot be decoded, and so has
      not been executed, and (is currently) the next to be executed.
      R15 should be up-to-date since it is made so at the start of each
      insn, but nevertheless be paranoid and update it again right
      now. */
18863   vassert(0 == (guest_R15_curr_instr_notENC & 3));
18864   llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
18865   dres.len         = 0;
18866   dres.whatNext    = Dis_StopHere;
18867   dres.jk_StopHere = Ijk_NoDecode;
18868   dres.continueAt  = 0;
18869   return dres;
18870
18871  decode_success:
18872   /* All decode successes end up here. */
18873   DIP("\n");
18874
18875   vassert(dres.len == 4 || dres.len == 20);
18876
18877   /* Now then.  Do we have an implicit jump to r15 to deal with? */
18878   if (r15written) {
      /* If we get a jump to deal with, we assume that there's been no
18880         other competing branch stuff previously generated for this
18881         insn.  That's reasonable, in the sense that the ARM insn set
18882         appears to declare as "Unpredictable" any instruction which
18883         generates more than one possible new value for r15.  Hence
18884         just assert.  The decoders themselves should check against
18885         all such instructions which are thusly Unpredictable, and
18886         decline to decode them.  Hence we should never get here if we
18887         have competing new values for r15, and hence it is safe to
18888         assert here. */
18889      vassert(dres.whatNext == Dis_Continue);
18890      vassert(irsb->next == NULL);
18891      vassert(irsb->jumpkind == Ijk_Boring);
18892      /* If r15 is unconditionally written, terminate the block by
18893         jumping to it.  If it's conditionally written, still
18894         terminate the block (a shame, but we can't do side exits to
18895         arbitrary destinations), but first jump to the next
18896         instruction if the condition doesn't hold. */
18897      /* We can't use getIReg(15) to get the destination, since that
18898         will produce r15+8, which isn't what we want.  Must use
18899         llGetIReg(15) instead. */
18900      if (r15guard == IRTemp_INVALID) {
18901         /* unconditional */
18902      } else {
18903         /* conditional */
18904         stmt( IRStmt_Exit(
18905                  unop(Iop_32to1,
18906                       binop(Iop_Xor32,
18907                             mkexpr(r15guard), mkU32(1))),
18908                  r15kind,
18909                  IRConst_U32(guest_R15_curr_instr_notENC + 4),
18910                  OFFB_R15T
18911         ));
18912      }
18913      /* This seems crazy, but we're required to finish the insn with
18914         a write to the guest PC.  As usual we rely on ir_opt to tidy
18915         up later. */
18916      llPutIReg(15, llGetIReg(15));
18917      dres.whatNext    = Dis_StopHere;
18918      dres.jk_StopHere = r15kind;
18919   } else {
18920      /* Set up the end-state in the normal way. */
18921      switch (dres.whatNext) {
18922         case Dis_Continue:
18923            llPutIReg(15, mkU32(dres.len + guest_R15_curr_instr_notENC));
18924            break;
18925         case Dis_ResteerU:
18926         case Dis_ResteerC:
18927            llPutIReg(15, mkU32(dres.continueAt));
18928            break;
18929         case Dis_StopHere:
18930            break;
18931         default:
18932            vassert(0);
18933      }
18934   }
18935
18936   return dres;
18937
18938#  undef INSN_COND
18939#  undef INSN
18940}
18941
18942
18943/*------------------------------------------------------------*/
18944/*--- Disassemble a single Thumb2 instruction              ---*/
18945/*------------------------------------------------------------*/
18946
/* Forward declaration only; the table contents are presumably
   defined later in this file.  The Thumb decoder below indexes it
   with the low 8 bits of a halfword that looks like an IT
   instruction (0xBFxy, y != 0) and expects to get back the number of
   instructions that IT guards, in the range 1 .. 4. */
static const UChar it_length_table[256]; /* fwds */
18948
18949/* NB: in Thumb mode we do fetches of regs with getIRegT, which
18950   automagically adds 4 to fetches of r15.  However, writes to regs
18951   are done with putIRegT, which disallows writes to r15.  Hence any
18952   r15 writes and associated jumps have to be done "by hand". */
18953
18954/* Disassemble a single Thumb instruction into IR.  The instruction is
18955   located in host memory at guest_instr, and has (decoded) guest IP
18956   of guest_R15_curr_instr_notENC, which will have been set before the
18957   call here. */
18958
18959static
18960DisResult disInstr_THUMB_WRK (
18961             Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
18962             Bool         resteerCisOk,
18963             void*        callback_opaque,
18964             const UChar* guest_instr,
18965             const VexArchInfo* archinfo,
18966             const VexAbiInfo*  abiinfo,
18967             Bool         sigill_diag
18968          )
18969{
18970   /* A macro to fish bits out of insn0.  There's also INSN1, to fish
18971      bits out of insn1, but that's defined only after the end of the
18972      16-bit insn decoder, so as to stop it mistakenly being used
18973      therein. */
18974#  define INSN0(_bMax,_bMin)  SLICE_UInt(((UInt)insn0), (_bMax), (_bMin))
18975
18976   DisResult dres;
18977   UShort    insn0; /*  first 16 bits of the insn */
18978   UShort    insn1; /* second 16 bits of the insn */
18979   HChar     dis_buf[128];  // big enough to hold LDMIA etc text
18980
18981   /* Summary result of the ITxxx backwards analysis: False == safe
18982      but suboptimal. */
18983   Bool guaranteedUnconditional = False;
18984
18985   /* Set result defaults. */
18986   dres.whatNext    = Dis_Continue;
18987   dres.len         = 2;
18988   dres.continueAt  = 0;
18989   dres.jk_StopHere = Ijk_INVALID;
18990   dres.hint        = Dis_HintNone;
18991
18992   /* Set default actions for post-insn handling of writes to r15, if
18993      required. */
18994   r15written = False;
18995   r15guard   = IRTemp_INVALID; /* unconditional */
18996   r15kind    = Ijk_Boring;
18997
18998   /* Insns could be 2 or 4 bytes long.  Just get the first 16 bits at
18999      this point.  If we need the second 16, get them later.  We can't
19000      get them both out immediately because it risks a fault (very
19001      unlikely, but ..) if the second 16 bits aren't actually
19002      necessary. */
19003   insn0 = getUShortLittleEndianly( guest_instr );
19004   insn1 = 0; /* We'll get it later, once we know we need it. */
19005
19006   /* Similarly, will set this later. */
19007   IRTemp old_itstate = IRTemp_INVALID;
19008
19009   if (0) vex_printf("insn: 0x%x\n", insn0);
19010
19011   DIP("\t(thumb) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
19012
19013   vassert(0 == (guest_R15_curr_instr_notENC & 1));
19014
19015   /* ----------------------------------------------------------- */
19016   /* Spot "Special" instructions (see comment at top of file). */
19017   {
19018      const UChar* code = guest_instr;
19019      /* Spot the 16-byte preamble:
19020
19021         ea4f 0cfc  mov.w   ip, ip, ror #3
19022         ea4f 3c7c  mov.w   ip, ip, ror #13
19023         ea4f 7c7c  mov.w   ip, ip, ror #29
19024         ea4f 4cfc  mov.w   ip, ip, ror #19
19025      */
19026      UInt word1 = 0x0CFCEA4F;
19027      UInt word2 = 0x3C7CEA4F;
19028      UInt word3 = 0x7C7CEA4F;
19029      UInt word4 = 0x4CFCEA4F;
19030      if (getUIntLittleEndianly(code+ 0) == word1 &&
19031          getUIntLittleEndianly(code+ 4) == word2 &&
19032          getUIntLittleEndianly(code+ 8) == word3 &&
19033          getUIntLittleEndianly(code+12) == word4) {
19034         /* Got a "Special" instruction preamble.  Which one is it? */
19035         // 0x 0A 0A EA 4A
19036         if (getUIntLittleEndianly(code+16) == 0x0A0AEA4A
19037                                               /* orr.w r10,r10,r10 */) {
19038            /* R3 = client_request ( R4 ) */
19039            DIP("r3 = client_request ( %%r4 )\n");
19040            llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
19041            dres.jk_StopHere = Ijk_ClientReq;
19042            dres.whatNext    = Dis_StopHere;
19043            goto decode_success;
19044         }
19045         else
19046         // 0x 0B 0B EA 4B
19047         if (getUIntLittleEndianly(code+16) == 0x0B0BEA4B
19048                                               /* orr r11,r11,r11 */) {
19049            /* R3 = guest_NRADDR */
19050            DIP("r3 = guest_NRADDR\n");
19051            dres.len = 20;
19052            llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
19053            goto decode_success;
19054         }
19055         else
19056         // 0x 0C 0C EA 4C
19057         if (getUIntLittleEndianly(code+16) == 0x0C0CEA4C
19058                                               /* orr r12,r12,r12 */) {
19059            /*  branch-and-link-to-noredir R4 */
19060            DIP("branch-and-link-to-noredir r4\n");
19061            llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
19062            llPutIReg(15, getIRegT(4));
19063            dres.jk_StopHere = Ijk_NoRedir;
19064            dres.whatNext    = Dis_StopHere;
19065            goto decode_success;
19066         }
19067         else
19068         // 0x 09 09 EA 49
19069         if (getUIntLittleEndianly(code+16) == 0x0909EA49
19070                                               /* orr r9,r9,r9 */) {
19071            /* IR injection */
19072            DIP("IR injection\n");
19073            vex_inject_ir(irsb, Iend_LE);
19074            // Invalidate the current insn. The reason is that the IRop we're
19075            // injecting here can change. In which case the translation has to
19076            // be redone. For ease of handling, we simply invalidate all the
19077            // time.
19078            stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
19079            stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
19080            llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
19081            dres.whatNext    = Dis_StopHere;
19082            dres.jk_StopHere = Ijk_InvalICache;
19083            goto decode_success;
19084         }
19085         /* We don't know what it is.  Set insn0 so decode_failure
19086            can print the insn following the Special-insn preamble. */
19087         insn0 = getUShortLittleEndianly(code+16);
19088         goto decode_failure;
19089         /*NOTREACHED*/
19090      }
19091
19092   }
19093
19094   /* ----------------------------------------------------------- */
19095
19096   /* Main Thumb instruction decoder starts here.  It's a series of
19097      switches which examine ever longer bit sequences at the MSB of
19098      the instruction word, first for 16-bit insns, then for 32-bit
19099      insns. */
19100
19101   /* --- BEGIN ITxxx optimisation analysis --- */
19102   /* This is a crucial optimisation for the ITState boilerplate that
19103      follows.  Examine the 9 halfwords preceding this instruction,
19104      and if we are absolutely sure that none of them constitute an
19105      'it' instruction, then we can be sure that this instruction is
19106      not under the control of any 'it' instruction, and so
19107      guest_ITSTATE must be zero.  So write zero into ITSTATE right
19108      now, so that iropt can fold out almost all of the resulting
19109      junk.
19110
19111      If we aren't sure, we can always safely skip this step.  So be a
19112      bit conservative about it: only poke around in the same page as
19113      this instruction, lest we get a fault from the previous page
19114      that would not otherwise have happened.  The saving grace is
19115      that such skipping is pretty rare -- it only happens,
19116      statistically, 18/4096ths of the time, so is judged unlikely to
      be a performance problem.
19118
19119      FIXME: do better.  Take into account the number of insns covered
19120      by any IT insns we find, to rule out cases where an IT clearly
19121      cannot cover this instruction.  This would improve behaviour for
19122      branch targets immediately following an IT-guarded group that is
19123      not of full length.  Eg, (and completely ignoring issues of 16-
19124      vs 32-bit insn length):
19125
19126             ite cond
19127             insn1
19128             insn2
19129      label: insn3
19130             insn4
19131
19132      The 'it' only conditionalises insn1 and insn2.  However, the
19133      current analysis is conservative and considers insn3 and insn4
19134      also possibly guarded.  Hence if 'label:' is the start of a hot
19135      loop we will get a big performance hit.
19136   */
19137   {
19138      /* Summary result of this analysis: False == safe but
19139         suboptimal. */
19140      vassert(guaranteedUnconditional == False);
19141
19142      UInt pc = guest_R15_curr_instr_notENC;
19143      vassert(0 == (pc & 1));
19144
19145      UInt pageoff = pc & 0xFFF;
19146      if (pageoff >= 18) {
19147         /* It's safe to poke about in the 9 halfwords preceding this
19148            insn.  So, have a look at them. */
19149         guaranteedUnconditional = True; /* assume no 'it' insn found,
19150                                            till we do */
19151         UShort* hwp = (UShort*)(HWord)pc;
19152         Int i;
19153         for (i = -1; i >= -9; i--) {
19154            /* We're in the same page.  (True, but commented out due
19155               to expense.) */
19156            /*
19157            vassert( ( ((UInt)(&hwp[i])) & 0xFFFFF000 )
19158                      == ( pc & 0xFFFFF000 ) );
19159            */
19160            /* All valid IT instructions must have the form 0xBFxy,
19161               where x can be anything, but y must be nonzero.  Find
19162               the number of insns covered by it (1 .. 4) and check to
19163               see if it can possibly reach up to the instruction in
19164               question.  Some (x,y) combinations mean UNPREDICTABLE,
19165               and the table is constructed to be conservative by
19166               returning 4 for those cases, so the analysis is safe
19167               even if the code uses unpredictable IT instructions (in
19168               which case its authors are nuts, but hey.)  */
19169            UShort hwp_i = hwp[i];
19170            if (UNLIKELY((hwp_i & 0xFF00) == 0xBF00 && (hwp_i & 0xF) != 0)) {
19171               /* might be an 'it' insn. */
19172               /* # guarded insns */
19173               Int n_guarded = (Int)it_length_table[hwp_i & 0xFF];
19174               vassert(n_guarded >= 1 && n_guarded <= 4);
19175               if (n_guarded * 2 /* # guarded HWs, worst case */
19176                   > (-(i+1)))   /* -(i+1): # remaining HWs after the IT */
19177                   /* -(i+0) also seems to work, even though I think
19178                      it's wrong.  I don't understand that. */
19179                  guaranteedUnconditional = False;
19180               break;
19181            }
19182         }
19183      }
19184   }
19185   /* --- END ITxxx optimisation analysis --- */
19186
19187   /* Generate the guarding condition for this insn, by examining
19188      ITSTATE.  Assign it to condT.  Also, generate new
19189      values for ITSTATE ready for stuffing back into the
19190      guest state, but don't actually do the Put yet, since it will
      need to be stuffed back in only after the instruction gets to a
19192      point where it is sure to complete.  Mostly we let the code at
19193      decode_success handle this, but in cases where the insn contains
19194      a side exit, we have to update them before the exit. */
19195
19196   /* If the ITxxx optimisation analysis above could not prove that
19197      this instruction is guaranteed unconditional, we insert a
19198      lengthy IR preamble to compute the guarding condition at
19199      runtime.  If it can prove it (which obviously we hope is the
19200      normal case) then we insert a minimal preamble, which is
19201      equivalent to setting guest_ITSTATE to zero and then folding
19202      that through the full preamble (which completely disappears). */
19203
19204   IRTemp condT              = IRTemp_INVALID;
19205   IRTemp cond_AND_notInIT_T = IRTemp_INVALID;
19206
19207   IRTemp new_itstate        = IRTemp_INVALID;
19208   vassert(old_itstate == IRTemp_INVALID);
19209
19210   if (guaranteedUnconditional) {
19211      /* BEGIN "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
19212
19213      // ITSTATE = 0 :: I32
19214      IRTemp z32 = newTemp(Ity_I32);
19215      assign(z32, mkU32(0));
19216      put_ITSTATE(z32);
19217
19218      // old_itstate = 0 :: I32
19219      //
19220      // old_itstate = get_ITSTATE();
19221      old_itstate = z32; /* 0 :: I32 */
19222
19223      // new_itstate = old_itstate >> 8
19224      //             = 0 >> 8
19225      //             = 0 :: I32
19226      //
19227      // new_itstate = newTemp(Ity_I32);
19228      // assign(new_itstate,
19229      //        binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
19230      new_itstate = z32;
19231
19232      // ITSTATE = 0 :: I32(again)
19233      //
19234      // put_ITSTATE(new_itstate);
19235
      // condT1 = calc_cond_dyn( xor(and(old_itstate,0xF0), 0xE0) )
19237      //        = calc_cond_dyn( xor(0,0xE0) )
19238      //        = calc_cond_dyn ( 0xE0 )
19239      //        = 1 :: I32
19240      // Not that this matters, since the computed value is not used:
19241      // see condT folding below
19242      //
19243      // IRTemp condT1 = newTemp(Ity_I32);
19244      // assign(condT1,
19245      //        mk_armg_calculate_condition_dyn(
19246      //           binop(Iop_Xor32,
19247      //                 binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
19248      //                 mkU32(0xE0))
19249      //       )
19250      // );
19251
19252      // condT = 32to8(and32(old_itstate,0xF0)) == 0  ? 1  : condT1
19253      //       = 32to8(and32(0,0xF0)) == 0  ? 1  : condT1
19254      //       = 32to8(0) == 0  ? 1  : condT1
19255      //       = 0 == 0  ? 1  : condT1
19256      //       = 1
19257      //
19258      // condT = newTemp(Ity_I32);
19259      // assign(condT, IRExpr_ITE(
19260      //                  unop(Iop_32to8, binop(Iop_And32,
19261      //                                        mkexpr(old_itstate),
19262      //                                        mkU32(0xF0))),
19263      //                  mkexpr(condT1),
19264      //                  mkU32(1))
19265      //       ));
19266      condT = newTemp(Ity_I32);
19267      assign(condT, mkU32(1));
19268
19269      // notInITt = xor32(and32(old_itstate, 1), 1)
19270      //          = xor32(and32(0, 1), 1)
19271      //          = xor32(0, 1)
19272      //          = 1 :: I32
19273      //
19274      // IRTemp notInITt = newTemp(Ity_I32);
19275      // assign(notInITt,
19276      //        binop(Iop_Xor32,
19277      //              binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
19278      //              mkU32(1)));
19279
19280      // cond_AND_notInIT_T = and32(notInITt, condT)
19281      //                    = and32(1, 1)
19282      //                    = 1
19283      //
19284      // cond_AND_notInIT_T = newTemp(Ity_I32);
19285      // assign(cond_AND_notInIT_T,
19286      //        binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
19287      cond_AND_notInIT_T = condT; /* 1 :: I32 */
19288
19289      /* END "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
19290   } else {
19291      /* BEGIN { STANDARD PREAMBLE; } */
19292
19293      old_itstate = get_ITSTATE();
19294
19295      new_itstate = newTemp(Ity_I32);
19296      assign(new_itstate,
19297             binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
19298
19299      put_ITSTATE(new_itstate);
19300
19301      /* Same strategy as for ARM insns: generate a condition
19302         temporary at this point (or IRTemp_INVALID, meaning
19303         unconditional).  We leave it to lower-level instruction
19304         decoders to decide whether they can generate straight-line
19305         code, or whether they must generate a side exit before the
19306         instruction.  condT :: Ity_I32 and is always either zero or
19307         one. */
19308      IRTemp condT1 = newTemp(Ity_I32);
19309      assign(condT1,
19310             mk_armg_calculate_condition_dyn(
19311                binop(Iop_Xor32,
19312                      binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
19313                      mkU32(0xE0))
19314            )
19315      );
19316
19317      /* This is a bit complex, but needed to make Memcheck understand
19318         that, if the condition in old_itstate[7:4] denotes AL (that
19319         is, if this instruction is to be executed unconditionally),
19320         then condT does not depend on the results of calling the
19321         helper.
19322
19323         We test explicitly for old_itstate[7:4] == AL ^ 0xE, and in
19324         that case set condT directly to 1.  Else we use the results
19325         of the helper.  Since old_itstate is always defined and
19326         because Memcheck does lazy V-bit propagation through ITE,
19327         this will cause condT to always be a defined 1 if the
19328         condition is 'AL'.  From an execution semantics point of view
19329         this is irrelevant since we're merely duplicating part of the
19330         behaviour of the helper.  But it makes it clear to Memcheck,
19331         in this case, that condT does not in fact depend on the
19332         contents of the condition code thunk.  Without it, we get
19333         quite a lot of false errors.
19334
19335         So, just to clarify: from a straight semantics point of view,
19336         we can simply do "assign(condT, mkexpr(condT1))", and the
19337         simulator still runs fine.  It's just that we get loads of
19338         false errors from Memcheck. */
19339      condT = newTemp(Ity_I32);
19340      assign(condT, IRExpr_ITE(
19341                       binop(Iop_CmpNE32, binop(Iop_And32,
19342                                                mkexpr(old_itstate),
19343                                                mkU32(0xF0)),
19344                                          mkU32(0)),
19345                       mkexpr(condT1),
19346                       mkU32(1)
19347            ));
19348
19349      /* Something we don't have in ARM: generate a 0 or 1 value
19350         indicating whether or not we are in an IT block (NB: 0 = in
19351         IT block, 1 = not in IT block).  This is used to gate
19352         condition code updates in 16-bit Thumb instructions. */
19353      IRTemp notInITt = newTemp(Ity_I32);
19354      assign(notInITt,
19355             binop(Iop_Xor32,
19356                   binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
19357                   mkU32(1)));
19358
19359      /* Compute 'condT && notInITt' -- that is, the instruction is
19360         going to execute, and we're not in an IT block.  This is the
19361         gating condition for updating condition codes in 16-bit Thumb
19362         instructions, except for CMP, CMN and TST. */
19363      cond_AND_notInIT_T = newTemp(Ity_I32);
19364      assign(cond_AND_notInIT_T,
19365             binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
19366      /* END { STANDARD PREAMBLE; } */
19367   }
19368
19369
19370   /* At this point:
19371      * ITSTATE has been updated
19372      * condT holds the guarding condition for this instruction (0 or 1),
19373      * notInITt is 1 if we're in "normal" code, 0 if in an IT block
19374      * cond_AND_notInIT_T is the AND of the above two.
19375
19376      If the instruction proper can't trap, then there's nothing else
      to do w.r.t. ITSTATE -- just go and generate IR for the
19378      insn, taking into account the guarding condition.
19379
19380      If, however, the instruction might trap, then we must back up
19381      ITSTATE to the old value, and re-update it after the potentially
19382      trapping IR section.  A trap can happen either via a memory
19383      reference or because we need to throw SIGILL.
19384
19385      If an instruction has a side exit, we need to be sure that any
19386      ITSTATE backup is re-updated before the side exit.
19387   */
19388
19389   /* ----------------------------------------------------------- */
19390   /* --                                                       -- */
19391   /* -- Thumb 16-bit integer instructions                     -- */
19392   /* --                                                       -- */
19393   /* -- IMPORTANT: references to insn1 or INSN1 are           -- */
19394   /* --            not allowed in this section                -- */
19395   /* --                                                       -- */
19396   /* ----------------------------------------------------------- */
19397
19398   /* 16-bit instructions inside an IT block, apart from CMP, CMN and
19399      TST, do not set the condition codes.  Hence we must dynamically
19400      test for this case for every condition code update. */
19401
19402   IROp   anOp   = Iop_INVALID;
19403   const HChar* anOpNm = NULL;
19404
19405   /* ================ 16-bit 15:6 cases ================ */
19406
19407   switch (INSN0(15,6)) {
19408
19409   case 0x10a:   // CMP
19410   case 0x10b: { // CMN
19411      /* ---------------- CMP Rn, Rm ---------------- */
19412      Bool   isCMN = INSN0(15,6) == 0x10b;
19413      UInt   rN    = INSN0(2,0);
19414      UInt   rM    = INSN0(5,3);
19415      IRTemp argL  = newTemp(Ity_I32);
19416      IRTemp argR  = newTemp(Ity_I32);
19417      assign( argL, getIRegT(rN) );
19418      assign( argR, getIRegT(rM) );
19419      /* Update flags regardless of whether in an IT block or not. */
19420      setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
19421                      argL, argR, condT );
19422      DIP("%s r%u, r%u\n", isCMN ? "cmn" : "cmp", rN, rM);
19423      goto decode_success;
19424   }
19425
19426   case 0x108: {
19427      /* ---------------- TST Rn, Rm ---------------- */
19428      UInt   rN   = INSN0(2,0);
19429      UInt   rM   = INSN0(5,3);
19430      IRTemp oldC = newTemp(Ity_I32);
19431      IRTemp oldV = newTemp(Ity_I32);
19432      IRTemp res  = newTemp(Ity_I32);
19433      assign( oldC, mk_armg_calculate_flag_c() );
19434      assign( oldV, mk_armg_calculate_flag_v() );
19435      assign( res,  binop(Iop_And32, getIRegT(rN), getIRegT(rM)) );
19436      /* Update flags regardless of whether in an IT block or not. */
19437      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
19438      DIP("tst r%u, r%u\n", rN, rM);
19439      goto decode_success;
19440   }
19441
19442   case 0x109: {
19443      /* ---------------- NEGS Rd, Rm ---------------- */
19444      /* Rd = -Rm */
19445      UInt   rM   = INSN0(5,3);
19446      UInt   rD   = INSN0(2,0);
19447      IRTemp arg  = newTemp(Ity_I32);
19448      IRTemp zero = newTemp(Ity_I32);
19449      assign(arg, getIRegT(rM));
19450      assign(zero, mkU32(0));
19451      // rD can never be r15
19452      putIRegT(rD, binop(Iop_Sub32, mkexpr(zero), mkexpr(arg)), condT);
19453      setFlags_D1_D2( ARMG_CC_OP_SUB, zero, arg, cond_AND_notInIT_T);
19454      DIP("negs r%u, r%u\n", rD, rM);
19455      goto decode_success;
19456   }
19457
19458   case 0x10F: {
19459      /* ---------------- MVNS Rd, Rm ---------------- */
19460      /* Rd = ~Rm */
19461      UInt   rM   = INSN0(5,3);
19462      UInt   rD   = INSN0(2,0);
19463      IRTemp oldV = newTemp(Ity_I32);
19464      IRTemp oldC = newTemp(Ity_I32);
19465      IRTemp res  = newTemp(Ity_I32);
19466      assign( oldV, mk_armg_calculate_flag_v() );
19467      assign( oldC, mk_armg_calculate_flag_c() );
19468      assign(res, unop(Iop_Not32, getIRegT(rM)));
19469      // rD can never be r15
19470      putIRegT(rD, mkexpr(res), condT);
19471      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19472                         cond_AND_notInIT_T );
19473      DIP("mvns r%u, r%u\n", rD, rM);
19474      goto decode_success;
19475   }
19476
19477   case 0x10C:
19478      /* ---------------- ORRS Rd, Rm ---------------- */
19479      anOp = Iop_Or32; anOpNm = "orr"; goto and_orr_eor_mul;
19480   case 0x100:
19481      /* ---------------- ANDS Rd, Rm ---------------- */
19482      anOp = Iop_And32; anOpNm = "and"; goto and_orr_eor_mul;
19483   case 0x101:
19484      /* ---------------- EORS Rd, Rm ---------------- */
19485      anOp = Iop_Xor32; anOpNm = "eor"; goto and_orr_eor_mul;
19486   case 0x10d:
19487      /* ---------------- MULS Rd, Rm ---------------- */
19488      anOp = Iop_Mul32; anOpNm = "mul"; goto and_orr_eor_mul;
19489   and_orr_eor_mul: {
19490      /* Rd = Rd `op` Rm */
19491      UInt   rM   = INSN0(5,3);
19492      UInt   rD   = INSN0(2,0);
19493      IRTemp res  = newTemp(Ity_I32);
19494      IRTemp oldV = newTemp(Ity_I32);
19495      IRTemp oldC = newTemp(Ity_I32);
19496      assign( oldV, mk_armg_calculate_flag_v() );
19497      assign( oldC, mk_armg_calculate_flag_c() );
19498      assign( res, binop(anOp, getIRegT(rD), getIRegT(rM) ));
19499      // not safe to read guest state after here
19500      // rD can never be r15
19501      putIRegT(rD, mkexpr(res), condT);
19502      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19503                         cond_AND_notInIT_T );
19504      DIP("%s r%u, r%u\n", anOpNm, rD, rM);
19505      goto decode_success;
19506   }
19507
19508   case 0x10E: {
19509      /* ---------------- BICS Rd, Rm ---------------- */
19510      /* Rd = Rd & ~Rm */
19511      UInt   rM   = INSN0(5,3);
19512      UInt   rD   = INSN0(2,0);
19513      IRTemp res  = newTemp(Ity_I32);
19514      IRTemp oldV = newTemp(Ity_I32);
19515      IRTemp oldC = newTemp(Ity_I32);
19516      assign( oldV, mk_armg_calculate_flag_v() );
19517      assign( oldC, mk_armg_calculate_flag_c() );
19518      assign( res, binop(Iop_And32, getIRegT(rD),
19519                                    unop(Iop_Not32, getIRegT(rM) )));
19520      // not safe to read guest state after here
19521      // rD can never be r15
19522      putIRegT(rD, mkexpr(res), condT);
19523      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19524                         cond_AND_notInIT_T );
19525      DIP("bics r%u, r%u\n", rD, rM);
19526      goto decode_success;
19527   }
19528
19529   case 0x105: {
19530      /* ---------------- ADCS Rd, Rm ---------------- */
19531      /* Rd = Rd + Rm + oldC */
19532      UInt   rM   = INSN0(5,3);
19533      UInt   rD   = INSN0(2,0);
19534      IRTemp argL = newTemp(Ity_I32);
19535      IRTemp argR = newTemp(Ity_I32);
19536      IRTemp oldC = newTemp(Ity_I32);
19537      IRTemp res  = newTemp(Ity_I32);
19538      assign(argL, getIRegT(rD));
19539      assign(argR, getIRegT(rM));
19540      assign(oldC, mk_armg_calculate_flag_c());
19541      assign(res, binop(Iop_Add32,
19542                        binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
19543                        mkexpr(oldC)));
19544      // rD can never be r15
19545      putIRegT(rD, mkexpr(res), condT);
19546      setFlags_D1_D2_ND( ARMG_CC_OP_ADC, argL, argR, oldC,
19547                         cond_AND_notInIT_T );
19548      DIP("adcs r%u, r%u\n", rD, rM);
19549      goto decode_success;
19550   }
19551
19552   case 0x106: {
19553      /* ---------------- SBCS Rd, Rm ---------------- */
19554      /* Rd = Rd - Rm - (oldC ^ 1) */
19555      UInt   rM   = INSN0(5,3);
19556      UInt   rD   = INSN0(2,0);
19557      IRTemp argL = newTemp(Ity_I32);
19558      IRTemp argR = newTemp(Ity_I32);
19559      IRTemp oldC = newTemp(Ity_I32);
19560      IRTemp res  = newTemp(Ity_I32);
19561      assign(argL, getIRegT(rD));
19562      assign(argR, getIRegT(rM));
19563      assign(oldC, mk_armg_calculate_flag_c());
19564      assign(res, binop(Iop_Sub32,
19565                        binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
19566                        binop(Iop_Xor32, mkexpr(oldC), mkU32(1))));
19567      // rD can never be r15
19568      putIRegT(rD, mkexpr(res), condT);
19569      setFlags_D1_D2_ND( ARMG_CC_OP_SBB, argL, argR, oldC,
19570                         cond_AND_notInIT_T );
19571      DIP("sbcs r%u, r%u\n", rD, rM);
19572      goto decode_success;
19573   }
19574
19575   case 0x2CB: {
19576      /* ---------------- UXTB Rd, Rm ---------------- */
19577      /* Rd = 8Uto32(Rm) */
19578      UInt rM = INSN0(5,3);
19579      UInt rD = INSN0(2,0);
19580      putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFF)),
19581                   condT);
19582      DIP("uxtb r%u, r%u\n", rD, rM);
19583      goto decode_success;
19584   }
19585
19586   case 0x2C9: {
19587      /* ---------------- SXTB Rd, Rm ---------------- */
19588      /* Rd = 8Sto32(Rm) */
19589      UInt rM = INSN0(5,3);
19590      UInt rD = INSN0(2,0);
19591      putIRegT(rD, binop(Iop_Sar32,
19592                         binop(Iop_Shl32, getIRegT(rM), mkU8(24)),
19593                         mkU8(24)),
19594                   condT);
19595      DIP("sxtb r%u, r%u\n", rD, rM);
19596      goto decode_success;
19597   }
19598
19599   case 0x2CA: {
19600      /* ---------------- UXTH Rd, Rm ---------------- */
19601      /* Rd = 16Uto32(Rm) */
19602      UInt rM = INSN0(5,3);
19603      UInt rD = INSN0(2,0);
19604      putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFFFF)),
19605                   condT);
19606      DIP("uxth r%u, r%u\n", rD, rM);
19607      goto decode_success;
19608   }
19609
19610   case 0x2C8: {
19611      /* ---------------- SXTH Rd, Rm ---------------- */
19612      /* Rd = 16Sto32(Rm) */
19613      UInt rM = INSN0(5,3);
19614      UInt rD = INSN0(2,0);
19615      putIRegT(rD, binop(Iop_Sar32,
19616                         binop(Iop_Shl32, getIRegT(rM), mkU8(16)),
19617                         mkU8(16)),
19618                   condT);
19619      DIP("sxth r%u, r%u\n", rD, rM);
19620      goto decode_success;
19621   }
19622
19623   case 0x102:   // LSLS
19624   case 0x103:   // LSRS
19625   case 0x104:   // ASRS
19626   case 0x107: { // RORS
19627      /* ---------------- LSLS Rs, Rd ---------------- */
19628      /* ---------------- LSRS Rs, Rd ---------------- */
19629      /* ---------------- ASRS Rs, Rd ---------------- */
19630      /* ---------------- RORS Rs, Rd ---------------- */
19631      /* Rd = Rd `op` Rs, and set flags */
19632      UInt   rS   = INSN0(5,3);
19633      UInt   rD   = INSN0(2,0);
19634      IRTemp oldV = newTemp(Ity_I32);
19635      IRTemp rDt  = newTemp(Ity_I32);
19636      IRTemp rSt  = newTemp(Ity_I32);
19637      IRTemp res  = newTemp(Ity_I32);
19638      IRTemp resC = newTemp(Ity_I32);
19639      const HChar* wot  = "???";
19640      assign(rSt, getIRegT(rS));
19641      assign(rDt, getIRegT(rD));
19642      assign(oldV, mk_armg_calculate_flag_v());
19643      /* Does not appear to be the standard 'how' encoding. */
19644      switch (INSN0(15,6)) {
19645         case 0x102:
19646            compute_result_and_C_after_LSL_by_reg(
19647               dis_buf, &res, &resC, rDt, rSt, rD, rS
19648            );
19649            wot = "lsl";
19650            break;
19651         case 0x103:
19652            compute_result_and_C_after_LSR_by_reg(
19653               dis_buf, &res, &resC, rDt, rSt, rD, rS
19654            );
19655            wot = "lsr";
19656            break;
19657         case 0x104:
19658            compute_result_and_C_after_ASR_by_reg(
19659               dis_buf, &res, &resC, rDt, rSt, rD, rS
19660            );
19661            wot = "asr";
19662            break;
19663         case 0x107:
19664            compute_result_and_C_after_ROR_by_reg(
19665               dis_buf, &res, &resC, rDt, rSt, rD, rS
19666            );
19667            wot = "ror";
19668            break;
19669         default:
19670            /*NOTREACHED*/vassert(0);
19671      }
19672      // not safe to read guest state after this point
19673      putIRegT(rD, mkexpr(res), condT);
19674      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
19675                         cond_AND_notInIT_T );
19676      DIP("%ss r%u, r%u\n", wot, rS, rD);
19677      goto decode_success;
19678   }
19679
19680   case 0x2E8:   // REV
19681   case 0x2E9: { // REV16
19682      /* ---------------- REV   Rd, Rm ---------------- */
19683      /* ---------------- REV16 Rd, Rm ---------------- */
19684      UInt rM = INSN0(5,3);
19685      UInt rD = INSN0(2,0);
19686      Bool isREV = INSN0(15,6) == 0x2E8;
19687      IRTemp arg = newTemp(Ity_I32);
19688      assign(arg, getIRegT(rM));
19689      IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
19690      putIRegT(rD, mkexpr(res), condT);
19691      DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM);
19692      goto decode_success;
19693   }
19694
19695   case 0x2EB: { // REVSH
19696      /* ---------------- REVSH Rd, Rm ---------------- */
19697      UInt rM = INSN0(5,3);
19698      UInt rD = INSN0(2,0);
19699      IRTemp irt_rM  = newTemp(Ity_I32);
19700      IRTemp irt_hi  = newTemp(Ity_I32);
19701      IRTemp irt_low = newTemp(Ity_I32);
19702      IRTemp irt_res = newTemp(Ity_I32);
19703      assign(irt_rM, getIRegT(rM));
19704      assign(irt_hi,
19705             binop(Iop_Sar32,
19706                   binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
19707                   mkU8(16)
19708             )
19709      );
19710      assign(irt_low,
19711             binop(Iop_And32,
19712                   binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
19713                   mkU32(0xFF)
19714             )
19715      );
19716      assign(irt_res,
19717             binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
19718      );
19719      putIRegT(rD, mkexpr(irt_res), condT);
19720      DIP("revsh r%u, r%u\n", rD, rM);
19721      goto decode_success;
19722   }
19723
19724   default:
19725      break; /* examine the next shortest prefix */
19726
19727   }
19728
19729
19730   /* ================ 16-bit 15:7 cases ================ */
19731
19732   switch (INSN0(15,7)) {
19733
19734   case BITS9(1,0,1,1,0,0,0,0,0): {
19735      /* ------------ ADD SP, #imm7 * 4 ------------ */
19736      UInt uimm7 = INSN0(6,0);
19737      putIRegT(13, binop(Iop_Add32, getIRegT(13), mkU32(uimm7 * 4)),
19738                   condT);
19739      DIP("add sp, #%u\n", uimm7 * 4);
19740      goto decode_success;
19741   }
19742
19743   case BITS9(1,0,1,1,0,0,0,0,1): {
19744      /* ------------ SUB SP, #imm7 * 4 ------------ */
19745      UInt uimm7 = INSN0(6,0);
19746      putIRegT(13, binop(Iop_Sub32, getIRegT(13), mkU32(uimm7 * 4)),
19747                   condT);
19748      DIP("sub sp, #%u\n", uimm7 * 4);
19749      goto decode_success;
19750   }
19751
19752   case BITS9(0,1,0,0,0,1,1,1,0): {
19753      /* ---------------- BX rM ---------------- */
19754      /* Branch to reg, and optionally switch modes.  The reg therefore
19755         holds a suitably encoded address (with CPSR.T in the bottom
19756         bit).  Have to special-case r15, as usual. */
19757      UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
19758      if (BITS3(0,0,0) == INSN0(2,0)) {
19759         IRTemp dst = newTemp(Ity_I32);
19760         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19761         mk_skip_over_T16_if_cond_is_false(condT);
19762         condT = IRTemp_INVALID;
19763         // now uncond
19764         if (rM <= 14) {
19765            assign( dst, getIRegT(rM) );
19766         } else {
19767            vassert(rM == 15);
19768            assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
19769         }
19770         llPutIReg(15, mkexpr(dst));
19771         dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
19772         dres.whatNext    = Dis_StopHere;
19773         DIP("bx r%u (possibly switch to ARM mode)\n", rM);
19774         goto decode_success;
19775      }
19776      break;
19777   }
19778
19779   /* ---------------- BLX rM ---------------- */
19780   /* Branch and link to interworking address in rM. */
19781   case BITS9(0,1,0,0,0,1,1,1,1): {
19782      if (BITS3(0,0,0) == INSN0(2,0)) {
19783         UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
19784         IRTemp dst = newTemp(Ity_I32);
19785         if (rM <= 14) {
19786            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19787            mk_skip_over_T16_if_cond_is_false(condT);
19788            condT = IRTemp_INVALID;
19789            // now uncond
19790            /* We're returning to Thumb code, hence "| 1" */
19791            assign( dst, getIRegT(rM) );
19792            putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
19793                          IRTemp_INVALID );
19794            llPutIReg(15, mkexpr(dst));
19795            dres.jk_StopHere = Ijk_Call;
19796            dres.whatNext    = Dis_StopHere;
19797            DIP("blx r%u (possibly switch to ARM mode)\n", rM);
19798            goto decode_success;
19799         }
19800         /* else unpredictable, fall through */
19801      }
19802      break;
19803   }
19804
19805   default:
19806      break; /* examine the next shortest prefix */
19807
19808   }
19809
19810
19811   /* ================ 16-bit 15:8 cases ================ */
19812
19813   switch (INSN0(15,8)) {
19814
19815   case BITS8(1,1,0,1,1,1,1,1): {
19816      /* ---------------- SVC ---------------- */
19817      UInt imm8 = INSN0(7,0);
19818      if (imm8 == 0) {
19819         /* A syscall.  We can't do this conditionally, hence: */
19820         mk_skip_over_T16_if_cond_is_false( condT );
19821         // FIXME: what if we have to back up and restart this insn?
19822         // then ITSTATE will be wrong (we'll have it as "used")
19823         // when it isn't.  Correct is to save ITSTATE in a
19824         // stash pseudo-reg, and back up from that if we have to
19825         // restart.
19826         // uncond after here
19827         llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ));
19828         dres.jk_StopHere = Ijk_Sys_syscall;
19829         dres.whatNext    = Dis_StopHere;
19830         DIP("svc #0x%08x\n", imm8);
19831         goto decode_success;
19832      }
19833      /* else not decoded here; leave the switch and try the next prefix */
19834      break;
19835   }
19836
19837   case BITS8(0,1,0,0,0,1,0,0): {
19838      /* ---------------- ADD(HI) Rd, Rm ---------------- */
19839      UInt h1 = INSN0(7,7);
19840      UInt h2 = INSN0(6,6);
19841      UInt rM = (h2 << 3) | INSN0(5,3);
19842      UInt rD = (h1 << 3) | INSN0(2,0);
19843      //if (h1 == 0 && h2 == 0) { // Original T1 was more restrictive
19844      if (rD == 15 && rM == 15) {
19845         // then it's invalid
19846      } else {
19847         IRTemp res = newTemp(Ity_I32);
19848         assign( res, binop(Iop_Add32, getIRegT(rD), getIRegT(rM) ));
19849         if (rD != 15) {
19850            putIRegT( rD, mkexpr(res), condT );
19851         } else {
19852            /* Only allowed outside or last-in IT block; SIGILL if not so. */
19853            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19854            /* jump over insn if not selected */
19855            mk_skip_over_T16_if_cond_is_false(condT);
19856            condT = IRTemp_INVALID;
19857            // now uncond
19858            /* non-interworking branch */
19859            llPutIReg(15, binop(Iop_Or32, mkexpr(res), mkU32(1)));
19860            dres.jk_StopHere = Ijk_Boring;
19861            dres.whatNext    = Dis_StopHere;
19862         }
19863         DIP("add(hi) r%u, r%u\n", rD, rM);
19864         goto decode_success;
19865      }
19866      break;
19867   }
19868
19869   case BITS8(0,1,0,0,0,1,0,1): {
19870      /* ---------------- CMP(HI) Rn, Rm ---------------- */
19871      UInt h1 = INSN0(7,7);
19872      UInt h2 = INSN0(6,6);
19873      UInt rM = (h2 << 3) | INSN0(5,3);
19874      UInt rN = (h1 << 3) | INSN0(2,0);
19875      if (h1 != 0 || h2 != 0) {
19876         IRTemp argL  = newTemp(Ity_I32);
19877         IRTemp argR  = newTemp(Ity_I32);
19878         assign( argL, getIRegT(rN) );
19879         assign( argR, getIRegT(rM) );
19880         /* Update flags regardless of whether in an IT block or not. */
19881         setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
19882         DIP("cmphi r%u, r%u\n", rN, rM);
19883         goto decode_success;
19884      }
19885      break;
19886   }
19887
19888   case BITS8(0,1,0,0,0,1,1,0): {
19889      /* ---------------- MOV(HI) Rd, Rm ---------------- */
19890      UInt h1 = INSN0(7,7);
19891      UInt h2 = INSN0(6,6);
19892      UInt rM = (h2 << 3) | INSN0(5,3);
19893      UInt rD = (h1 << 3) | INSN0(2,0);
19894      /* The old ARM ARM seems to disallow the case where both Rd and
19895         Rm are "low" registers, but newer versions allow it. */
19896      if (1 /*h1 != 0 || h2 != 0*/) {
19897         IRTemp val = newTemp(Ity_I32);
19898         assign( val, getIRegT(rM) );
19899         if (rD != 15) {
19900            putIRegT( rD, mkexpr(val), condT );
19901         } else {
19902            /* Only allowed outside or last-in IT block; SIGILL if not so. */
19903            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19904            /* jump over insn if not selected */
19905            mk_skip_over_T16_if_cond_is_false(condT);
19906            condT = IRTemp_INVALID;
19907            // now uncond
19908            /* non-interworking branch */
19909            llPutIReg(15, binop(Iop_Or32, mkexpr(val), mkU32(1)));
19910            dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
19911            dres.whatNext    = Dis_StopHere;
19912         }
19913         DIP("mov r%u, r%u\n", rD, rM);
19914         goto decode_success;
19915      }
19916      break;
19917   }
19918
19919   case BITS8(1,0,1,1,1,1,1,1): {
19920      /* ---------------- IT (if-then) ---------------- */
19921      UInt firstcond = INSN0(7,4);
19922      UInt mask = INSN0(3,0);
19923      UInt newITSTATE = 0;
19924      /* This is the ITSTATE represented as described in
19925         libvex_guest_arm.h.  It is not the ARM ARM representation. */
19926      HChar c1 = '.';
19927      HChar c2 = '.';
19928      HChar c3 = '.';
19929      Bool valid = compute_ITSTATE( &newITSTATE, &c1, &c2, &c3,
19930                                    firstcond, mask );
19931      if (valid && firstcond != 0xF/*NV*/) {
19932         /* Not allowed in an IT block; SIGILL if so. */
19933         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
19934
19935         IRTemp t = newTemp(Ity_I32);
19936         assign(t, mkU32(newITSTATE));
19937         put_ITSTATE(t);
19938
19939         DIP("it%c%c%c %s\n", c1, c2, c3, nCC(firstcond));
19940         goto decode_success;
19941      }
19942      break;
19943   }
19944
19945   case BITS8(1,0,1,1,0,0,0,1):
19946   case BITS8(1,0,1,1,0,0,1,1):
19947   case BITS8(1,0,1,1,1,0,0,1):
19948   case BITS8(1,0,1,1,1,0,1,1): {
19949      /* ---------------- CB{N}Z ---------------- */
19950      UInt rN    = INSN0(2,0);
19951      UInt bOP   = INSN0(11,11);
19952      UInt imm32 = (INSN0(9,9) << 6) | (INSN0(7,3) << 1);
19953      gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
19954      /* It's a conditional branch forward. */
19955      IRTemp kond = newTemp(Ity_I1);
19956      assign( kond, binop(bOP ? Iop_CmpNE32 : Iop_CmpEQ32,
19957                          getIRegT(rN), mkU32(0)) );
19958
19959      vassert(0 == (guest_R15_curr_instr_notENC & 1));
19960      /* Looks like the nearest insn we can branch to is the one after
19961         next.  That makes sense, as there's no point in being able to
19962         encode a conditional branch to the next instruction. */
19963      UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
19964      stmt(IRStmt_Exit( mkexpr(kond),
19965                        Ijk_Boring,
19966                        IRConst_U32(toUInt(dst)),
19967                        OFFB_R15T ));
19968      DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1);
19969      goto decode_success;
19970   }
19971
19972   default:
19973      break; /* examine the next shortest prefix */
19974
19975   }
19976
19977
19978   /* ================ 16-bit 15:9 cases ================ */
19979
19980   switch (INSN0(15,9)) {
19981
19982   case BITS7(1,0,1,1,0,1,0): {
19983      /* ---------------- PUSH ---------------- */
19984      /* This is a bit like STMxx, but way simpler. Complications we
19985         don't have to deal with:
19986         * SP being one of the transferred registers
19987         * direction (increment vs decrement)
19988         * before-vs-after-ness
19989      */
19990      Int  i, nRegs;
19991      UInt bitR    = INSN0(8,8);
19992      UInt regList = INSN0(7,0);
19993      if (bitR) regList |= (1 << 14);
19994
19995      /* At least one register must be transferred, else result is
19996         UNPREDICTABLE. */
19997      if (regList != 0) {
19998         /* Since we can't generate a guaranteed non-trapping IR
19999            sequence, (1) jump over the insn if it is gated false, and
20000            (2) back out the ITSTATE update. */
20001         mk_skip_over_T16_if_cond_is_false(condT);
20002         condT = IRTemp_INVALID;
20003         put_ITSTATE(old_itstate);
20004         // now uncond
20005
20006         nRegs = 0;
20007         for (i = 0; i < 16; i++) {
20008            if ((regList & (1 << i)) != 0)
20009               nRegs++;
20010         }
20011         vassert(nRegs >= 1 && nRegs <= 9);
20012
20013         /* Move SP down first of all, so we're "covered".  And don't
20014            mess with its alignment. */
20015         IRTemp newSP = newTemp(Ity_I32);
20016         assign(newSP, binop(Iop_Sub32, getIRegT(13), mkU32(4 * nRegs)));
20017         putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
20018
20019         /* Generate a transfer base address as a forced-aligned
20020            version of the final SP value. */
20021         IRTemp base = newTemp(Ity_I32);
20022         assign(base, binop(Iop_And32, mkexpr(newSP), mkU32(~3)));
20023
20024         /* Now the transfers */
20025         nRegs = 0;
20026         for (i = 0; i < 16; i++) {
20027            if ((regList & (1 << i)) != 0) {
20028               storeLE( binop(Iop_Add32, mkexpr(base), mkU32(4 * nRegs)),
20029                        getIRegT(i) );
20030               nRegs++;
20031            }
20032         }
20033
20034         /* Reinstate the ITSTATE update. */
20035         put_ITSTATE(new_itstate);
20036
20037         DIP("push {%s0x%04x}\n", bitR ? "lr," : "", regList & 0xFF);
20038         goto decode_success;
20039      }
20040      break;
20041   }
20042
20043   case BITS7(1,0,1,1,1,1,0): {
20044      /* ---------------- POP ---------------- */
20045      Int  i, nRegs;
20046      UInt bitR    = INSN0(8,8);
20047      UInt regList = INSN0(7,0);
20048
20049      /* At least one register must be transferred, else result is
20050         UNPREDICTABLE. */
20051      if (regList != 0 || bitR) {
20052         /* Since we can't generate a guaranteed non-trapping IR
20053            sequence, (1) jump over the insn if it is gated false, and
20054            (2) back out the ITSTATE update. */
20055         mk_skip_over_T16_if_cond_is_false(condT);
20056         condT = IRTemp_INVALID;
20057         put_ITSTATE(old_itstate);
20058         // now uncond
20059
20060         nRegs = 0;
20061         for (i = 0; i < 8; i++) {
20062            if ((regList & (1 << i)) != 0)
20063               nRegs++;
20064         }
20065         vassert(nRegs >= 0 && nRegs <= 8);
20066         vassert(bitR == 0 || bitR == 1);
20067
20068         IRTemp oldSP = newTemp(Ity_I32);
20069         assign(oldSP, getIRegT(13));
20070
20071         /* Generate a transfer base address as a forced-aligned
20072            version of the original SP value. */
20073         IRTemp base = newTemp(Ity_I32);
20074         assign(base, binop(Iop_And32, mkexpr(oldSP), mkU32(~3)));
20075
20076         /* Compute a new value for SP, but don't install it yet, so
20077            that we're "covered" until all the transfers are done.
20078            And don't mess with its alignment. */
20079         IRTemp newSP = newTemp(Ity_I32);
20080         assign(newSP, binop(Iop_Add32, mkexpr(oldSP),
20081                                        mkU32(4 * (nRegs + bitR))));
20082
20083         /* Now the transfers, not including PC */
20084         nRegs = 0;
20085         for (i = 0; i < 8; i++) {
20086            if ((regList & (1 << i)) != 0) {
20087               putIRegT(i, loadLE( Ity_I32,
20088                                   binop(Iop_Add32, mkexpr(base),
20089                                                    mkU32(4 * nRegs))),
20090                           IRTemp_INVALID );
20091               nRegs++;
20092            }
20093         }
20094
20095         IRTemp newPC = IRTemp_INVALID;
20096         if (bitR) {
20097            newPC = newTemp(Ity_I32);
20098            assign( newPC, loadLE( Ity_I32,
20099                                   binop(Iop_Add32, mkexpr(base),
20100                                                    mkU32(4 * nRegs))));
20101         }
20102
20103         /* Now we can safely install the new SP value */
20104         putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
20105
20106         /* Reinstate the ITSTATE update. */
20107         put_ITSTATE(new_itstate);
20108
20109         /* now, do we also have to do a branch?  If so, it turns out
20110            that the new PC value is encoded exactly as we need it to
20111            be -- with CPSR.T in the bottom bit.  So we can simply use
20112            it as is, no need to mess with it.  Note, therefore, this
20113            is an interworking return. */
20114         if (bitR) {
20115            llPutIReg(15, mkexpr(newPC));
20116            dres.jk_StopHere = Ijk_Ret;
20117            dres.whatNext    = Dis_StopHere;
20118         }
20119
20120         DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
20121         goto decode_success;
20122      }
20123      break;
20124   }
20125
20126   case BITS7(0,0,0,1,1,1,0):   /* ADDS */
20127   case BITS7(0,0,0,1,1,1,1): { /* SUBS */
20128      /* ---------------- ADDS Rd, Rn, #uimm3 ---------------- */
20129      /* ---------------- SUBS Rd, Rn, #uimm3 ---------------- */
20130      UInt   uimm3 = INSN0(8,6);
20131      UInt   rN    = INSN0(5,3);
20132      UInt   rD    = INSN0(2,0);
20133      UInt   isSub = INSN0(9,9);
20134      IRTemp argL  = newTemp(Ity_I32);
20135      IRTemp argR  = newTemp(Ity_I32);
20136      assign( argL, getIRegT(rN) );
20137      assign( argR, mkU32(uimm3) );
20138      putIRegT(rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
20139                         mkexpr(argL), mkexpr(argR)),
20140                   condT);
20141      setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
20142                      argL, argR, cond_AND_notInIT_T );
20143      DIP("%s r%u, r%u, #%u\n", isSub ? "subs" : "adds", rD, rN, uimm3);
20144      goto decode_success;
20145   }
20146
20147   case BITS7(0,0,0,1,1,0,0):   /* ADDS */
20148   case BITS7(0,0,0,1,1,0,1): { /* SUBS */
20149      /* ---------------- ADDS Rd, Rn, Rm ---------------- */
20150      /* ---------------- SUBS Rd, Rn, Rm ---------------- */
20151      UInt   rM    = INSN0(8,6);
20152      UInt   rN    = INSN0(5,3);
20153      UInt   rD    = INSN0(2,0);
20154      UInt   isSub = INSN0(9,9);
20155      IRTemp argL  = newTemp(Ity_I32);
20156      IRTemp argR  = newTemp(Ity_I32);
20157      assign( argL, getIRegT(rN) );
20158      assign( argR, getIRegT(rM) );
20159      putIRegT( rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
20160                          mkexpr(argL), mkexpr(argR)),
20161                    condT );
20162      setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
20163                      argL, argR, cond_AND_notInIT_T );
20164      DIP("%s r%u, r%u, r%u\n", isSub ? "subs" : "adds", rD, rN, rM);
20165      goto decode_success;
20166   }
20167
20168   case BITS7(0,1,0,1,0,0,0):   /* STR */
20169   case BITS7(0,1,0,1,1,0,0): { /* LDR */
20170      /* ------------- LDR Rd, [Rn, Rm] ------------- */
20171      /* ------------- STR Rd, [Rn, Rm] ------------- */
20172      /* LDR/STR Rd, [Rn + Rm] */
20173      UInt    rD   = INSN0(2,0);
20174      UInt    rN   = INSN0(5,3);
20175      UInt    rM   = INSN0(8,6);
20176      UInt    isLD = INSN0(11,11);
20177
20178      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20179      put_ITSTATE(old_itstate); // backout
20180      if (isLD) {
20181         IRTemp tD = newTemp(Ity_I32);
20182         loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
20183         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20184      } else {
20185         storeGuardedLE(ea, getIRegT(rD), condT);
20186      }
20187      put_ITSTATE(new_itstate); // restore
20188
20189      DIP("%s r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
20190      goto decode_success;
20191   }
20192
20193   case BITS7(0,1,0,1,0,0,1):
20194   case BITS7(0,1,0,1,1,0,1): {
20195      /* ------------- LDRH Rd, [Rn, Rm] ------------- */
20196      /* ------------- STRH Rd, [Rn, Rm] ------------- */
20197      /* LDRH/STRH Rd, [Rn + Rm] */
20198      UInt    rD   = INSN0(2,0);
20199      UInt    rN   = INSN0(5,3);
20200      UInt    rM   = INSN0(8,6);
20201      UInt    isLD = INSN0(11,11);
20202
20203      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20204      put_ITSTATE(old_itstate); // backout
20205      if (isLD) {
20206         IRTemp tD = newTemp(Ity_I32);
20207         loadGuardedLE(tD, ILGop_16Uto32, ea, llGetIReg(rD), condT);
20208         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20209      } else {
20210         storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
20211      }
20212      put_ITSTATE(new_itstate); // restore
20213
20214      DIP("%sh r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
20215      goto decode_success;
20216   }
20217
20218   case BITS7(0,1,0,1,1,1,1): {
20219      /* ------------- LDRSH Rd, [Rn, Rm] ------------- */
20220      /* LDRSH Rd, [Rn + Rm] */
20221      UInt    rD = INSN0(2,0);
20222      UInt    rN = INSN0(5,3);
20223      UInt    rM = INSN0(8,6);
20224
20225      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20226      put_ITSTATE(old_itstate); // backout
20227      IRTemp tD = newTemp(Ity_I32);
20228      loadGuardedLE(tD, ILGop_16Sto32, ea, llGetIReg(rD), condT);
20229      putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20230      put_ITSTATE(new_itstate); // restore
20231
20232      DIP("ldrsh r%u, [r%u, r%u]\n", rD, rN, rM);
20233      goto decode_success;
20234   }
20235
20236   case BITS7(0,1,0,1,0,1,1): {
20237      /* ------------- LDRSB Rd, [Rn, Rm] ------------- */
20238      /* LDRSB Rd, [Rn + Rm] */
20239      UInt    rD = INSN0(2,0);
20240      UInt    rN = INSN0(5,3);
20241      UInt    rM = INSN0(8,6);
20242
20243      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20244      put_ITSTATE(old_itstate); // backout
20245      IRTemp tD = newTemp(Ity_I32);
20246      loadGuardedLE(tD, ILGop_8Sto32, ea, llGetIReg(rD), condT);
20247      putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20248      put_ITSTATE(new_itstate); // restore
20249
20250      DIP("ldrsb r%u, [r%u, r%u]\n", rD, rN, rM);
20251      goto decode_success;
20252   }
20253
20254   case BITS7(0,1,0,1,0,1,0):
20255   case BITS7(0,1,0,1,1,1,0): {
20256      /* ------------- LDRB Rd, [Rn, Rm] ------------- */
20257      /* ------------- STRB Rd, [Rn, Rm] ------------- */
20258      /* LDRB/STRB Rd, [Rn + Rm] */
20259      UInt    rD   = INSN0(2,0);
20260      UInt    rN   = INSN0(5,3);
20261      UInt    rM   = INSN0(8,6);
20262      UInt    isLD = INSN0(11,11);
20263
20264      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20265      put_ITSTATE(old_itstate); // backout
20266      if (isLD) {
20267         IRTemp tD = newTemp(Ity_I32);
20268         loadGuardedLE(tD, ILGop_8Uto32, ea, llGetIReg(rD), condT);
20269         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20270      } else {
20271         storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
20272      }
20273      put_ITSTATE(new_itstate); // restore
20274
20275      DIP("%sb r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
20276      goto decode_success;
20277   }
20278
20279   default:
20280      break; /* examine the next shortest prefix */
20281
20282   }
20283
20284
20285   /* ================ 16-bit 15:11 cases ================ */
20286
20287   switch (INSN0(15,11)) {
20288
20289   case BITS5(0,0,1,1,0):
20290   case BITS5(0,0,1,1,1): {
20291      /* ---------------- ADDS Rn, #uimm8 ---------------- */
20292      /* ---------------- SUBS Rn, #uimm8 ---------------- */
20293      UInt   isSub = INSN0(11,11);
20294      UInt   rN    = INSN0(10,8);
20295      UInt   uimm8 = INSN0(7,0);
20296      IRTemp argL  = newTemp(Ity_I32);
20297      IRTemp argR  = newTemp(Ity_I32);
20298      assign( argL, getIRegT(rN) );
20299      assign( argR, mkU32(uimm8) );
20300      putIRegT( rN, binop(isSub ? Iop_Sub32 : Iop_Add32,
20301                          mkexpr(argL), mkexpr(argR)), condT );
20302      setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
20303                      argL, argR, cond_AND_notInIT_T );
20304      DIP("%s r%u, #%u\n", isSub ? "subs" : "adds", rN, uimm8);
20305      goto decode_success;
20306   }
20307
20308   case BITS5(1,0,1,0,0): {
20309      /* ---------------- ADD rD, PC, #imm8 * 4 ---------------- */
20310      /* a.k.a. ADR */
20311      /* rD = align4(PC) + imm8 * 4 */
20312      UInt rD   = INSN0(10,8);
20313      UInt imm8 = INSN0(7,0);
20314      putIRegT(rD, binop(Iop_Add32,
20315                         binop(Iop_And32, getIRegT(15), mkU32(~3U)),
20316                         mkU32(imm8 * 4)),
20317                   condT);
20318      DIP("add r%u, pc, #%u\n", rD, imm8 * 4);
20319      goto decode_success;
20320   }
20321
20322   case BITS5(1,0,1,0,1): {
20323      /* ---------------- ADD rD, SP, #imm8 * 4 ---------------- */
20324      UInt rD   = INSN0(10,8);
20325      UInt imm8 = INSN0(7,0);
20326      putIRegT(rD, binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4)),
20327                   condT);
20328      DIP("add r%u, r13, #%u\n", rD, imm8 * 4);
20329      goto decode_success;
20330   }
20331
20332   case BITS5(0,0,1,0,1): {
20333      /* ---------------- CMP Rn, #uimm8 ---------------- */
20334      UInt   rN    = INSN0(10,8);
20335      UInt   uimm8 = INSN0(7,0);
20336      IRTemp argL  = newTemp(Ity_I32);
20337      IRTemp argR  = newTemp(Ity_I32);
20338      assign( argL, getIRegT(rN) );
20339      assign( argR, mkU32(uimm8) );
20340      /* Update flags regardless of whether in an IT block or not. */
20341      setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
20342      DIP("cmp r%u, #%u\n", rN, uimm8);
20343      goto decode_success;
20344   }
20345
20346   case BITS5(0,0,1,0,0): {
20347      /* -------------- (T1) MOVS Rn, #uimm8 -------------- */
20348      UInt   rD    = INSN0(10,8);
20349      UInt   uimm8 = INSN0(7,0);
20350      IRTemp oldV  = newTemp(Ity_I32);
20351      IRTemp oldC  = newTemp(Ity_I32);
20352      IRTemp res   = newTemp(Ity_I32);
20353      assign( oldV, mk_armg_calculate_flag_v() );
20354      assign( oldC, mk_armg_calculate_flag_c() );
20355      assign( res, mkU32(uimm8) );
20356      putIRegT(rD, mkexpr(res), condT);
20357      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
20358                         cond_AND_notInIT_T );
20359      DIP("movs r%u, #%u\n", rD, uimm8);
20360      goto decode_success;
20361   }
20362
20363   case BITS5(0,1,0,0,1): {
20364      /* ------------- LDR Rd, [PC, #imm8 * 4] ------------- */
20365      /* LDR Rd, [align4(PC) + imm8 * 4] */
20366      UInt   rD   = INSN0(10,8);
20367      UInt   imm8 = INSN0(7,0);
20368      IRTemp ea   = newTemp(Ity_I32);
20369
20370      assign(ea, binop(Iop_Add32,
20371                       binop(Iop_And32, getIRegT(15), mkU32(~3U)),
20372                       mkU32(imm8 * 4)));
20373      put_ITSTATE(old_itstate); // backout
20374      IRTemp tD = newTemp(Ity_I32);
20375      loadGuardedLE( tD, ILGop_Ident32, mkexpr(ea), llGetIReg(rD), condT );
20376      putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20377      put_ITSTATE(new_itstate); // restore
20378
20379      DIP("ldr r%u, [pc, #%u]\n", rD, imm8 * 4);
20380      goto decode_success;
20381   }
20382
20383   case BITS5(0,1,1,0,0):   /* STR */
20384   case BITS5(0,1,1,0,1): { /* LDR */
20385      /* ------------- LDR Rd, [Rn, #imm5 * 4] ------------- */
20386      /* ------------- STR Rd, [Rn, #imm5 * 4] ------------- */
20387      /* LDR/STR Rd, [Rn + imm5 * 4] */
20388      UInt    rD   = INSN0(2,0);
20389      UInt    rN   = INSN0(5,3);
20390      UInt    imm5 = INSN0(10,6);
20391      UInt    isLD = INSN0(11,11);
20392
20393      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 4));
20394      put_ITSTATE(old_itstate); // backout
20395      if (isLD) {
20396         IRTemp tD = newTemp(Ity_I32);
20397         loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
20398         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20399      } else {
20400         storeGuardedLE( ea, getIRegT(rD), condT );
20401      }
20402      put_ITSTATE(new_itstate); // restore
20403
20404      DIP("%s r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 4);
20405      goto decode_success;
20406   }
20407
20408   case BITS5(1,0,0,0,0):   /* STRH */
20409   case BITS5(1,0,0,0,1): { /* LDRH */
20410      /* ------------- LDRH Rd, [Rn, #imm5 * 2] ------------- */
20411      /* ------------- STRH Rd, [Rn, #imm5 * 2] ------------- */
20412      /* LDRH/STRH Rd, [Rn + imm5 * 2] */
20413      UInt    rD   = INSN0(2,0);
20414      UInt    rN   = INSN0(5,3);
20415      UInt    imm5 = INSN0(10,6);
20416      UInt    isLD = INSN0(11,11);
20417
20418      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 2));
20419      put_ITSTATE(old_itstate); // backout
20420      if (isLD) {
20421         IRTemp tD = newTemp(Ity_I32);
20422         loadGuardedLE( tD, ILGop_16Uto32, ea, llGetIReg(rD), condT );
20423         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20424      } else {
20425         storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
20426      }
20427      put_ITSTATE(new_itstate); // restore
20428
20429      DIP("%sh r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 2);
20430      goto decode_success;
20431   }
20432
20433   case BITS5(0,1,1,1,0):   /* STRB */
20434   case BITS5(0,1,1,1,1): { /* LDRB */
20435      /* ------------- LDRB Rd, [Rn, #imm5] ------------- */
20436      /* ------------- STRB Rd, [Rn, #imm5] ------------- */
20437      /* LDRB/STRB Rd, [Rn + imm5] */
20438      UInt    rD   = INSN0(2,0);
20439      UInt    rN   = INSN0(5,3);
20440      UInt    imm5 = INSN0(10,6);
20441      UInt    isLD = INSN0(11,11);
20442
20443      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5));
20444      put_ITSTATE(old_itstate); // backout
20445      if (isLD) {
20446         IRTemp tD = newTemp(Ity_I32);
20447         loadGuardedLE( tD, ILGop_8Uto32, ea, llGetIReg(rD), condT );
20448         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20449      } else {
20450         storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
20451      }
20452      put_ITSTATE(new_itstate); // restore
20453
20454      DIP("%sb r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5);
20455      goto decode_success;
20456   }
20457
20458   case BITS5(1,0,0,1,0):   /* STR */
20459   case BITS5(1,0,0,1,1): { /* LDR */
20460      /* ------------- LDR Rd, [SP, #imm8 * 4] ------------- */
20461      /* ------------- STR Rd, [SP, #imm8 * 4] ------------- */
20462      /* LDR/STR Rd, [SP + imm8 * 4] */
20463      UInt rD    = INSN0(10,8);
20464      UInt imm8  = INSN0(7,0);
20465      UInt isLD  = INSN0(11,11);
20466
20467      IRExpr* ea = binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4));
20468      put_ITSTATE(old_itstate); // backout
20469      if (isLD) {
20470         IRTemp tD = newTemp(Ity_I32);
20471         loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
20472         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20473      } else {
20474         storeGuardedLE(ea, getIRegT(rD), condT);
20475      }
20476      put_ITSTATE(new_itstate); // restore
20477
20478      DIP("%s r%u, [sp, #%u]\n", isLD ? "ldr" : "str", rD, imm8 * 4);
20479      goto decode_success;
20480   }
20481
20482   case BITS5(1,1,0,0,1): {
20483      /* ------------- LDMIA Rn!, {reglist} ------------- */
20484      Int i, nRegs = 0;
20485      UInt rN   = INSN0(10,8);
20486      UInt list = INSN0(7,0);
20487      /* Empty lists aren't allowed. */
20488      if (list != 0) {
20489         mk_skip_over_T16_if_cond_is_false(condT);
20490         condT = IRTemp_INVALID;
20491         put_ITSTATE(old_itstate);
20492         // now uncond
20493
20494         IRTemp oldRn = newTemp(Ity_I32);
20495         IRTemp base  = newTemp(Ity_I32);
20496         assign(oldRn, getIRegT(rN));
20497         assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
20498         for (i = 0; i < 8; i++) {
20499            if (0 == (list & (1 << i)))
20500               continue;
20501            nRegs++;
20502            putIRegT(
20503               i, loadLE(Ity_I32,
20504                         binop(Iop_Add32, mkexpr(base),
20505                                          mkU32(nRegs * 4 - 4))),
20506               IRTemp_INVALID
20507            );
20508         }
20509         /* Only do the writeback for rN if it isn't in the list of
20510            registers to be transferred. */
20511         if (0 == (list & (1 << rN))) {
20512            putIRegT(rN,
20513                     binop(Iop_Add32, mkexpr(oldRn),
20514                                      mkU32(nRegs * 4)),
20515                     IRTemp_INVALID
20516            );
20517         }
20518
20519         /* Reinstate the ITSTATE update. */
20520         put_ITSTATE(new_itstate);
20521
20522         DIP("ldmia r%u!, {0x%04x}\n", rN, list);
20523         goto decode_success;
20524      }
20525      break;
20526   }
20527
20528   case BITS5(1,1,0,0,0): {
20529      /* ------------- STMIA Rn!, {reglist} ------------- */
20530      Int i, nRegs = 0;
20531      UInt rN   = INSN0(10,8);
20532      UInt list = INSN0(7,0);
20533      /* Empty lists aren't allowed.  Also, if rN is in the list then
20534         it must be the lowest numbered register in the list. */
20535      Bool valid = list != 0;
20536      if (valid && 0 != (list & (1 << rN))) {
20537         for (i = 0; i < rN; i++) {
20538            if (0 != (list & (1 << i)))
20539               valid = False;
20540         }
20541      }
20542      if (valid) {
20543         mk_skip_over_T16_if_cond_is_false(condT);
20544         condT = IRTemp_INVALID;
20545         put_ITSTATE(old_itstate);
20546         // now uncond
20547
20548         IRTemp oldRn = newTemp(Ity_I32);
20549         IRTemp base = newTemp(Ity_I32);
20550         assign(oldRn, getIRegT(rN));
20551         assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
20552         for (i = 0; i < 8; i++) {
20553            if (0 == (list & (1 << i)))
20554               continue;
20555            nRegs++;
20556            storeLE( binop(Iop_Add32, mkexpr(base), mkU32(nRegs * 4 - 4)),
20557                     getIRegT(i) );
20558         }
20559         /* Always do the writeback. */
20560         putIRegT(rN,
20561                  binop(Iop_Add32, mkexpr(oldRn),
20562                                   mkU32(nRegs * 4)),
20563                  IRTemp_INVALID);
20564
20565         /* Reinstate the ITSTATE update. */
20566         put_ITSTATE(new_itstate);
20567
20568         DIP("stmia r%u!, {0x%04x}\n", rN, list);
20569         goto decode_success;
20570      }
20571      break;
20572   }
20573
   case BITS5(0,0,0,0,0):   /* LSLS */
   case BITS5(0,0,0,0,1):   /* LSRS */
   case BITS5(0,0,0,1,0): { /* ASRS */
      /* ---------------- LSLS Rd, Rm, #imm5 ---------------- */
      /* ---------------- LSRS Rd, Rm, #imm5 ---------------- */
      /* ---------------- ASRS Rd, Rm, #imm5 ---------------- */
      /* Shift Rm by an immediate and write the result to Rd under
         condT.  The shift result (res), the carry produced by the
         shift helper (resC) and the pre-existing V flag (oldV) are
         handed to setFlags_D1_D2_ND, guarded by cond_AND_notInIT_T. */
      UInt   rD   = INSN0(2,0);
      UInt   rM   = INSN0(5,3);
      UInt   imm5 = INSN0(10,6);
      IRTemp res  = newTemp(Ity_I32);
      IRTemp resC = newTemp(Ity_I32);
      IRTemp rMt  = newTemp(Ity_I32);
      IRTemp oldV = newTemp(Ity_I32);
      const HChar* wot  = "???";
      /* Snapshot Rm and the current V flag before anything is
         written back. */
      assign(rMt, getIRegT(rM));
      assign(oldV, mk_armg_calculate_flag_v());
      /* Looks like INSN0(12,11) are the standard 'how' encoding.
         Could compactify if the ROR case later appears. */
      switch (INSN0(15,11)) {
         case BITS5(0,0,0,0,0):
            compute_result_and_C_after_LSL_by_imm5(
               dis_buf, &res, &resC, rMt, imm5, rM
            );
            wot = "lsl";
            break;
         case BITS5(0,0,0,0,1):
            compute_result_and_C_after_LSR_by_imm5(
               dis_buf, &res, &resC, rMt, imm5, rM
            );
            wot = "lsr";
            break;
         case BITS5(0,0,0,1,0):
            compute_result_and_C_after_ASR_by_imm5(
               dis_buf, &res, &resC, rMt, imm5, rM
            );
            wot = "asr";
            break;
         default:
            /* The enclosing case labels admit only the three values
               handled above. */
            /*NOTREACHED*/vassert(0);
      }
      // not safe to read guest state after this point
      putIRegT(rD, mkexpr(res), condT);
      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
                         cond_AND_notInIT_T );
      /* ignore buf and roll our own output */
      DIP("%ss r%u, r%u, #%u\n", wot, rD, rM, imm5);
      goto decode_success;
   }
20622
   case BITS5(1,1,1,0,0): {
      /* ---------------- B #simm11 ---------------- */
      /* Move the 11-bit field to the top of the word (<< 21), then
         shift right by one less (>> 20) on the signed value.  This
         sign-extends the field and doubles it in one step (relies on
         >> of a negative Int being an arithmetic shift). */
      UInt uimm11 = INSN0(10,0);  uimm11 <<= 21;
      Int  simm11 = (Int)uimm11;  simm11 >>= 20;
      /* Target is relative to the address of this insn plus 4. */
      UInt dst    = simm11 + guest_R15_curr_instr_notENC + 4;
      /* Only allowed outside or last-in IT block; SIGILL if not so. */
      gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
      // and skip this insn if not selected; being cleverer is too
      // difficult
      mk_skip_over_T16_if_cond_is_false(condT);
      condT = IRTemp_INVALID;
      // now uncond
      /* Bit 0 of the value written to r15 is set to stay in Thumb
         state; the block ends here with an ordinary jump. */
      llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
      dres.jk_StopHere = Ijk_Boring;
      dres.whatNext    = Dis_StopHere;
      DIP("b 0x%x\n", dst);
      goto decode_success;
   }
20641
20642   default:
20643      break; /* examine the next shortest prefix */
20644
20645   }
20646
20647
20648   /* ================ 16-bit 15:12 cases ================ */
20649
20650   switch (INSN0(15,12)) {
20651
   case BITS4(1,1,0,1): {
      /* ---------------- Bcond #simm8 ---------------- */
      UInt cond  = INSN0(11,8);
      /* Sign-extend the 8-bit field and double it: << 24 puts it at
         the top of the word, >> 23 on the signed value brings it back
         one bit short (relies on >> of a negative Int being an
         arithmetic shift). */
      UInt uimm8 = INSN0(7,0);  uimm8 <<= 24;
      Int  simm8 = (Int)uimm8;  simm8 >>= 23;
      /* Target is relative to the address of this insn plus 4. */
      UInt dst   = simm8 + guest_R15_curr_instr_notENC + 4;
      /* The AL and NV condition encodings are not handled here; they
         fall out of this switch via the 'break' below. */
      if (cond != ARMCondAL && cond != ARMCondNV) {
         /* Not allowed in an IT block; SIGILL if so. */
         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);

         IRTemp kondT = newTemp(Ity_I32);
         assign( kondT, mk_armg_calculate_condition(cond) );
         /* Taken path: side exit to dst (Thumb bit set). */
         stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
                            Ijk_Boring,
                            IRConst_U32(dst | 1/*CPSR.T*/),
                            OFFB_R15T ));
         /* Not-taken path: continue at the next 16-bit insn. */
         llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2)
                              | 1 /*CPSR.T*/ ));
         dres.jk_StopHere = Ijk_Boring;
         dres.whatNext    = Dis_StopHere;
         DIP("b%s 0x%x\n", nCC(cond), dst);
         goto decode_success;
      }
      break;
   }
20677
20678   default:
20679      break; /* hmm, nothing matched */
20680
20681   }
20682
20683   /* ================ 16-bit misc cases ================ */
20684
20685   switch (INSN0(15,0)) {
20686      case 0xBF00:
20687         /* ------ NOP ------ */
20688         DIP("nop\n");
20689         goto decode_success;
20690      case 0xBF10: // YIELD
20691      case 0xBF20: // WFE
20692         /* ------ WFE, YIELD ------ */
20693         /* Both appear to get used as a spin-loop hints.  Do the usual thing,
20694            which is to continue after yielding. */
20695         stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
20696                            Ijk_Yield,
20697                            IRConst_U32((guest_R15_curr_instr_notENC + 2)
20698                                        | 1 /*CPSR.T*/),
20699                            OFFB_R15T ));
20700         Bool isWFE = INSN0(15,0) == 0xBF20;
20701         DIP(isWFE ? "wfe\n" : "yield\n");
20702         goto decode_success;
20703      case 0xBF40:
20704         /* ------ SEV ------ */
20705         /* Treat this as a no-op.  Any matching WFEs won't really
20706            cause the host CPU to snooze; they just cause V to try to
20707            run some other thread for a while.  So there's no point in
20708            really doing anything for SEV. */
20709         DIP("sev\n");
20710         goto decode_success;
20711      default:
20712         break; /* fall through */
20713   }
20714
20715   /* ----------------------------------------------------------- */
20716   /* --                                                       -- */
20717   /* -- Thumb 32-bit integer instructions                     -- */
20718   /* --                                                       -- */
20719   /* ----------------------------------------------------------- */
20720
20721#  define INSN1(_bMax,_bMin)  SLICE_UInt(((UInt)insn1), (_bMax), (_bMin))
20722
   /* second 16 bits of the instruction, if any.  Nothing on the
      16-bit paths above may have set insn1, hence the assertion. */
   vassert(insn1 == 0);
   insn1 = getUShortLittleEndianly( guest_instr+2 );

   anOp   = Iop_INVALID; /* paranoia */
   anOpNm = NULL;        /* paranoia */

   /* Change result defaults to suit 32-bit insns.  The assertions
      check that the 16-bit defaults are still in place before the
      length is bumped from 2 to 4 bytes. */
   vassert(dres.whatNext   == Dis_Continue);
   vassert(dres.len        == 2);
   vassert(dres.continueAt == 0);
   dres.len = 4;
20735
   /* ---------------- BL/BLX simm26 ---------------- */
   if (BITS5(1,1,1,1,0) == INSN0(15,11) && BITS2(1,1) == INSN1(15,14)) {
      UInt isBL = INSN1(12,12);
      UInt bS   = INSN0(10,10);
      UInt bJ1  = INSN1(13,13);
      UInt bJ2  = INSN1(11,11);
      /* I1 = NOT(J1 XOR S), I2 = NOT(J2 XOR S) */
      UInt bI1  = 1 ^ (bJ1 ^ bS);
      UInt bI2  = 1 ^ (bJ2 ^ bS);
      /* Assemble S:I1:I2:insn0[9:0]:insn1[10:0]:0 as a 25-bit value
         with S at bit 24. */
      UInt uimm25
         =   (bS          << (1 + 1 + 10 + 11 + 1))
           | (bI1         << (1 + 10 + 11 + 1))
           | (bI2         << (10 + 11 + 1))
           | (INSN0(9,0)  << (11 + 1))
           | (INSN1(10,0) << 1);
      /* Sign-extend from bit 24: push it up to bit 31, then shift
         back down as a signed value. */
      uimm25 <<= 7;
      Int simm25 = (Int)uimm25;
      simm25 >>= 7;

      vassert(0 == (guest_R15_curr_instr_notENC & 1));
      /* Target is relative to the address of this insn plus 4. */
      UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;

      /* One further validity case to check: in the case of BLX
         (not-BL), that insn1[0] must be zero. */
      Bool valid = True;
      if (isBL == 0 && INSN1(0,0) == 1) valid = False;
      if (valid) {
         /* Only allowed outside or last-in IT block; SIGILL if not so. */
         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
         // and skip this insn if not selected; being cleverer is too
         // difficult
         mk_skip_over_T32_if_cond_is_false(condT);
         condT = IRTemp_INVALID;
         // now uncond

         /* The return address is the insn after this 4-byte one.
            We're returning to Thumb code, hence "| 1" */
         putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 4) | 1 ),
                   IRTemp_INVALID);
         if (isBL) {
            /* BL: unconditional T -> T call */
            /* we're calling Thumb code, hence "| 1" */
            llPutIReg(15, mkU32( dst | 1 ));
            DIP("bl 0x%x (stay in Thumb mode)\n", dst);
         } else {
            /* BLX: unconditional T -> A call */
            /* we're calling ARM code, hence "& ~3" to align to a
               valid ARM insn address */
            llPutIReg(15, mkU32( dst & ~3 ));
            DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
         }
         dres.whatNext    = Dis_StopHere;
         dres.jk_StopHere = Ijk_Call;
         goto decode_success;
      }
   }
20790
20791   /* ---------------- {LD,ST}M{IA,DB} ---------------- */
20792   if (0x3a2 == INSN0(15,6) // {LD,ST}MIA
20793       || 0x3a4 == INSN0(15,6)) { // {LD,ST}MDB
20794      UInt bW      = INSN0(5,5); /* writeback Rn ? */
20795      UInt bL      = INSN0(4,4);
20796      UInt rN      = INSN0(3,0);
20797      UInt bP      = INSN1(15,15); /* reglist entry for r15 */
20798      UInt bM      = INSN1(14,14); /* reglist entry for r14 */
20799      UInt rLmost  = INSN1(12,0);  /* reglist entry for r0 .. 12 */
20800      UInt rL13    = INSN1(13,13); /* must be zero */
20801      UInt regList = 0;
20802      Bool valid   = True;
20803
20804      UInt bINC    = 1;
20805      UInt bBEFORE = 0;
20806      if (INSN0(15,6) == 0x3a4) {
20807         bINC    = 0;
20808         bBEFORE = 1;
20809      }
20810
20811      /* detect statically invalid cases, and construct the final
20812         reglist */
20813      if (rL13 == 1)
20814         valid = False;
20815
20816      if (bL == 1) {
20817         regList = (bP << 15) | (bM << 14) | rLmost;
20818         if (rN == 15)                       valid = False;
20819         if (popcount32(regList) < 2)        valid = False;
20820         if (bP == 1 && bM == 1)             valid = False;
20821         if (bW == 1 && (regList & (1<<rN))) valid = False;
20822      } else {
20823         regList = (bM << 14) | rLmost;
20824         if (bP == 1)                        valid = False;
20825         if (rN == 15)                       valid = False;
20826         if (popcount32(regList) < 2)        valid = False;
20827         if (bW == 1 && (regList & (1<<rN))) valid = False;
20828      }
20829
20830      if (valid) {
20831         if (bL == 1 && bP == 1) {
20832            // We'll be writing the PC.  Hence:
20833            /* Only allowed outside or last-in IT block; SIGILL if not so. */
20834            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20835         }
20836
20837         /* Go uncond: */
20838         mk_skip_over_T32_if_cond_is_false(condT);
20839         condT = IRTemp_INVALID;
20840         // now uncond
20841
20842         /* Generate the IR.  This might generate a write to R15. */
20843         mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
20844
20845         if (bL == 1 && (regList & (1<<15))) {
20846            // If we wrote to R15, we have an interworking return to
20847            // deal with.
20848            llPutIReg(15, llGetIReg(15));
20849            dres.jk_StopHere = Ijk_Ret;
20850            dres.whatNext    = Dis_StopHere;
20851         }
20852
20853         DIP("%sm%c%c r%u%s, {0x%04x}\n",
20854              bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
20855              rN, bW ? "!" : "", regList);
20856
20857         goto decode_success;
20858      }
20859   }
20860
20861   /* -------------- (T3) ADD{S}.W Rd, Rn, #constT -------------- */
20862   if (INSN0(15,11) == BITS5(1,1,1,1,0)
20863       && INSN0(9,5) == BITS5(0,1,0,0,0)
20864       && INSN1(15,15) == 0) {
20865      UInt bS = INSN0(4,4);
20866      UInt rN = INSN0(3,0);
20867      UInt rD = INSN1(11,8);
20868      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
20869      /* but allow "add.w reg, sp, #constT" for reg != PC */
20870      if (!valid && rD <= 14 && rN == 13)
20871         valid = True;
20872      if (valid) {
20873         IRTemp argL  = newTemp(Ity_I32);
20874         IRTemp argR  = newTemp(Ity_I32);
20875         IRTemp res   = newTemp(Ity_I32);
20876         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
20877         assign(argL, getIRegT(rN));
20878         assign(argR, mkU32(imm32));
20879         assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
20880         putIRegT(rD, mkexpr(res), condT);
20881         if (bS == 1)
20882            setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
20883         DIP("add%s.w r%u, r%u, #%u\n",
20884             bS == 1 ? "s" : "", rD, rN, imm32);
20885         goto decode_success;
20886      }
20887   }
20888
20889   /* ---------------- (T4) ADDW Rd, Rn, #uimm12 -------------- */
20890   if (INSN0(15,11) == BITS5(1,1,1,1,0)
20891       && INSN0(9,4) == BITS6(1,0,0,0,0,0)
20892       && INSN1(15,15) == 0) {
20893      UInt rN = INSN0(3,0);
20894      UInt rD = INSN1(11,8);
20895      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
20896      /* but allow "addw reg, sp, #uimm12" for reg != PC */
20897      if (!valid && rD <= 14 && rN == 13)
20898         valid = True;
20899      if (valid) {
20900         IRTemp argL = newTemp(Ity_I32);
20901         IRTemp argR = newTemp(Ity_I32);
20902         IRTemp res  = newTemp(Ity_I32);
20903         UInt imm12  = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
20904         assign(argL, getIRegT(rN));
20905         assign(argR, mkU32(imm12));
20906         assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
20907         putIRegT(rD, mkexpr(res), condT);
20908         DIP("addw r%u, r%u, #%u\n", rD, rN, imm12);
20909         goto decode_success;
20910      }
20911   }
20912
20913   /* ---------------- (T2) CMP.W Rn, #constT ---------------- */
20914   /* ---------------- (T2) CMN.W Rn, #constT ---------------- */
20915   if (INSN0(15,11) == BITS5(1,1,1,1,0)
20916       && (   INSN0(9,4) == BITS6(0,1,1,0,1,1)  // CMP
20917           || INSN0(9,4) == BITS6(0,1,0,0,0,1)) // CMN
20918       && INSN1(15,15) == 0
20919       && INSN1(11,8) == BITS4(1,1,1,1)) {
20920      UInt rN = INSN0(3,0);
20921      if (rN != 15) {
20922         IRTemp argL  = newTemp(Ity_I32);
20923         IRTemp argR  = newTemp(Ity_I32);
20924         Bool   isCMN = INSN0(9,4) == BITS6(0,1,0,0,0,1);
20925         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
20926         assign(argL, getIRegT(rN));
20927         assign(argR, mkU32(imm32));
20928         setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
20929                         argL, argR, condT );
20930         DIP("%s.w r%u, #%u\n", isCMN ? "cmn" : "cmp", rN, imm32);
20931         goto decode_success;
20932      }
20933   }
20934
20935   /* -------------- (T1) TST.W Rn, #constT -------------- */
20936   /* -------------- (T1) TEQ.W Rn, #constT -------------- */
20937   if (INSN0(15,11) == BITS5(1,1,1,1,0)
20938       && (   INSN0(9,4) == BITS6(0,0,0,0,0,1)  // TST
20939           || INSN0(9,4) == BITS6(0,0,1,0,0,1)) // TEQ
20940       && INSN1(15,15) == 0
20941       && INSN1(11,8) == BITS4(1,1,1,1)) {
20942      UInt rN = INSN0(3,0);
20943      if (!isBadRegT(rN)) { // yes, really, it's inconsistent with CMP.W
20944         Bool  isTST  = INSN0(9,4) == BITS6(0,0,0,0,0,1);
20945         IRTemp argL  = newTemp(Ity_I32);
20946         IRTemp argR  = newTemp(Ity_I32);
20947         IRTemp res   = newTemp(Ity_I32);
20948         IRTemp oldV  = newTemp(Ity_I32);
20949         IRTemp oldC  = newTemp(Ity_I32);
20950         Bool   updC  = False;
20951         UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
20952         assign(argL, getIRegT(rN));
20953         assign(argR, mkU32(imm32));
20954         assign(res,  binop(isTST ? Iop_And32 : Iop_Xor32,
20955                            mkexpr(argL), mkexpr(argR)));
20956         assign( oldV, mk_armg_calculate_flag_v() );
20957         assign( oldC, updC
20958                       ? mkU32((imm32 >> 31) & 1)
20959                       : mk_armg_calculate_flag_c() );
20960         setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
20961         DIP("%s.w r%u, #%u\n", isTST ? "tst" : "teq", rN, imm32);
20962         goto decode_success;
20963      }
20964   }
20965
20966   /* -------------- (T3) SUB{S}.W Rd, Rn, #constT -------------- */
20967   /* -------------- (T3) RSB{S}.W Rd, Rn, #constT -------------- */
20968   if (INSN0(15,11) == BITS5(1,1,1,1,0)
20969       && (INSN0(9,5) == BITS5(0,1,1,0,1) // SUB
20970           || INSN0(9,5) == BITS5(0,1,1,1,0)) // RSB
20971       && INSN1(15,15) == 0) {
20972      Bool isRSB = INSN0(9,5) == BITS5(0,1,1,1,0);
20973      UInt bS    = INSN0(4,4);
20974      UInt rN    = INSN0(3,0);
20975      UInt rD    = INSN1(11,8);
20976      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
20977      /* but allow "sub{s}.w reg, sp, #constT
20978         this is (T2) of "SUB (SP minus immediate)" */
20979      if (!valid && !isRSB && rN == 13 && rD != 15)
20980         valid = True;
20981      if (valid) {
20982         IRTemp argL  = newTemp(Ity_I32);
20983         IRTemp argR  = newTemp(Ity_I32);
20984         IRTemp res   = newTemp(Ity_I32);
20985         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
20986         assign(argL, getIRegT(rN));
20987         assign(argR, mkU32(imm32));
20988         assign(res,  isRSB
20989                      ? binop(Iop_Sub32, mkexpr(argR), mkexpr(argL))
20990                      : binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
20991         putIRegT(rD, mkexpr(res), condT);
20992         if (bS == 1) {
20993            if (isRSB)
20994               setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
20995            else
20996               setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
20997         }
20998         DIP("%s%s.w r%u, r%u, #%u\n",
20999             isRSB ? "rsb" : "sub", bS == 1 ? "s" : "", rD, rN, imm32);
21000         goto decode_success;
21001      }
21002   }
21003
21004   /* -------------- (T4) SUBW Rd, Rn, #uimm12 ------------------- */
21005   if (INSN0(15,11) == BITS5(1,1,1,1,0)
21006       && INSN0(9,4) == BITS6(1,0,1,0,1,0)
21007       && INSN1(15,15) == 0) {
21008      UInt rN = INSN0(3,0);
21009      UInt rD = INSN1(11,8);
21010      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
21011      /* but allow "subw sp, sp, #uimm12" */
21012      if (!valid && rD == 13 && rN == 13)
21013         valid = True;
21014      if (valid) {
21015         IRTemp argL  = newTemp(Ity_I32);
21016         IRTemp argR  = newTemp(Ity_I32);
21017         IRTemp res   = newTemp(Ity_I32);
21018         UInt imm12   = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
21019         assign(argL, getIRegT(rN));
21020         assign(argR, mkU32(imm12));
21021         assign(res,  binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
21022         putIRegT(rD, mkexpr(res), condT);
21023         DIP("subw r%u, r%u, #%u\n", rD, rN, imm12);
21024         goto decode_success;
21025      }
21026   }
21027
21028   /* -------------- (T1) ADC{S}.W Rd, Rn, #constT -------------- */
21029   /* -------------- (T1) SBC{S}.W Rd, Rn, #constT -------------- */
21030   if (INSN0(15,11) == BITS5(1,1,1,1,0)
21031       && (   INSN0(9,5) == BITS5(0,1,0,1,0)  // ADC
21032           || INSN0(9,5) == BITS5(0,1,0,1,1)) // SBC
21033       && INSN1(15,15) == 0) {
21034      /* ADC:  Rd = Rn + constT + oldC */
21035      /* SBC:  Rd = Rn - constT - (oldC ^ 1) */
21036      UInt bS    = INSN0(4,4);
21037      UInt rN    = INSN0(3,0);
21038      UInt rD    = INSN1(11,8);
21039      if (!isBadRegT(rN) && !isBadRegT(rD)) {
21040         IRTemp argL  = newTemp(Ity_I32);
21041         IRTemp argR  = newTemp(Ity_I32);
21042         IRTemp res   = newTemp(Ity_I32);
21043         IRTemp oldC  = newTemp(Ity_I32);
21044         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
21045         assign(argL, getIRegT(rN));
21046         assign(argR, mkU32(imm32));
21047         assign(oldC, mk_armg_calculate_flag_c() );
21048         const HChar* nm  = "???";
21049         switch (INSN0(9,5)) {
21050            case BITS5(0,1,0,1,0): // ADC
21051               nm = "adc";
21052               assign(res,
21053                      binop(Iop_Add32,
21054                            binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
21055                            mkexpr(oldC) ));
21056               putIRegT(rD, mkexpr(res), condT);
21057               if (bS)
21058                  setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
21059                                     argL, argR, oldC, condT );
21060               break;
21061            case BITS5(0,1,0,1,1): // SBC
21062               nm = "sbc";
21063               assign(res,
21064                      binop(Iop_Sub32,
21065                            binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
21066                            binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
21067               putIRegT(rD, mkexpr(res), condT);
21068               if (bS)
21069                  setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
21070                                     argL, argR, oldC, condT );
21071               break;
21072            default:
21073              vassert(0);
21074         }
21075         DIP("%s%s.w r%u, r%u, #%u\n",
21076             nm, bS == 1 ? "s" : "", rD, rN, imm32);
21077         goto decode_success;
21078      }
21079   }
21080
21081   /* -------------- (T1) ORR{S}.W Rd, Rn, #constT -------------- */
21082   /* -------------- (T1) AND{S}.W Rd, Rn, #constT -------------- */
21083   /* -------------- (T1) BIC{S}.W Rd, Rn, #constT -------------- */
21084   /* -------------- (T1) EOR{S}.W Rd, Rn, #constT -------------- */
21085   if (INSN0(15,11) == BITS5(1,1,1,1,0)
21086       && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // ORR
21087           || INSN0(9,5) == BITS5(0,0,0,0,0)  // AND
21088           || INSN0(9,5) == BITS5(0,0,0,0,1)  // BIC
21089           || INSN0(9,5) == BITS5(0,0,1,0,0)  // EOR
21090           || INSN0(9,5) == BITS5(0,0,0,1,1)) // ORN
21091       && INSN1(15,15) == 0) {
21092      UInt bS = INSN0(4,4);
21093      UInt rN = INSN0(3,0);
21094      UInt rD = INSN1(11,8);
21095      if (!isBadRegT(rN) && !isBadRegT(rD)) {
21096         Bool   notArgR = False;
21097         IROp   op      = Iop_INVALID;
21098         const HChar* nm = "???";
21099         switch (INSN0(9,5)) {
21100            case BITS5(0,0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
21101            case BITS5(0,0,0,0,0): op = Iop_And32; nm = "and"; break;
21102            case BITS5(0,0,0,0,1): op = Iop_And32; nm = "bic";
21103                                   notArgR = True; break;
21104            case BITS5(0,0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
21105            case BITS5(0,0,0,1,1): op = Iop_Or32;  nm = "orn";
21106                                   notArgR = True; break;
21107            default: vassert(0);
21108         }
21109         IRTemp argL  = newTemp(Ity_I32);
21110         IRTemp argR  = newTemp(Ity_I32);
21111         IRTemp res   = newTemp(Ity_I32);
21112         Bool   updC  = False;
21113         UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
21114         assign(argL, getIRegT(rN));
21115         assign(argR, mkU32(notArgR ? ~imm32 : imm32));
21116         assign(res,  binop(op, mkexpr(argL), mkexpr(argR)));
21117         putIRegT(rD, mkexpr(res), condT);
21118         if (bS) {
21119            IRTemp oldV = newTemp(Ity_I32);
21120            IRTemp oldC = newTemp(Ity_I32);
21121            assign( oldV, mk_armg_calculate_flag_v() );
21122            assign( oldC, updC
21123                          ? mkU32((imm32 >> 31) & 1)
21124                          : mk_armg_calculate_flag_c() );
21125            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21126                               condT );
21127         }
21128         DIP("%s%s.w r%u, r%u, #%u\n",
21129             nm, bS == 1 ? "s" : "", rD, rN, imm32);
21130         goto decode_success;
21131      }
21132   }
21133
21134   /* ---------- (T3) ADD{S}.W Rd, Rn, Rm, {shift} ---------- */
21135   /* ---------- (T3) SUB{S}.W Rd, Rn, Rm, {shift} ---------- */
21136   /* ---------- (T3) RSB{S}.W Rd, Rn, Rm, {shift} ---------- */
21137   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21138       && (   INSN0(8,5) == BITS4(1,0,0,0)  // add subopc
21139           || INSN0(8,5) == BITS4(1,1,0,1)  // sub subopc
21140           || INSN0(8,5) == BITS4(1,1,1,0)) // rsb subopc
21141       && INSN1(15,15) == 0) {
21142      UInt rN   = INSN0(3,0);
21143      UInt rD   = INSN1(11,8);
21144      UInt rM   = INSN1(3,0);
21145      UInt bS   = INSN0(4,4);
21146      UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21147      UInt how  = INSN1(5,4);
21148
21149      Bool valid = !isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM);
21150      /* but allow "add.w reg, sp, reg, lsl #N for N=0..31
21151         (T3) "ADD (SP plus register) */
21152      if (!valid && INSN0(8,5) == BITS4(1,0,0,0) // add
21153          && rD != 15 && rN == 13 && imm5 <= 31 && how == 0) {
21154         valid = True;
21155      }
21156      /* also allow "sub.w reg, sp, reg   lsl #N for N=0 .. 5
21157         (T1) "SUB (SP minus register) */
21158      if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // sub
21159          && rD != 15 && rN == 13 && imm5 <= 5 && how == 0) {
21160         valid = True;
21161      }
21162      if (valid) {
21163         Bool   swap = False;
21164         IROp   op   = Iop_INVALID;
21165         const HChar* nm = "???";
21166         switch (INSN0(8,5)) {
21167            case BITS4(1,0,0,0): op = Iop_Add32; nm = "add"; break;
21168            case BITS4(1,1,0,1): op = Iop_Sub32; nm = "sub"; break;
21169            case BITS4(1,1,1,0): op = Iop_Sub32; nm = "rsb";
21170                                 swap = True; break;
21171            default: vassert(0);
21172         }
21173
21174         IRTemp argL = newTemp(Ity_I32);
21175         assign(argL, getIRegT(rN));
21176
21177         IRTemp rMt = newTemp(Ity_I32);
21178         assign(rMt, getIRegT(rM));
21179
21180         IRTemp argR = newTemp(Ity_I32);
21181         compute_result_and_C_after_shift_by_imm5(
21182            dis_buf, &argR, NULL, rMt, how, imm5, rM
21183         );
21184
21185         IRTemp res = newTemp(Ity_I32);
21186         assign(res, swap
21187                     ? binop(op, mkexpr(argR), mkexpr(argL))
21188                     : binop(op, mkexpr(argL), mkexpr(argR)));
21189
21190         putIRegT(rD, mkexpr(res), condT);
21191         if (bS) {
21192            switch (op) {
21193               case Iop_Add32:
21194                  setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
21195                  break;
21196               case Iop_Sub32:
21197                  if (swap)
21198                     setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
21199                  else
21200                     setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
21201                  break;
21202               default:
21203                  vassert(0);
21204            }
21205         }
21206
21207         DIP("%s%s.w r%u, r%u, %s\n",
21208             nm, bS ? "s" : "", rD, rN, dis_buf);
21209         goto decode_success;
21210      }
21211   }
21212
21213   /* ---------- (T3) ADC{S}.W Rd, Rn, Rm, {shift} ---------- */
21214   /* ---------- (T2) SBC{S}.W Rd, Rn, Rm, {shift} ---------- */
21215   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21216       && (   INSN0(8,5) == BITS4(1,0,1,0)   // adc subopc
21217           || INSN0(8,5) == BITS4(1,0,1,1))  // sbc subopc
21218       && INSN1(15,15) == 0) {
21219      /* ADC:  Rd = Rn + shifter_operand + oldC */
21220      /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
21221      UInt rN = INSN0(3,0);
21222      UInt rD = INSN1(11,8);
21223      UInt rM = INSN1(3,0);
21224      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21225         UInt bS   = INSN0(4,4);
21226         UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21227         UInt how  = INSN1(5,4);
21228
21229         IRTemp argL = newTemp(Ity_I32);
21230         assign(argL, getIRegT(rN));
21231
21232         IRTemp rMt = newTemp(Ity_I32);
21233         assign(rMt, getIRegT(rM));
21234
21235         IRTemp oldC = newTemp(Ity_I32);
21236         assign(oldC, mk_armg_calculate_flag_c());
21237
21238         IRTemp argR = newTemp(Ity_I32);
21239         compute_result_and_C_after_shift_by_imm5(
21240            dis_buf, &argR, NULL, rMt, how, imm5, rM
21241         );
21242
21243         const HChar* nm  = "???";
21244         IRTemp res = newTemp(Ity_I32);
21245         switch (INSN0(8,5)) {
21246            case BITS4(1,0,1,0): // ADC
21247               nm = "adc";
21248               assign(res,
21249                      binop(Iop_Add32,
21250                            binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
21251                            mkexpr(oldC) ));
21252               putIRegT(rD, mkexpr(res), condT);
21253               if (bS)
21254                  setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
21255                                     argL, argR, oldC, condT );
21256               break;
21257            case BITS4(1,0,1,1): // SBC
21258               nm = "sbc";
21259               assign(res,
21260                      binop(Iop_Sub32,
21261                            binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
21262                            binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
21263               putIRegT(rD, mkexpr(res), condT);
21264               if (bS)
21265                  setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
21266                                     argL, argR, oldC, condT );
21267               break;
21268            default:
21269               vassert(0);
21270         }
21271
21272         DIP("%s%s.w r%u, r%u, %s\n",
21273             nm, bS ? "s" : "", rD, rN, dis_buf);
21274         goto decode_success;
21275      }
21276   }
21277
21278   /* ---------- (T3) AND{S}.W Rd, Rn, Rm, {shift} ---------- */
21279   /* ---------- (T3) ORR{S}.W Rd, Rn, Rm, {shift} ---------- */
21280   /* ---------- (T3) EOR{S}.W Rd, Rn, Rm, {shift} ---------- */
21281   /* ---------- (T3) BIC{S}.W Rd, Rn, Rm, {shift} ---------- */
21282   /* ---------- (T1) ORN{S}.W Rd, Rn, Rm, {shift} ---------- */
21283   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21284       && (   INSN0(8,5) == BITS4(0,0,0,0)  // and subopc
21285           || INSN0(8,5) == BITS4(0,0,1,0)  // orr subopc
21286           || INSN0(8,5) == BITS4(0,1,0,0)  // eor subopc
21287           || INSN0(8,5) == BITS4(0,0,0,1)  // bic subopc
21288           || INSN0(8,5) == BITS4(0,0,1,1)) // orn subopc
21289       && INSN1(15,15) == 0) {
21290      UInt rN = INSN0(3,0);
21291      UInt rD = INSN1(11,8);
21292      UInt rM = INSN1(3,0);
21293      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21294         Bool notArgR = False;
21295         IROp op      = Iop_INVALID;
21296         const HChar* nm  = "???";
21297         switch (INSN0(8,5)) {
21298            case BITS4(0,0,0,0): op = Iop_And32; nm = "and"; break;
21299            case BITS4(0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
21300            case BITS4(0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
21301            case BITS4(0,0,0,1): op = Iop_And32; nm = "bic";
21302                                 notArgR = True; break;
21303            case BITS4(0,0,1,1): op = Iop_Or32; nm = "orn";
21304                                 notArgR = True; break;
21305            default: vassert(0);
21306         }
21307         UInt bS   = INSN0(4,4);
21308         UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21309         UInt how  = INSN1(5,4);
21310
21311         IRTemp rNt = newTemp(Ity_I32);
21312         assign(rNt, getIRegT(rN));
21313
21314         IRTemp rMt = newTemp(Ity_I32);
21315         assign(rMt, getIRegT(rM));
21316
21317         IRTemp argR = newTemp(Ity_I32);
21318         IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21319
21320         compute_result_and_C_after_shift_by_imm5(
21321            dis_buf, &argR, bS ? &oldC : NULL, rMt, how, imm5, rM
21322         );
21323
21324         IRTemp res = newTemp(Ity_I32);
21325         if (notArgR) {
21326            vassert(op == Iop_And32 || op == Iop_Or32);
21327            assign(res, binop(op, mkexpr(rNt),
21328                                  unop(Iop_Not32, mkexpr(argR))));
21329         } else {
21330            assign(res, binop(op, mkexpr(rNt), mkexpr(argR)));
21331         }
21332
21333         putIRegT(rD, mkexpr(res), condT);
21334         if (bS) {
21335            IRTemp oldV = newTemp(Ity_I32);
21336            assign( oldV, mk_armg_calculate_flag_v() );
21337            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21338                               condT );
21339         }
21340
21341         DIP("%s%s.w r%u, r%u, %s\n",
21342             nm, bS ? "s" : "", rD, rN, dis_buf);
21343         goto decode_success;
21344      }
21345   }
21346
21347   /* -------------- (T?) LSL{S}.W Rd, Rn, Rm -------------- */
21348   /* -------------- (T?) LSR{S}.W Rd, Rn, Rm -------------- */
21349   /* -------------- (T?) ASR{S}.W Rd, Rn, Rm -------------- */
21350   /* -------------- (T?) ROR{S}.W Rd, Rn, Rm -------------- */
21351   if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,0,0)
21352       && INSN1(15,12) == BITS4(1,1,1,1)
21353       && INSN1(7,4) == BITS4(0,0,0,0)) {
21354      UInt how = INSN0(6,5); // standard encoding
21355      UInt rN  = INSN0(3,0);
21356      UInt rD  = INSN1(11,8);
21357      UInt rM  = INSN1(3,0);
21358      UInt bS  = INSN0(4,4);
21359      Bool valid = !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rD);
21360      if (valid) {
21361         IRTemp rNt    = newTemp(Ity_I32);
21362         IRTemp rMt    = newTemp(Ity_I32);
21363         IRTemp res    = newTemp(Ity_I32);
21364         IRTemp oldC   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21365         IRTemp oldV   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21366         const HChar* nms[4] = { "lsl", "lsr", "asr", "ror" };
21367         const HChar* nm     = nms[how];
21368         assign(rNt, getIRegT(rN));
21369         assign(rMt, getIRegT(rM));
21370         compute_result_and_C_after_shift_by_reg(
21371            dis_buf, &res, bS ? &oldC : NULL,
21372            rNt, how, rMt, rN, rM
21373         );
21374         if (bS)
21375            assign(oldV, mk_armg_calculate_flag_v());
21376         putIRegT(rD, mkexpr(res), condT);
21377         if (bS) {
21378            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21379                               condT );
21380         }
21381         DIP("%s%s.w r%u, r%u, r%u\n",
21382             nm, bS ? "s" : "", rD, rN, rM);
21383         goto decode_success;
21384      }
21385   }
21386
21387   /* ------------ (T?) MOV{S}.W Rd, Rn, {shift} ------------ */
21388   /* ------------ (T?) MVN{S}.W Rd, Rn, {shift} ------------ */
21389   if ((INSN0(15,0) & 0xFFCF) == 0xEA4F
21390       && INSN1(15,15) == 0) {
21391      UInt rD      = INSN1(11,8);
21392      UInt rN      = INSN1(3,0);
21393      UInt bS      = INSN0(4,4);
21394      UInt isMVN   = INSN0(5,5);
21395      Bool regsOK  = (bS || isMVN)
21396                        ? (!isBadRegT(rD) && !isBadRegT(rN))
21397                        : (rD != 15 && rN != 15 && (rD != 13 || rN != 13));
21398      if (regsOK) {
21399         UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
21400         UInt how   = INSN1(5,4);
21401
21402         IRTemp rNt = newTemp(Ity_I32);
21403         assign(rNt, getIRegT(rN));
21404
21405         IRTemp oldRn = newTemp(Ity_I32);
21406         IRTemp oldC  = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21407         compute_result_and_C_after_shift_by_imm5(
21408            dis_buf, &oldRn, bS ? &oldC : NULL, rNt, how, imm5, rN
21409         );
21410
21411         IRTemp res = newTemp(Ity_I32);
21412         assign(res, isMVN ? unop(Iop_Not32, mkexpr(oldRn))
21413                           : mkexpr(oldRn));
21414
21415         putIRegT(rD, mkexpr(res), condT);
21416         if (bS) {
21417            IRTemp oldV = newTemp(Ity_I32);
21418            assign( oldV, mk_armg_calculate_flag_v() );
21419            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT);
21420         }
21421         DIP("%s%s.w r%u, %s\n",
21422             isMVN ? "mvn" : "mov", bS ? "s" : "", rD, dis_buf);
21423         goto decode_success;
21424      }
21425   }
21426
21427   /* -------------- (T?) TST.W Rn, Rm, {shift} -------------- */
21428   /* -------------- (T?) TEQ.W Rn, Rm, {shift} -------------- */
21429   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21430       && (   INSN0(8,4) == BITS5(0,0,0,0,1)  // TST
21431           || INSN0(8,4) == BITS5(0,1,0,0,1)) // TEQ
21432       && INSN1(15,15) == 0
21433       && INSN1(11,8) == BITS4(1,1,1,1)) {
21434      UInt rN = INSN0(3,0);
21435      UInt rM = INSN1(3,0);
21436      if (!isBadRegT(rN) && !isBadRegT(rM)) {
21437         Bool isTST = INSN0(8,4) == BITS5(0,0,0,0,1);
21438
21439         UInt how  = INSN1(5,4);
21440         UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21441
21442         IRTemp argL = newTemp(Ity_I32);
21443         assign(argL, getIRegT(rN));
21444
21445         IRTemp rMt = newTemp(Ity_I32);
21446         assign(rMt, getIRegT(rM));
21447
21448         IRTemp argR = newTemp(Ity_I32);
21449         IRTemp oldC = newTemp(Ity_I32);
21450         compute_result_and_C_after_shift_by_imm5(
21451            dis_buf, &argR, &oldC, rMt, how, imm5, rM
21452         );
21453
21454         IRTemp oldV = newTemp(Ity_I32);
21455         assign( oldV, mk_armg_calculate_flag_v() );
21456
21457         IRTemp res = newTemp(Ity_I32);
21458         assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
21459                           mkexpr(argL), mkexpr(argR)));
21460
21461         setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21462                            condT );
21463         DIP("%s.w r%u, %s\n", isTST ? "tst" : "teq", rN, dis_buf);
21464         goto decode_success;
21465      }
21466   }
21467
21468   /* -------------- (T3) CMP.W Rn, Rm, {shift} -------------- */
21469   /* -------------- (T2) CMN.W Rn, Rm, {shift} -------------- */
21470   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21471       && (   INSN0(8,4) == BITS5(1,1,0,1,1)  // CMP
21472           || INSN0(8,4) == BITS5(1,0,0,0,1)) // CMN
21473       && INSN1(15,15) == 0
21474       && INSN1(11,8) == BITS4(1,1,1,1)) {
21475      UInt rN = INSN0(3,0);
21476      UInt rM = INSN1(3,0);
21477      if (!isBadRegT(rN) && !isBadRegT(rM)) {
21478         Bool isCMN = INSN0(8,4) == BITS5(1,0,0,0,1);
21479         UInt how   = INSN1(5,4);
21480         UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
21481
21482         IRTemp argL = newTemp(Ity_I32);
21483         assign(argL, getIRegT(rN));
21484
21485         IRTemp rMt = newTemp(Ity_I32);
21486         assign(rMt, getIRegT(rM));
21487
21488         IRTemp argR = newTemp(Ity_I32);
21489         compute_result_and_C_after_shift_by_imm5(
21490            dis_buf, &argR, NULL, rMt, how, imm5, rM
21491         );
21492
21493         setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
21494                         argL, argR, condT );
21495
21496         DIP("%s.w r%u, %s\n", isCMN ? "cmn" : "cmp", rN, dis_buf);
21497         goto decode_success;
21498      }
21499   }
21500
21501   /* -------------- (T2) MOV{S}.W Rd, #constT -------------- */
21502   /* -------------- (T2) MVN{S}.W Rd, #constT -------------- */
21503   if (INSN0(15,11) == BITS5(1,1,1,1,0)
21504       && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // MOV
21505           || INSN0(9,5) == BITS5(0,0,0,1,1)) // MVN
21506       && INSN0(3,0) == BITS4(1,1,1,1)
21507       && INSN1(15,15) == 0) {
21508      UInt rD = INSN1(11,8);
21509      if (!isBadRegT(rD)) {
21510         Bool   updC  = False;
21511         UInt   bS    = INSN0(4,4);
21512         Bool   isMVN = INSN0(5,5) == 1;
21513         UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
21514         IRTemp res   = newTemp(Ity_I32);
21515         assign(res, mkU32(isMVN ? ~imm32 : imm32));
21516         putIRegT(rD, mkexpr(res), condT);
21517         if (bS) {
21518            IRTemp oldV = newTemp(Ity_I32);
21519            IRTemp oldC = newTemp(Ity_I32);
21520            assign( oldV, mk_armg_calculate_flag_v() );
21521            assign( oldC, updC
21522                          ? mkU32((imm32 >> 31) & 1)
21523                          : mk_armg_calculate_flag_c() );
21524            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21525                               condT );
21526         }
21527         DIP("%s%s.w r%u, #%u\n",
21528             isMVN ? "mvn" : "mov", bS ? "s" : "", rD, imm32);
21529         goto decode_success;
21530      }
21531   }
21532
21533   /* -------------- (T3) MOVW Rd, #imm16 -------------- */
21534   if (INSN0(15,11) == BITS5(1,1,1,1,0)
21535       && INSN0(9,4) == BITS6(1,0,0,1,0,0)
21536       && INSN1(15,15) == 0) {
21537      UInt rD = INSN1(11,8);
21538      if (!isBadRegT(rD)) {
21539         UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
21540                      | (INSN1(14,12) << 8) | INSN1(7,0);
21541         putIRegT(rD, mkU32(imm16), condT);
21542         DIP("movw r%u, #%u\n", rD, imm16);
21543         goto decode_success;
21544      }
21545   }
21546
21547   /* ---------------- MOVT Rd, #imm16 ---------------- */
21548   if (INSN0(15,11) == BITS5(1,1,1,1,0)
21549       && INSN0(9,4) == BITS6(1,0,1,1,0,0)
21550       && INSN1(15,15) == 0) {
21551      UInt rD = INSN1(11,8);
21552      if (!isBadRegT(rD)) {
21553         UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
21554                      | (INSN1(14,12) << 8) | INSN1(7,0);
21555         IRTemp res = newTemp(Ity_I32);
21556         assign(res,
21557                binop(Iop_Or32,
21558                      binop(Iop_And32, getIRegT(rD), mkU32(0xFFFF)),
21559                      mkU32(imm16 << 16)));
21560         putIRegT(rD, mkexpr(res), condT);
21561         DIP("movt r%u, #%u\n", rD, imm16);
21562         goto decode_success;
21563      }
21564   }
21565
21566   /* ---------------- LD/ST reg+/-#imm8 ---------------- */
21567   /* Loads and stores of the form:
21568         op  Rt, [Rn, #-imm8]      or
21569         op  Rt, [Rn], #+/-imm8    or
21570         op  Rt, [Rn, #+/-imm8]!
21571      where op is one of
21572         ldrb ldrh ldr  ldrsb ldrsh
21573         strb strh str
21574   */
21575   if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0) && INSN1(11,11) == 1) {
21576      Bool   valid  = True;
21577      Bool   syned  = False;
21578      Bool   isST   = False;
21579      IRType ty     = Ity_I8;
21580      const HChar* nm = "???";
21581
21582      switch (INSN0(8,4)) {
21583         case BITS5(0,0,0,0,0):   // strb
21584            nm = "strb"; isST = True; break;
21585         case BITS5(0,0,0,0,1):   // ldrb
21586            nm = "ldrb"; break;
21587         case BITS5(1,0,0,0,1):   // ldrsb
21588            nm = "ldrsb"; syned = True; break;
21589         case BITS5(0,0,0,1,0):   // strh
21590            nm = "strh"; ty = Ity_I16; isST = True; break;
21591         case BITS5(0,0,0,1,1):   // ldrh
21592            nm = "ldrh"; ty = Ity_I16; break;
21593         case BITS5(1,0,0,1,1):   // ldrsh
21594            nm = "ldrsh"; ty = Ity_I16; syned = True; break;
21595         case BITS5(0,0,1,0,0):   // str
21596            nm = "str"; ty = Ity_I32; isST = True; break;
21597         case BITS5(0,0,1,0,1):
21598            nm = "ldr"; ty = Ity_I32; break;  // ldr
21599         default:
21600            valid = False; break;
21601      }
21602
21603      UInt rN      = INSN0(3,0);
21604      UInt rT      = INSN1(15,12);
21605      UInt bP      = INSN1(10,10);
21606      UInt bU      = INSN1(9,9);
21607      UInt bW      = INSN1(8,8);
21608      UInt imm8    = INSN1(7,0);
21609      Bool loadsPC = False;
21610
21611      if (valid) {
21612         if (bP == 1 && bU == 1 && bW == 0)
21613            valid = False;
21614         if (bP == 0 && bW == 0)
21615            valid = False;
21616         if (rN == 15)
21617            valid = False;
21618         if (bW == 1 && rN == rT)
21619            valid = False;
21620         if (ty == Ity_I8 || ty == Ity_I16) {
21621            if (isBadRegT(rT))
21622               valid = False;
21623         } else {
21624            /* ty == Ity_I32 */
21625            if (isST && rT == 15)
21626               valid = False;
21627            if (!isST && rT == 15)
21628               loadsPC = True;
21629         }
21630      }
21631
21632      if (valid) {
21633         // if it's a branch, it can't happen in the middle of an IT block
21634         // Also, if it is a branch, make it unconditional at this point.
21635         // Doing conditional branches in-line is too complex (for now)
21636         if (loadsPC) {
21637            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
21638            // go uncond
21639            mk_skip_over_T32_if_cond_is_false(condT);
21640            condT = IRTemp_INVALID;
21641            // now uncond
21642         }
21643
21644         IRTemp preAddr = newTemp(Ity_I32);
21645         assign(preAddr, getIRegT(rN));
21646
21647         IRTemp postAddr = newTemp(Ity_I32);
21648         assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
21649                                mkexpr(preAddr), mkU32(imm8)));
21650
21651         IRTemp transAddr = bP == 1 ? postAddr : preAddr;
21652
21653         if (isST) {
21654
21655            /* Store.  If necessary, update the base register before
21656               the store itself, so that the common idiom of "str rX,
21657               [sp, #-4]!" (store rX at sp-4, then do new sp = sp-4,
21658               a.k.a "push rX") doesn't cause Memcheck to complain
21659               that the access is below the stack pointer.  Also, not
21660               updating sp before the store confuses Valgrind's
21661               dynamic stack-extending logic.  So do it before the
21662               store.  Hence we need to snarf the store data before
21663               doing the basereg update. */
21664
21665            /* get hold of the data to be stored */
21666            IRTemp oldRt = newTemp(Ity_I32);
21667            assign(oldRt, getIRegT(rT));
21668
21669            /* Update Rn if necessary. */
21670            if (bW == 1) {
21671               vassert(rN != rT); // assured by validity check above
21672               putIRegT(rN, mkexpr(postAddr), condT);
21673            }
21674
21675            /* generate the transfer */
21676            IRExpr* data = NULL;
21677            switch (ty) {
21678               case Ity_I8:
21679                  data = unop(Iop_32to8, mkexpr(oldRt));
21680                  break;
21681               case Ity_I16:
21682                  data = unop(Iop_32to16, mkexpr(oldRt));
21683                  break;
21684               case Ity_I32:
21685                  data = mkexpr(oldRt);
21686                  break;
21687               default:
21688                  vassert(0);
21689            }
21690            storeGuardedLE(mkexpr(transAddr), data, condT);
21691
21692         } else {
21693
21694            /* Load. */
21695            IRTemp llOldRt = newTemp(Ity_I32);
21696            assign(llOldRt, llGetIReg(rT));
21697
21698            /* generate the transfer */
21699            IRTemp    newRt = newTemp(Ity_I32);
21700            IRLoadGOp widen = ILGop_INVALID;
21701            switch (ty) {
21702               case Ity_I8:
21703                  widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
21704               case Ity_I16:
21705                  widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
21706               case Ity_I32:
21707                  widen = ILGop_Ident32; break;
21708               default:
21709                  vassert(0);
21710            }
21711            loadGuardedLE(newRt, widen,
21712                          mkexpr(transAddr), mkexpr(llOldRt), condT);
21713            if (rT == 15) {
21714               vassert(loadsPC);
21715               /* We'll do the write to the PC just below */
21716            } else {
21717               vassert(!loadsPC);
21718               /* IRTemp_INVALID is OK here because in the case where
21719                  condT is false at run time, we're just putting the
21720                  old rT value back. */
21721               putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21722            }
21723
21724            /* Update Rn if necessary. */
21725            if (bW == 1) {
21726               vassert(rN != rT); // assured by validity check above
21727               putIRegT(rN, mkexpr(postAddr), condT);
21728            }
21729
21730            if (loadsPC) {
21731               /* Presumably this is an interworking branch. */
21732               vassert(rN != 15); // assured by validity check above
21733               vassert(rT == 15);
21734               vassert(condT == IRTemp_INVALID); /* due to check above */
21735               llPutIReg(15, mkexpr(newRt));
21736               dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
21737               dres.whatNext    = Dis_StopHere;
21738            }
21739         }
21740
21741         if (bP == 1 && bW == 0) {
21742            DIP("%s.w r%u, [r%u, #%c%u]\n",
21743                nm, rT, rN, bU ? '+' : '-', imm8);
21744         }
21745         else if (bP == 1 && bW == 1) {
21746            DIP("%s.w r%u, [r%u, #%c%u]!\n",
21747                nm, rT, rN, bU ? '+' : '-', imm8);
21748         }
21749         else {
21750            vassert(bP == 0 && bW == 1);
21751            DIP("%s.w r%u, [r%u], #%c%u\n",
21752                nm, rT, rN, bU ? '+' : '-', imm8);
21753         }
21754
21755         goto decode_success;
21756      }
21757   }
21758
   /* ------------- LD/ST reg+(reg<<imm2) ------------- */
   /* Loads and stores of the form:
         op  Rt, [Rn, Rm, LSL #imm2]
      where op is one of
         ldrb ldrh ldr  ldrsb ldrsh
         strb strh str
      The transfer address is Rn + (Rm << imm2).  This form has no
      base-register writeback.
   */
   if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)
       && INSN1(11,6) == BITS6(0,0,0,0,0,0)) {
      Bool   valid  = True;
      Bool   syned  = False;
      Bool   isST   = False;
      IRType ty     = Ity_I8;
      const HChar* nm = "???";

      /* INSN0(8,4) selects the operation: it fixes the mnemonic, the
         transfer size, load-vs-store and whether a load sign-extends. */
      switch (INSN0(8,4)) {
         case BITS5(0,0,0,0,0):   // strb
            nm = "strb"; isST = True; break;
         case BITS5(0,0,0,0,1):   // ldrb
            nm = "ldrb"; break;
         case BITS5(1,0,0,0,1):   // ldrsb
            nm = "ldrsb"; syned = True; break;
         case BITS5(0,0,0,1,0):   // strh
            nm = "strh"; ty = Ity_I16; isST = True; break;
         case BITS5(0,0,0,1,1):   // ldrh
            nm = "ldrh"; ty = Ity_I16; break;
         case BITS5(1,0,0,1,1):   // ldrsh
            nm = "ldrsh"; ty = Ity_I16; syned = True; break;
         case BITS5(0,0,1,0,0):   // str
            nm = "str"; ty = Ity_I32; isST = True; break;
         case BITS5(0,0,1,0,1):
            nm = "ldr"; ty = Ity_I32; break;  // ldr
         default:
            valid = False; break;
      }

      UInt rN      = INSN0(3,0);
      UInt rM      = INSN1(3,0);
      UInt rT      = INSN1(15,12);
      UInt imm2    = INSN1(5,4);
      Bool loadsPC = False;

      if (ty == Ity_I8 || ty == Ity_I16) {
         /* all 8- and 16-bit load and store cases have the
            same exclusion set. */
         if (rN == 15 || isBadRegT(rT) || isBadRegT(rM))
            valid = False;
      } else {
         vassert(ty == Ity_I32);
         if (rN == 15 || isBadRegT(rM))
            valid = False;
         if (isST && rT == 15)
            valid = False;
         /* If it is a load and rT is 15, that's only allowable if we
            are not in an IT block, or are the last in it.  Need to
            insert a dynamic check for that. */
         if (!isST && rT == 15)
            loadsPC = True;
      }

      if (valid) {
         // if it's a branch, it can't happen in the middle of an IT block
         // Also, if it is a branch, make it unconditional at this point.
         // Doing conditional branches in-line is too complex (for now)
         if (loadsPC) {
            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
            // go uncond
            mk_skip_over_T32_if_cond_is_false(condT);
            condT = IRTemp_INVALID;
            // now uncond
         }

         /* Effective address: Rn + (Rm << imm2). */
         IRTemp transAddr = newTemp(Ity_I32);
         assign(transAddr,
                binop( Iop_Add32,
                       getIRegT(rN),
                       binop(Iop_Shl32, getIRegT(rM), mkU8(imm2)) ));

         if (isST) {

            /* get hold of the data to be stored */
            IRTemp oldRt = newTemp(Ity_I32);
            assign(oldRt, getIRegT(rT));

            /* generate the transfer */
            /* Narrow the register value to the transfer size. */
            IRExpr* data = NULL;
            switch (ty) {
               case Ity_I8:
                  data = unop(Iop_32to8, mkexpr(oldRt));
                  break;
               case Ity_I16:
                  data = unop(Iop_32to16, mkexpr(oldRt));
                  break;
              case Ity_I32:
                  data = mkexpr(oldRt);
                  break;
              default:
                 vassert(0);
            }
            storeGuardedLE(mkexpr(transAddr), data, condT);

         } else {

            /* Load. */
            /* The current value of rT is passed to the guarded load
               alongside the guard condT. */
            IRTemp llOldRt = newTemp(Ity_I32);
            assign(llOldRt, llGetIReg(rT));

            /* generate the transfer */
            /* Pick the widening conversion that matches the transfer
               size and signedness. */
            IRTemp    newRt = newTemp(Ity_I32);
            IRLoadGOp widen = ILGop_INVALID;
            switch (ty) {
               case Ity_I8:
                  widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
               case Ity_I16:
                  widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
               case Ity_I32:
                  widen = ILGop_Ident32; break;
               default:
                  vassert(0);
            }
            loadGuardedLE(newRt, widen,
                          mkexpr(transAddr), mkexpr(llOldRt), condT);

            if (rT == 15) {
               vassert(loadsPC);
               /* We'll do the write to the PC just below */
            } else {
               vassert(!loadsPC);
               /* IRTemp_INVALID is OK here because in the case where
                  condT is false at run time, we're just putting the
                  old rT value back. */
               putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
            }

            if (loadsPC) {
               /* Presumably this is an interworking branch. */
               vassert(rN != 15); // assured by validity check above
               vassert(rT == 15);
               vassert(condT == IRTemp_INVALID); /* due to check above */
               /* The loaded word becomes the new PC and ends the
                  current block. */
               llPutIReg(15, mkexpr(newRt));
               dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
               dres.whatNext    = Dis_StopHere;
            }
         }

         DIP("%s.w r%u, [r%u, r%u, LSL #%u]\n",
             nm, rT, rN, rM, imm2);

         goto decode_success;
      }
   }
21910
21911   /* --------------- LD/ST reg+imm12 --------------- */
21912   /* Loads and stores of the form:
21913         op  Rt, [Rn, #+-imm12]
21914      where op is one of
21915         ldrb ldrh ldr  ldrsb ldrsh
21916         strb strh str
21917   */
21918   if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)) {
21919      Bool   valid  = True;
21920      Bool   syned  = INSN0(8,8) == 1;
21921      Bool   isST   = False;
21922      IRType ty     = Ity_I8;
21923      UInt   bU     = INSN0(7,7); // 1: +imm   0: -imm
21924                                  // -imm is only supported by literal versions
21925      const HChar* nm = "???";
21926
21927      switch (INSN0(6,4)) {
21928         case BITS3(0,0,0):   // strb
21929            nm = "strb"; isST = True; break;
21930         case BITS3(0,0,1):   // ldrb
21931            nm = syned ? "ldrsb" : "ldrb"; break;
21932         case BITS3(0,1,0):   // strh
21933            nm = "strh"; ty = Ity_I16; isST = True; break;
21934         case BITS3(0,1,1):   // ldrh
21935            nm = syned ? "ldrsh" : "ldrh"; ty = Ity_I16; break;
21936         case BITS3(1,0,0):   // str
21937            nm = "str"; ty = Ity_I32; isST = True; break;
21938         case BITS3(1,0,1):
21939            nm = "ldr"; ty = Ity_I32; break;  // ldr
21940         default:
21941            valid = False; break;
21942      }
21943
21944      UInt rN      = INSN0(3,0);
21945      UInt rT      = INSN1(15,12);
21946      UInt imm12   = INSN1(11,0);
21947      Bool loadsPC = False;
21948
21949      if (rN != 15 && bU == 0) {
21950         // only pc supports #-imm12
21951         valid = False;
21952      }
21953
21954      if (isST) {
21955         if (syned) valid = False;
21956         if (rN == 15 || rT == 15)
21957            valid = False;
21958      } else {
21959         /* For a 32-bit load, rT == 15 is only allowable if we are not
21960            in an IT block, or are the last in it.  Need to insert
21961            a dynamic check for that.  Also, in this particular
21962            case, rN == 15 is allowable.  In this case however, the
21963            value obtained for rN is (apparently)
21964            "word-align(address of current insn + 4)". */
21965         if (rT == 15) {
21966            if (ty == Ity_I32)
21967               loadsPC = True;
21968            else // Can't do it for B/H loads
21969               valid = False;
21970         }
21971      }
21972
21973      if (valid) {
21974         // if it's a branch, it can't happen in the middle of an IT block
21975         // Also, if it is a branch, make it unconditional at this point.
21976         // Doing conditional branches in-line is too complex (for now)
21977         if (loadsPC) {
21978            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
21979            // go uncond
21980            mk_skip_over_T32_if_cond_is_false(condT);
21981            condT = IRTemp_INVALID;
21982            // now uncond
21983         }
21984
21985         IRTemp rNt = newTemp(Ity_I32);
21986         if (rN == 15) {
21987            vassert(!isST);
21988            assign(rNt, binop(Iop_And32, getIRegT(15), mkU32(~3)));
21989         } else {
21990            assign(rNt, getIRegT(rN));
21991         }
21992
21993         IRTemp transAddr = newTemp(Ity_I32);
21994         assign(transAddr,
21995                binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
21996                      mkexpr(rNt), mkU32(imm12)));
21997
21998         IRTemp oldRt = newTemp(Ity_I32);
21999         assign(oldRt, getIRegT(rT));
22000
22001         IRTemp llOldRt = newTemp(Ity_I32);
22002         assign(llOldRt, llGetIReg(rT));
22003
22004         if (isST) {
22005            IRExpr* data = NULL;
22006            switch (ty) {
22007               case Ity_I8:
22008                  data = unop(Iop_32to8, mkexpr(oldRt));
22009                  break;
22010               case Ity_I16:
22011                  data = unop(Iop_32to16, mkexpr(oldRt));
22012                  break;
22013              case Ity_I32:
22014                  data = mkexpr(oldRt);
22015                  break;
22016              default:
22017                 vassert(0);
22018            }
22019            storeGuardedLE(mkexpr(transAddr), data, condT);
22020         } else {
22021            IRTemp    newRt = newTemp(Ity_I32);
22022            IRLoadGOp widen = ILGop_INVALID;
22023            switch (ty) {
22024               case Ity_I8:
22025                  widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
22026               case Ity_I16:
22027                  widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
22028               case Ity_I32:
22029                  widen = ILGop_Ident32; break;
22030               default:
22031                  vassert(0);
22032            }
22033            loadGuardedLE(newRt, widen,
22034                          mkexpr(transAddr), mkexpr(llOldRt), condT);
22035            if (rT == 15) {
22036               vassert(loadsPC);
22037               /* We'll do the write to the PC just below */
22038            } else {
22039               vassert(!loadsPC);
22040               /* IRTemp_INVALID is OK here because in the case where
22041                  condT is false at run time, we're just putting the
22042                  old rT value back. */
22043               putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
22044            }
22045
22046            if (loadsPC) {
22047               /* Presumably this is an interworking branch. */
22048               vassert(rT == 15);
22049               vassert(condT == IRTemp_INVALID); /* due to check above */
22050               llPutIReg(15, mkexpr(newRt));
22051               dres.jk_StopHere = Ijk_Boring;
22052               dres.whatNext    = Dis_StopHere;
22053            }
22054         }
22055
22056         DIP("%s.w r%u, [r%u, +#%u]\n", nm, rT, rN, imm12);
22057
22058         goto decode_success;
22059      }
22060   }
22061
   /* -------------- LDRD/STRD reg+/-#imm8 -------------- */
   /* Doubleword loads and stores of the form:
         ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]    or
         ldrd/strd  Rt, Rt2, [Rn], #+/-imm8    or
         ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]!
      The byte offset applied is imm8 << 2.  Rt is transferred at the
      transfer address and Rt2 at the transfer address + 4.
   */
   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,0) && INSN0(6,6) == 1) {
      UInt bP   = INSN0(8,8);  // 1: offset applied before the transfer
      UInt bU   = INSN0(7,7);  // 1: add offset  0: subtract offset
      UInt bW   = INSN0(5,5);  // 1: write the updated address back to Rn
      UInt bL   = INSN0(4,4);  // 1: load  0: store
      UInt rN   = INSN0(3,0);
      UInt rT   = INSN1(15,12);
      UInt rT2  = INSN1(11,8);
      UInt imm8 = INSN1(7,0);

      Bool valid = True;
      if (bP == 0 && bW == 0)                 valid = False;
      if (bW == 1 && (rN == rT || rN == rT2)) valid = False;
      if (isBadRegT(rT) || isBadRegT(rT2))    valid = False;
      if (bL == 1 && rT == rT2)               valid = False;
      /* It's OK to use PC as the base register only in the
         following case: ldrd Rt, Rt2, [PC, #+/-imm8] */
      if (rN == 15 && (bL == 0/*store*/
                       || bW == 1/*wb*/))     valid = False;

      if (valid) {
         /* Base value: word-aligned PC for the PC-relative form,
            otherwise Rn. */
         IRTemp preAddr = newTemp(Ity_I32);
         assign(preAddr, 15 == rN
                           ? binop(Iop_And32, getIRegT(15), mkU32(~3U))
                           : getIRegT(rN));

         IRTemp postAddr = newTemp(Ity_I32);
         assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
                                mkexpr(preAddr), mkU32(imm8 << 2)));

         /* Pre-indexed forms transfer at the offset address,
            post-indexed forms at the unmodified base. */
         IRTemp transAddr = bP == 1 ? postAddr : preAddr;

         /* For almost all cases, we do the writeback after the transfers.
            However, that leaves the stack "uncovered" in cases like:
               strd    rD, [sp, #-8]
               strd    rD, [sp, #-16]
            In which case, do the writeback to SP now, instead of later.
            This is bad in that it makes the insn non-restartable if the
            accesses fault, but at least keeps Memcheck happy. */
         Bool writeback_already_done = False;
         if (bL == 0/*store*/ && bW == 1/*wb*/
             && rN == 13 && rN != rT && rN != rT2
             && bU == 0/*minus*/
             && ((imm8 << 2) == 8 || (imm8 << 2) == 16)) {
            putIRegT(rN, mkexpr(postAddr), condT);
            writeback_already_done = True;
         }

         if (bL == 0) {
            /* Store: snapshot both source registers, then emit two
               guarded word stores. */
            IRTemp oldRt  = newTemp(Ity_I32);
            IRTemp oldRt2 = newTemp(Ity_I32);
            assign(oldRt,  getIRegT(rT));
            assign(oldRt2, getIRegT(rT2));
            storeGuardedLE( mkexpr(transAddr),
                            mkexpr(oldRt), condT );
            storeGuardedLE( binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
                            mkexpr(oldRt2), condT );
         } else {
            /* Load: the current register values are passed to the
               guarded loads alongside the guard condT. */
            IRTemp oldRt  = newTemp(Ity_I32);
            IRTemp oldRt2 = newTemp(Ity_I32);
            IRTemp newRt  = newTemp(Ity_I32);
            IRTemp newRt2 = newTemp(Ity_I32);
            assign(oldRt,  llGetIReg(rT));
            assign(oldRt2, llGetIReg(rT2));
            loadGuardedLE( newRt, ILGop_Ident32,
                           mkexpr(transAddr),
                           mkexpr(oldRt), condT );
            loadGuardedLE( newRt2, ILGop_Ident32,
                           binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
                           mkexpr(oldRt2), condT );
            /* Put unconditionally, since we already switched on the condT
               in the guarded loads. */
            putIRegT(rT,  mkexpr(newRt),  IRTemp_INVALID);
            putIRegT(rT2, mkexpr(newRt2), IRTemp_INVALID);
         }

         /* Normal (late) writeback, unless it was already done for the
            SP special case above. */
         if (bW == 1 && !writeback_already_done) {
            putIRegT(rN, mkexpr(postAddr), condT);
         }

         const HChar* nm = bL ? "ldrd" : "strd";

         /* Trace output: offset, pre-indexed with writeback, or
            post-indexed syntax respectively. */
         if (bP == 1 && bW == 0) {
            DIP("%s.w r%u, r%u, [r%u, #%c%u]\n",
                nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
         }
         else if (bP == 1 && bW == 1) {
            DIP("%s.w r%u, r%u, [r%u, #%c%u]!\n",
                nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
         }
         else {
            vassert(bP == 0 && bW == 1);
            DIP("%s.w r%u, r%u, [r%u], #%c%u\n",
                nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
         }

         goto decode_success;
      }
   }
22167
   /* -------------- (T3) Bcond.W label -------------- */
   /* This variant carries its own condition, so can't be part of an
      IT block ... */
   if (INSN0(15,11) == BITS5(1,1,1,1,0)
       && INSN1(15,14) == BITS2(1,0)
       && INSN1(12,12) == 0) {
      UInt cond = INSN0(9,6);
      /* Only genuine conditions are handled by this case; AL and NV
         are left for other decode cases. */
      if (cond != ARMCondAL && cond != ARMCondNV) {
         /* Assemble the 21-bit branch offset, most significant field
            first; bit 0 is always zero. */
         UInt uimm21
            =   (INSN0(10,10) << (1 + 1 + 6 + 11 + 1))
              | (INSN1(11,11) << (1 + 6 + 11 + 1))
              | (INSN1(13,13) << (6 + 11 + 1))
              | (INSN0(5,0)   << (11 + 1))
              | (INSN1(10,0)  << 1);
         /* Sign-extend from 21 to 32 bits: move the field to the top
            of the word, then shift back down as a signed value (this
            relies on >> of a negative Int being arithmetic). */
         uimm21 <<= 11;
         Int simm21 = (Int)uimm21;
         simm21 >>= 11;

         vassert(0 == (guest_R15_curr_instr_notENC & 1));
         /* The offset is relative to the address of this insn + 4. */
         UInt dst = simm21 + guest_R15_curr_instr_notENC + 4;

         /* Not allowed in an IT block; SIGILL if so. */
         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);

         /* Side-exit to dst if the condition holds; otherwise fall
            through to the next insn.  Both targets carry the Thumb
            bit. */
         IRTemp kondT = newTemp(Ity_I32);
         assign( kondT, mk_armg_calculate_condition(cond) );
         stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
                            Ijk_Boring,
                            IRConst_U32(dst | 1/*CPSR.T*/),
                            OFFB_R15T ));
         llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 4)
                              | 1 /*CPSR.T*/ ));
         dres.jk_StopHere = Ijk_Boring;
         dres.whatNext    = Dis_StopHere;
         DIP("b%s.w 0x%x\n", nCC(cond), dst);
         goto decode_success;
      }
   }
22206
   /* ---------------- (T4) B.W label ---------------- */
   /* ... whereas this variant doesn't carry its own condition, so it
      has to be either unconditional or the conditional by virtue of
      being the last in an IT block.  The upside is that there's 4
      more bits available for the jump offset, so it has a 16-times
      greater branch range than the T3 variant. */
   if (INSN0(15,11) == BITS5(1,1,1,1,0)
       && INSN1(15,14) == BITS2(1,0)
       && INSN1(12,12) == 1) {
      if (1) {
         /* I1 and I2 are derived from J1/J2 and the sign bit S as
            NOT(J XOR S). */
         UInt bS  = INSN0(10,10);
         UInt bJ1 = INSN1(13,13);
         UInt bJ2 = INSN1(11,11);
         UInt bI1 = 1 ^ (bJ1 ^ bS);
         UInt bI2 = 1 ^ (bJ2 ^ bS);
         /* Assemble the 25-bit branch offset, most significant field
            first; bit 0 is always zero. */
         UInt uimm25
            =   (bS          << (1 + 1 + 10 + 11 + 1))
              | (bI1         << (1 + 10 + 11 + 1))
              | (bI2         << (10 + 11 + 1))
              | (INSN0(9,0)  << (11 + 1))
              | (INSN1(10,0) << 1);
         /* Sign-extend from 25 to 32 bits: move the field to the top
            of the word, then shift back down as a signed value (this
            relies on >> of a negative Int being arithmetic). */
         uimm25 <<= 7;
         Int simm25 = (Int)uimm25;
         simm25 >>= 7;

         vassert(0 == (guest_R15_curr_instr_notENC & 1));
         /* The offset is relative to the address of this insn + 4. */
         UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;

         /* If in an IT block, must be the last insn. */
         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);

         // go uncond
         mk_skip_over_T32_if_cond_is_false(condT);
         condT = IRTemp_INVALID;
         // now uncond

         // branch to dst
         llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
         dres.jk_StopHere = Ijk_Boring;
         dres.whatNext    = Dis_StopHere;
         DIP("b.w 0x%x\n", dst);
         goto decode_success;
      }
   }
22251
   /* ------------------ TBB, TBH ------------------ */
   /* Table branch: load an unsigned byte (TBB) or halfword (TBH) from
      a table at Rn, indexed by Rm, and branch forward by twice that
      value relative to the value read for r15. */
   if (INSN0(15,4) == 0xE8D && INSN1(15,5) == 0x780) {
      UInt rN = INSN0(3,0);
      UInt rM = INSN1(3,0);
      UInt bH = INSN1(4,4);  // 1: TBH (halfword entries)  0: TBB (bytes)
      /* Note that for bH == 1 the register validity checks are
         bypassed entirely. */
      if (bH/*ATC*/ || (rN != 13 && !isBadRegT(rM))) {
         /* Must be last or not-in IT block */
         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
         /* Go uncond */
         mk_skip_over_T32_if_cond_is_false(condT);
         condT = IRTemp_INVALID;

         /* Table entry address: Rn + Rm for TBB, Rn + (Rm << 1) for
            TBH. */
         IRExpr* ea
             = binop(Iop_Add32,
                     getIRegT(rN),
                     bH ? binop(Iop_Shl32, getIRegT(rM), mkU8(1))
                        : getIRegT(rM));

         /* Zero-extend the table entry to 32 bits. */
         IRTemp delta = newTemp(Ity_I32);
         if (bH) {
            assign(delta, unop(Iop_16Uto32, loadLE(Ity_I16, ea)));
         } else {
            assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
         }

         /* New PC = r15 value + 2 * entry, with the Thumb bit set. */
         llPutIReg(
            15,
            binop(Iop_Or32,
                  binop(Iop_Add32,
                        getIRegT(15),
                        binop(Iop_Shl32, mkexpr(delta), mkU8(1))
                  ),
                  mkU32(1)
         ));
         dres.jk_StopHere = Ijk_Boring;
         dres.whatNext    = Dis_StopHere;
         DIP("tb%c [r%u, r%u%s]\n",
             bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
         goto decode_success;
      }
   }
22293
22294   /* ------------------ UBFX ------------------ */
22295   /* ------------------ SBFX ------------------ */
22296   /* There's also ARM versions of same, but it doesn't seem worth the
22297      hassle to common up the handling (it's only a couple of C
22298      statements). */
22299   if ((INSN0(15,4) == 0xF3C // UBFX
22300        || INSN0(15,4) == 0xF34) // SBFX
22301       && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
22302      UInt rN  = INSN0(3,0);
22303      UInt rD  = INSN1(11,8);
22304      UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
22305      UInt wm1 = INSN1(4,0);
22306      UInt msb =  lsb + wm1;
22307      if (!isBadRegT(rD) && !isBadRegT(rN) && msb <= 31) {
22308         Bool   isU  = INSN0(15,4) == 0xF3C;
22309         IRTemp src  = newTemp(Ity_I32);
22310         IRTemp tmp  = newTemp(Ity_I32);
22311         IRTemp res  = newTemp(Ity_I32);
22312         UInt   mask = ((1 << wm1) - 1) + (1 << wm1);
22313         vassert(msb >= 0 && msb <= 31);
22314         vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
22315
22316         assign(src, getIRegT(rN));
22317         assign(tmp, binop(Iop_And32,
22318                           binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
22319                           mkU32(mask)));
22320         assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
22321                           binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
22322                           mkU8(31-wm1)));
22323
22324         putIRegT(rD, mkexpr(res), condT);
22325
22326         DIP("%s r%u, r%u, #%u, #%u\n",
22327             isU ? "ubfx" : "sbfx", rD, rN, lsb, wm1 + 1);
22328         goto decode_success;
22329      }
22330   }
22331
22332   /* ------------------ UXTB ------------------ */
22333   /* ------------------ UXTH ------------------ */
22334   /* ------------------ SXTB ------------------ */
22335   /* ------------------ SXTH ------------------ */
22336   /* ----------------- UXTB16 ----------------- */
22337   /* ----------------- SXTB16 ----------------- */
22338   /* FIXME: this is an exact duplicate of the ARM version.  They
22339      should be commoned up. */
22340   if ((INSN0(15,0) == 0xFA5F     // UXTB
22341        || INSN0(15,0) == 0xFA1F  // UXTH
22342        || INSN0(15,0) == 0xFA4F  // SXTB
22343        || INSN0(15,0) == 0xFA0F  // SXTH
22344        || INSN0(15,0) == 0xFA3F  // UXTB16
22345        || INSN0(15,0) == 0xFA2F) // SXTB16
22346       && INSN1(15,12) == BITS4(1,1,1,1)
22347       && INSN1(7,6) == BITS2(1,0)) {
22348      UInt rD = INSN1(11,8);
22349      UInt rM = INSN1(3,0);
22350      UInt rot = INSN1(5,4);
22351      if (!isBadRegT(rD) && !isBadRegT(rM)) {
22352         const HChar* nm = "???";
22353         IRTemp srcT = newTemp(Ity_I32);
22354         IRTemp rotT = newTemp(Ity_I32);
22355         IRTemp dstT = newTemp(Ity_I32);
22356         assign(srcT, getIRegT(rM));
22357         assign(rotT, genROR32(srcT, 8 * rot));
22358         switch (INSN0(15,0)) {
22359            case 0xFA5F: // UXTB
22360               nm = "uxtb";
22361               assign(dstT, unop(Iop_8Uto32,
22362                                 unop(Iop_32to8, mkexpr(rotT))));
22363               break;
22364            case 0xFA1F: // UXTH
22365               nm = "uxth";
22366               assign(dstT, unop(Iop_16Uto32,
22367                                 unop(Iop_32to16, mkexpr(rotT))));
22368               break;
22369            case 0xFA4F: // SXTB
22370               nm = "sxtb";
22371               assign(dstT, unop(Iop_8Sto32,
22372                                 unop(Iop_32to8, mkexpr(rotT))));
22373               break;
22374            case 0xFA0F: // SXTH
22375               nm = "sxth";
22376               assign(dstT, unop(Iop_16Sto32,
22377                                 unop(Iop_32to16, mkexpr(rotT))));
22378               break;
22379            case 0xFA3F: // UXTB16
22380               nm = "uxtb16";
22381               assign(dstT, binop(Iop_And32, mkexpr(rotT),
22382                                             mkU32(0x00FF00FF)));
22383               break;
22384            case 0xFA2F: { // SXTB16
22385               nm = "sxtb16";
22386               IRTemp lo32 = newTemp(Ity_I32);
22387               IRTemp hi32 = newTemp(Ity_I32);
22388               assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
22389               assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
22390               assign(
22391                  dstT,
22392                  binop(Iop_Or32,
22393                        binop(Iop_And32,
22394                              unop(Iop_8Sto32,
22395                                   unop(Iop_32to8, mkexpr(lo32))),
22396                              mkU32(0xFFFF)),
22397                        binop(Iop_Shl32,
22398                              unop(Iop_8Sto32,
22399                                   unop(Iop_32to8, mkexpr(hi32))),
22400                              mkU8(16))
22401               ));
22402               break;
22403            }
22404            default:
22405               vassert(0);
22406         }
22407         putIRegT(rD, mkexpr(dstT), condT);
22408         DIP("%s r%u, r%u, ror #%u\n", nm, rD, rM, 8 * rot);
22409         goto decode_success;
22410      }
22411   }
22412
22413   /* -------------- MUL.W Rd, Rn, Rm -------------- */
22414   if (INSN0(15,4) == 0xFB0
22415       && (INSN1(15,0) & 0xF0F0) == 0xF000) {
22416      UInt rN = INSN0(3,0);
22417      UInt rD = INSN1(11,8);
22418      UInt rM = INSN1(3,0);
22419      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22420         IRTemp res = newTemp(Ity_I32);
22421         assign(res, binop(Iop_Mul32, getIRegT(rN), getIRegT(rM)));
22422         putIRegT(rD, mkexpr(res), condT);
22423         DIP("mul.w r%u, r%u, r%u\n", rD, rN, rM);
22424         goto decode_success;
22425      }
22426   }
22427
22428   /* -------------- SDIV.W Rd, Rn, Rm -------------- */
22429   if (INSN0(15,4) == 0xFB9
22430       && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
22431      UInt rN = INSN0(3,0);
22432      UInt rD = INSN1(11,8);
22433      UInt rM = INSN1(3,0);
22434      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22435         IRTemp res  = newTemp(Ity_I32);
22436         IRTemp argL = newTemp(Ity_I32);
22437         IRTemp argR = newTemp(Ity_I32);
22438         assign(argL, getIRegT(rN));
22439         assign(argR, getIRegT(rM));
22440         assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
22441         putIRegT(rD, mkexpr(res), condT);
22442         DIP("sdiv.w r%u, r%u, r%u\n", rD, rN, rM);
22443         goto decode_success;
22444      }
22445   }
22446
22447   /* -------------- UDIV.W Rd, Rn, Rm -------------- */
22448   if (INSN0(15,4) == 0xFBB
22449       && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
22450      UInt rN = INSN0(3,0);
22451      UInt rD = INSN1(11,8);
22452      UInt rM = INSN1(3,0);
22453      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22454         IRTemp res  = newTemp(Ity_I32);
22455         IRTemp argL = newTemp(Ity_I32);
22456         IRTemp argR = newTemp(Ity_I32);
22457         assign(argL, getIRegT(rN));
22458         assign(argR, getIRegT(rM));
22459         assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
22460         putIRegT(rD, mkexpr(res), condT);
22461         DIP("udiv.w r%u, r%u, r%u\n", rD, rN, rM);
22462         goto decode_success;
22463      }
22464   }
22465
22466   /* ------------------ {U,S}MULL ------------------ */
22467   if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
22468       && INSN1(7,4) == BITS4(0,0,0,0)) {
22469      UInt isU  = INSN0(5,5);
22470      UInt rN   = INSN0(3,0);
22471      UInt rDlo = INSN1(15,12);
22472      UInt rDhi = INSN1(11,8);
22473      UInt rM   = INSN1(3,0);
22474      if (!isBadRegT(rDhi) && !isBadRegT(rDlo)
22475          && !isBadRegT(rN) && !isBadRegT(rM) && rDlo != rDhi) {
22476         IRTemp res   = newTemp(Ity_I64);
22477         assign(res, binop(isU ? Iop_MullU32 : Iop_MullS32,
22478                           getIRegT(rN), getIRegT(rM)));
22479         putIRegT( rDhi, unop(Iop_64HIto32, mkexpr(res)), condT );
22480         putIRegT( rDlo, unop(Iop_64to32, mkexpr(res)), condT );
22481         DIP("%cmull r%u, r%u, r%u, r%u\n",
22482             isU ? 'u' : 's', rDlo, rDhi, rN, rM);
22483         goto decode_success;
22484      }
22485   }
22486
22487   /* ------------------ ML{A,S} ------------------ */
22488   if (INSN0(15,4) == 0xFB0
22489       && (   INSN1(7,4) == BITS4(0,0,0,0)    // MLA
22490           || INSN1(7,4) == BITS4(0,0,0,1))) { // MLS
22491      UInt rN = INSN0(3,0);
22492      UInt rA = INSN1(15,12);
22493      UInt rD = INSN1(11,8);
22494      UInt rM = INSN1(3,0);
22495      if (!isBadRegT(rD) && !isBadRegT(rN)
22496          && !isBadRegT(rM) && !isBadRegT(rA)) {
22497         Bool   isMLA = INSN1(7,4) == BITS4(0,0,0,0);
22498         IRTemp res   = newTemp(Ity_I32);
22499         assign(res,
22500                binop(isMLA ? Iop_Add32 : Iop_Sub32,
22501                      getIRegT(rA),
22502                      binop(Iop_Mul32, getIRegT(rN), getIRegT(rM))));
22503         putIRegT(rD, mkexpr(res), condT);
22504         DIP("%s r%u, r%u, r%u, r%u\n",
22505             isMLA ? "mla" : "mls", rD, rN, rM, rA);
22506         goto decode_success;
22507      }
22508   }
22509
22510   /* ------------------ (T3) ADR ------------------ */
        /* PC-relative address formation, additive form.  The two
           accepted values of the first halfword (0xF20F, 0xF60F)
           differ only in bit 10, which is the top bit of the 12-bit
           immediate assembled below.  No IR is emitted if rD is
           rejected by isBadRegT. */
22511   if ((INSN0(15,0) == 0xF20F || INSN0(15,0) == 0xF60F)
22512       && INSN1(15,15) == 0) {
22513      /* rD = align4(PC) + imm32 */
22514      UInt rD = INSN1(11,8);
22515      if (!isBadRegT(rD)) {
              /* imm32 = INSN0[10] : INSN1[14:12] : INSN1[7:0] */
22516         UInt imm32 = (INSN0(10,10) << 11)
22517                      | (INSN1(14,12) << 8) | INSN1(7,0);
              /* The And32 with ~3 clears the two low bits of the value
                 obtained from getIRegT(15) before the immediate is
                 added. */
22518         putIRegT(rD, binop(Iop_Add32,
22519                            binop(Iop_And32, getIRegT(15), mkU32(~3U)),
22520                            mkU32(imm32)),
22521                      condT);
22522         DIP("add r%u, pc, #%u\n", rD, imm32);
22523         goto decode_success;
22524      }
22525   }
22526
22527   /* ----------------- (T1) UMLAL ----------------- */
22528   /* ----------------- (T1) SMLAL ----------------- */
        /* 64-bit multiply-accumulate:
              rDhi:rDlo = rDhi:rDlo + rN * rM
           where the 32x32->64 multiply is Iop_MullS32 for SMLAL
           (INSN0(15,4) == 0xFBC) and Iop_MullU32 for UMLAL (0xFBE).
           Decoding is refused (falls through) if any register is
           rejected by isBadRegT or if rDhi == rDlo. */
22529   if ((INSN0(15,4) == 0xFBE // UMLAL
22530        || INSN0(15,4) == 0xFBC) // SMLAL
22531       && INSN1(7,4) == BITS4(0,0,0,0)) {
22532      UInt rN   = INSN0(3,0);
22533      UInt rDlo = INSN1(15,12);
22534      UInt rDhi = INSN1(11,8);
22535      UInt rM   = INSN1(3,0);
22536      if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
22537          && !isBadRegT(rM) && rDhi != rDlo) {
22538         Bool   isS   = INSN0(15,4) == 0xFBC;
22539         IRTemp argL  = newTemp(Ity_I32);
22540         IRTemp argR  = newTemp(Ity_I32);
22541         IRTemp old   = newTemp(Ity_I64);
22542         IRTemp res   = newTemp(Ity_I64);
22543         IRTemp resHi = newTemp(Ity_I32);
22544         IRTemp resLo = newTemp(Ity_I32);
22545         IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
22546         assign( argL, getIRegT(rM));
22547         assign( argR, getIRegT(rN));
              /* old = current 64-bit accumulator, rDhi in the top half. */
22548         assign( old, binop(Iop_32HLto64, getIRegT(rDhi), getIRegT(rDlo)) );
22549         assign( res, binop(Iop_Add64,
22550                            mkexpr(old),
22551                            binop(mulOp, mkexpr(argL), mkexpr(argR))) );
              /* Both halves are captured in temps before either
                 destination register is written. */
22552         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
22553         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
22554         putIRegT( rDhi, mkexpr(resHi), condT );
22555         putIRegT( rDlo, mkexpr(resLo), condT );
22556         DIP("%cmlal r%u, r%u, r%u, r%u\n",
22557             isS ? 's' : 'u', rDlo, rDhi, rN, rM);
22558         goto decode_success;
22559      }
22560   }
22561
22562   /* ------------------ (T1) UMAAL ------------------ */
        /* Unsigned multiply with double accumulate:
              rDhi:rDlo = rN * rM + zext64(rDhi) + zext64(rDlo)
           using an unsigned 32x32->64 multiply (Iop_MullU32).  Shares
           the first halfword pattern 0xFBE with UMLAL and is
           distinguished by INSN1(7,4) == 0110.  Falls through without
           emitting IR if a register is rejected by isBadRegT or if
           rDhi == rDlo. */
22563   if (INSN0(15,4) == 0xFBE && INSN1(7,4) == BITS4(0,1,1,0)) {
22564      UInt rN   = INSN0(3,0);
22565      UInt rDlo = INSN1(15,12);
22566      UInt rDhi = INSN1(11,8);
22567      UInt rM   = INSN1(3,0);
22568      if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
22569          && !isBadRegT(rM) && rDhi != rDlo) {
22570         IRTemp argN   = newTemp(Ity_I32);
22571         IRTemp argM   = newTemp(Ity_I32);
22572         IRTemp argDhi = newTemp(Ity_I32);
22573         IRTemp argDlo = newTemp(Ity_I32);
22574         IRTemp res    = newTemp(Ity_I64);
22575         IRTemp resHi  = newTemp(Ity_I32);
22576         IRTemp resLo  = newTemp(Ity_I32);
              /* All four source values are read into temps before any
                 destination register is written. */
22577         assign( argN,   getIRegT(rN) );
22578         assign( argM,   getIRegT(rM) );
22579         assign( argDhi, getIRegT(rDhi) );
22580         assign( argDlo, getIRegT(rDlo) );
22581         assign( res,
22582                 binop(Iop_Add64,
22583                       binop(Iop_Add64,
22584                             binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
22585                             unop(Iop_32Uto64, mkexpr(argDhi))),
22586                       unop(Iop_32Uto64, mkexpr(argDlo))) );
22587         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
22588         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
22589         putIRegT( rDhi, mkexpr(resHi), condT );
22590         putIRegT( rDlo, mkexpr(resLo), condT );
22591         DIP("umaal r%u, r%u, r%u, r%u\n", rDlo, rDhi, rN, rM);
22592         goto decode_success;
22593      }
22594   }
22595
22596   /* ------------------- (T1) SMMUL{R} ------------------ */
        /* Signed most-significant-word multiply:
              rD = top 32 bits of (rN * rM [+ 0x80000000 if R])
           where the product is the signed 64-bit result of
           Iop_MullS32 and R is INSN1 bit 4.  Adding 0x80000000 before
           taking the upper half makes the R variant round instead of
           truncate.  This case requires INSN1(15,12) == 1111. */
22597   if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
22598       && INSN0(6,4) == BITS3(1,0,1)
22599       && INSN1(15,12) == BITS4(1,1,1,1)
22600       && INSN1(7,5) == BITS3(0,0,0)) {
22601      UInt bitR = INSN1(4,4);
22602      UInt rD = INSN1(11,8);
22603      UInt rM = INSN1(3,0);
22604      UInt rN = INSN0(3,0);
22605      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22606         IRExpr* res
22607         = unop(Iop_64HIto32,
22608                binop(Iop_Add64,
22609                      binop(Iop_MullS32, getIRegT(rN), getIRegT(rM)),
22610                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
22611         putIRegT(rD, res, condT);
22612         DIP("smmul%s r%u, r%u, r%u\n",
22613             bitR ? "r" : "", rD, rN, rM);
22614         goto decode_success;
22615      }
22616   }
22617
22618   /* ------------------- (T1) SMMLA{R} ------------------ */
        /* Signed most-significant-word multiply-accumulate:
              rD = top 32 bits of ((rA << 32) + rN * rM
                                   [+ 0x80000000 if R])
           with a signed 32x32->64 multiply and R = INSN1 bit 4.
           The opcode test is the same as for SMMUL{R} except that
           INSN1(15,12) is not constrained; it is used as rA.  Only
           rA == 13 is rejected explicitly.
           NOTE(review): rA == 15 is not rejected here.  The SMMUL{R}
           case preceding this one matches INSN1(15,12) == 1111 with
           the same rD/rN/rM checks, so that value should not reach
           getIRegT(rA) -- confirm if the cases are ever reordered. */
22619   if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
22620       && INSN0(6,4) == BITS3(1,0,1)
22621       && INSN1(7,5) == BITS3(0,0,0)) {
22622      UInt bitR = INSN1(4,4);
22623      UInt rA = INSN1(15,12);
22624      UInt rD = INSN1(11,8);
22625      UInt rM = INSN1(3,0);
22626      UInt rN = INSN0(3,0);
22627      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && (rA != 13)) {
22628         IRExpr* res
22629         = unop(Iop_64HIto32,
22630                binop(Iop_Add64,
22631                      binop(Iop_Add64,
                               /* rA placed in the upper 32 bits, zero below */
22632                            binop(Iop_32HLto64, getIRegT(rA), mkU32(0)),
22633                            binop(Iop_MullS32, getIRegT(rN), getIRegT(rM))),
22634                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
22635         putIRegT(rD, res, condT);
22636         DIP("smmla%s r%u, r%u, r%u, r%u\n",
22637             bitR ? "r" : "", rD, rN, rM, rA);
22638         goto decode_success;
22639      }
22640   }
22641
22642   /* ------------------ (T2) ADR ------------------ */
        /* PC-relative address formation, subtractive form.  The two
           accepted values of the first halfword (0xF2AF, 0xF6AF)
           differ only in bit 10, which is the top bit of the 12-bit
           immediate assembled below.  No IR is emitted if rD is
           rejected by isBadRegT. */
22643   if ((INSN0(15,0) == 0xF2AF || INSN0(15,0) == 0xF6AF)
22644       && INSN1(15,15) == 0) {
22645      /* rD = align4(PC) - imm32 */
22646      UInt rD = INSN1(11,8);
22647      if (!isBadRegT(rD)) {
              /* imm32 = INSN0[10] : INSN1[14:12] : INSN1[7:0] */
22648         UInt imm32 = (INSN0(10,10) << 11)
22649                      | (INSN1(14,12) << 8) | INSN1(7,0);
              /* The And32 with ~3 clears the two low bits of the value
                 obtained from getIRegT(15) before the immediate is
                 subtracted. */
22650         putIRegT(rD, binop(Iop_Sub32,
22651                            binop(Iop_And32, getIRegT(15), mkU32(~3U)),
22652                            mkU32(imm32)),
22653                      condT);
22654         DIP("sub r%u, pc, #%u\n", rD, imm32);
22655         goto decode_success;
22656      }
22657   }
22658
22659   /* ------------------- (T1) BFI ------------------- */
22660   /* ------------------- (T1) BFC ------------------- */
        /* Bitfield insert / clear.  Bits lsb..msb of rD are replaced
           by the low (msb-lsb+1) bits of rN (BFI), or by zeroes when
           the rN field is 15 (BFC); all other bits of rD are kept.
           lsb is INSN1[14:12]:INSN1[7:6] and msb is INSN1[4:0].
           Encodings with a bad rD, rN == 13 or msb < lsb are not
           decoded here. */
22661   if (INSN0(15,4) == 0xF36 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
22662      UInt rD  = INSN1(11,8);
22663      UInt rN  = INSN0(3,0);
22664      UInt msb = INSN1(4,0);
22665      UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
22666      if (isBadRegT(rD) || rN == 13 || msb < lsb) {
22667         /* undecodable; fall through */
22668      } else {
22669         IRTemp src    = newTemp(Ity_I32);
22670         IRTemp olddst = newTemp(Ity_I32);
22671         IRTemp newdst = newTemp(Ity_I32);
              /* Build (msb-lsb+1) low one-bits as (2^k - 1) + 2^k with
                 k = msb-lsb.  This never needs a shift by 32, even
                 when the field covers the whole word (k == 31). */
22672         UInt   mask   = ((UInt)1) << (msb - lsb);
22673         mask = (mask - 1) + mask;
22674         vassert(mask != 0); // guaranteed by "msb < lsb" check above
              /* Move the ones up so they cover bit positions lsb..msb. */
22675         mask <<= lsb;
22676
              /* BFC is encoded as BFI with rN == 15: insert zeroes. */
22677         assign(src, rN == 15 ? mkU32(0) : getIRegT(rN));
22678         assign(olddst, getIRegT(rD));
              /* newdst = ((src << lsb) & mask) | (olddst & ~mask) */
22679         assign(newdst,
22680                binop(Iop_Or32,
22681                   binop(Iop_And32,
22682                         binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
22683                         mkU32(mask)),
22684                   binop(Iop_And32,
22685                         mkexpr(olddst),
22686                         mkU32(~mask)))
22687               );
22688
22689         putIRegT(rD, mkexpr(newdst), condT);
22690
22691         if (rN == 15) {
22692            DIP("bfc r%u, #%u, #%u\n",
22693                rD, lsb, msb-lsb+1);
22694         } else {
22695            DIP("bfi r%u, r%u, #%u, #%u\n",
22696                rD, rN, lsb, msb-lsb+1);
22697         }
22698         goto decode_success;
22699      }
22700   }
22701
22702   /* ------------------- (T1) SXTAH ------------------- */
22703   /* ------------------- (T1) UXTAH ------------------- */
        /* Extend-and-add halfword:
              rD = rN + ext32( low16( ROR(rM, 8 * rot) ) )
           where ext32 is zero-extension for UXTAH (INSN0(15,4) ==
           0xFA1) and sign-extension for SXTAH (0xFA0), and rot is the
           2-bit field INSN1(5,4).  Falls through without emitting IR
           if rD, rN or rM is rejected by isBadRegT. */
22704   if ((INSN0(15,4) == 0xFA1      // UXTAH
22705        || INSN0(15,4) == 0xFA0)  // SXTAH
22706       && INSN1(15,12) == BITS4(1,1,1,1)
22707       && INSN1(7,6) == BITS2(1,0)) {
22708      Bool isU = INSN0(15,4) == 0xFA1;
22709      UInt rN  = INSN0(3,0);
22710      UInt rD  = INSN1(11,8);
22711      UInt rM  = INSN1(3,0);
22712      UInt rot = INSN1(5,4);
22713      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22714         IRTemp srcL = newTemp(Ity_I32);
22715         IRTemp srcR = newTemp(Ity_I32);
22716         IRTemp res  = newTemp(Ity_I32);
22717         assign(srcR, getIRegT(rM));
22718         assign(srcL, getIRegT(rN));
22719         assign(res,  binop(Iop_Add32,
22720                            mkexpr(srcL),
22721                            unop(isU ? Iop_16Uto32 : Iop_16Sto32,
22722                                 unop(Iop_32to16,
22723                                      genROR32(srcR, 8 * rot)))));
22724         putIRegT(rD, mkexpr(res), condT);
              /* Print the rotation in bits (0/8/16/24), i.e. the amount
                 actually given to genROR32, not the raw 2-bit field. */
22725         DIP("%cxtah r%u, r%u, r%u, ror #%u\n",
22726             isU ? 'u' : 's', rD, rN, rM, 8 * rot);
22727         goto decode_success;
22728      }
22729   }
22730
22731   /* ------------------- (T1) SXTAB ------------------- */
22732   /* ------------------- (T1) UXTAB ------------------- */
        /* Extend-and-add byte:
              rD = rN + ext32( low8( ROR(rM, 8 * rot) ) )
           where ext32 is zero-extension for UXTAB (INSN0(15,4) ==
           0xFA5) and sign-extension for SXTAB (0xFA4), and rot is the
           2-bit field INSN1(5,4).  Falls through without emitting IR
           if rD, rN or rM is rejected by isBadRegT. */
22733   if ((INSN0(15,4) == 0xFA5      // UXTAB
22734        || INSN0(15,4) == 0xFA4)  // SXTAB
22735       && INSN1(15,12) == BITS4(1,1,1,1)
22736       && INSN1(7,6) == BITS2(1,0)) {
22737      Bool isU = INSN0(15,4) == 0xFA5;
22738      UInt rN  = INSN0(3,0);
22739      UInt rD  = INSN1(11,8);
22740      UInt rM  = INSN1(3,0);
22741      UInt rot = INSN1(5,4);
22742      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22743         IRTemp srcL = newTemp(Ity_I32);
22744         IRTemp srcR = newTemp(Ity_I32);
22745         IRTemp res  = newTemp(Ity_I32);
22746         assign(srcR, getIRegT(rM));
22747         assign(srcL, getIRegT(rN));
22748         assign(res,  binop(Iop_Add32,
22749                            mkexpr(srcL),
22750                            unop(isU ? Iop_8Uto32 : Iop_8Sto32,
22751                                 unop(Iop_32to8,
22752                                      genROR32(srcR, 8 * rot)))));
22753         putIRegT(rD, mkexpr(res), condT);
              /* Print the rotation in bits (0/8/16/24), i.e. the amount
                 actually given to genROR32, not the raw 2-bit field. */
22754         DIP("%cxtab r%u, r%u, r%u, ror #%u\n",
22755             isU ? 'u' : 's', rD, rN, rM, 8 * rot);
22756         goto decode_success;
22757      }
22758   }
22759
22760   /* ------------------- (T1) CLZ ------------------- */
        /* Count leading zeroes: rD = number of leading zero bits in
           rM.  The source register number appears twice in the
           encoding (INSN0(3,0) and INSN1(3,0)); the instruction is
           only accepted when both copies agree. */
22761   if (INSN0(15,4) == 0xFAB
22762       && INSN1(15,12) == BITS4(1,1,1,1)
22763       && INSN1(7,4) == BITS4(1,0,0,0)) {
22764      UInt rM1 = INSN0(3,0);
22765      UInt rD  = INSN1(11,8);
22766      UInt rM2 = INSN1(3,0);
22767      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22768         IRTemp arg = newTemp(Ity_I32);
22769         IRTemp res = newTemp(Ity_I32);
22770         assign(arg, getIRegT(rM1));
              /* A zero argument is special-cased to yield 32 rather than
                 being handed to Iop_Clz32 (presumably because that op's
                 result is not defined for zero -- confirm against the
                 IR definition). */
22771         assign(res, IRExpr_ITE(
22772                        binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
22773                        mkU32(32),
22774                        unop(Iop_Clz32, mkexpr(arg))
22775         ));
22776         putIRegT(rD, mkexpr(res), condT);
22777         DIP("clz r%u, r%u\n", rD, rM1);
22778         goto decode_success;
22779      }
22780   }
22781
22782   /* ------------------- (T1) RBIT ------------------- */
        /* Bit reversal: rD = gen_BITREV(rM).  As for CLZ, the source
           register number is encoded twice (INSN0(3,0), INSN1(3,0))
           and both copies must match for the case to be accepted. */
22783   if (INSN0(15,4) == 0xFA9
22784       && INSN1(15,12) == BITS4(1,1,1,1)
22785       && INSN1(7,4) == BITS4(1,0,1,0)) {
22786      UInt rM1 = INSN0(3,0);
22787      UInt rD  = INSN1(11,8);
22788      UInt rM2 = INSN1(3,0);
22789      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22790         IRTemp arg = newTemp(Ity_I32);
22791         assign(arg, getIRegT(rM1));
              /* gen_BITREV returns a fresh temp holding the result. */
22792         IRTemp res = gen_BITREV(arg);
22793         putIRegT(rD, mkexpr(res), condT);
22794         DIP("rbit r%u, r%u\n", rD, rM1);
22795         goto decode_success;
22796      }
22797   }
22798
22799   /* ------------------- (T2) REV   ------------------- */
22800   /* ------------------- (T2) REV16 ------------------- */
        /* Byte-reversal instructions.  INSN1(7,4) selects the variant:
           1000 -> REV (uses gen_REV), 1001 -> REV16 (uses gen_REV16).
           The source register number is encoded twice (INSN0(3,0),
           INSN1(3,0)) and both copies must match. */
22801   if (INSN0(15,4) == 0xFA9
22802       && INSN1(15,12) == BITS4(1,1,1,1)
22803       && (   INSN1(7,4) == BITS4(1,0,0,0)     // REV
22804           || INSN1(7,4) == BITS4(1,0,0,1))) { // REV16
22805      UInt rM1   = INSN0(3,0);
22806      UInt rD    = INSN1(11,8);
22807      UInt rM2   = INSN1(3,0);
22808      Bool isREV = INSN1(7,4) == BITS4(1,0,0,0);
22809      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22810         IRTemp arg = newTemp(Ity_I32);
22811         assign(arg, getIRegT(rM1));
22812         IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
22813         putIRegT(rD, mkexpr(res), condT);
22814         DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM1);
22815         goto decode_success;
22816      }
22817   }
22818
22819   /* ------------------- (T2) REVSH ------------------ */
        /* Byte-reverse the low halfword of rM and sign-extend the
           result to 32 bits:
              result[7:0]   = rM[15:8]
              result[31:8]  = sign-extension of rM[7:0]
           The source register number is encoded twice (INSN0(3,0),
           INSN1(3,0)) and both copies must match. */
22820   if (INSN0(15,4) == 0xFA9
22821       && INSN1(15,12) == BITS4(1,1,1,1)
22822       && INSN1(7,4) == BITS4(1,0,1,1)) {
22823      UInt rM1 = INSN0(3,0);
22824      UInt rM2 = INSN1(3,0);
22825      UInt rD  = INSN1(11,8);
22826      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22827         IRTemp irt_rM  = newTemp(Ity_I32);
22828         IRTemp irt_hi  = newTemp(Ity_I32);
22829         IRTemp irt_low = newTemp(Ity_I32);
22830         IRTemp irt_res = newTemp(Ity_I32);
22831         assign(irt_rM, getIRegT(rM1));
              /* Shift the low byte to the top of the word, then shift
                 arithmetically right by 16: the byte lands in bits
                 15:8 with its sign replicated above, and bits 7:0 are
                 zero. */
22832         assign(irt_hi,
22833                binop(Iop_Sar32,
22834                      binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
22835                      mkU8(16)
22836                )
22837         );
              /* Original bits 15:8 moved down to bits 7:0. */
22838         assign(irt_low,
22839                binop(Iop_And32,
22840                      binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
22841                      mkU32(0xFF)
22842                )
22843         );
22844         assign(irt_res,
22845                binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
22846         );
22847         putIRegT(rD, mkexpr(irt_res), condT);
22848         DIP("revsh r%u, r%u\n", rD, rM1);
22849         goto decode_success;
22850      }
22851   }
22852
22853   /* -------------- (T1) MSR apsr, reg -------------- */
        /* Write selected parts of the APSR from rN.  INSN1 bit 11
           selects the NZCVQ flags and bit 10 the GE flags; at least
           one of the two must be set for this case to be accepted.
           The actual state update is delegated to desynthesise_APSR,
           which also receives condT. */
22854   if (INSN0(15,4) == 0xF38
22855       && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(9,0) == 0x000) {
22856      UInt rN          = INSN0(3,0);
22857      UInt write_ge    = INSN1(10,10);
22858      UInt write_nzcvq = INSN1(11,11);
22859      if (!isBadRegT(rN) && (write_nzcvq || write_ge)) {
22860         IRTemp rNt = newTemp(Ity_I32);
22861         assign(rNt, getIRegT(rN));
22862         desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
              /* Trace suffix: "f" for the NZCVQ flags, "g" for GE. */
22863         DIP("msr cpsr_%s%s, r%u\n",
22864             write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
22865         goto decode_success;
22866      }
22867   }
22868
22869   /* -------------- (T1) MRS reg, apsr -------------- */
        /* Read the APSR into rD.  The value comes from
           synthesise_APSR(), which returns a temp; it is written to
           rD with condT as the guard.  Not decoded if rD is rejected
           by isBadRegT. */
22870   if (INSN0(15,0) == 0xF3EF
22871       && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(7,0) == 0x00) {
22872      UInt rD = INSN1(11,8);
22873      if (!isBadRegT(rD)) {
22874         IRTemp apsr = synthesise_APSR();
22875         putIRegT( rD, mkexpr(apsr), condT );
22876         DIP("mrs r%u, cpsr\n", rD);
22877         goto decode_success;
22878      }
22879   }
22880
22881   /* ----------------- (T1) LDREX ----------------- */
        /* Load-exclusive word: rT = load-linked [rN + imm8 * 4].
           Emitted as an IRStmt_LLSC with a NULL store-data argument,
           which marks it as the load half.  The instruction is first
           made unconditional via mk_skip_over_T32_if_cond_is_false,
           so the register write uses IRTemp_INVALID as its guard.
           Not decoded if rT is rejected by isBadRegT or rN == 15. */
22882   if (INSN0(15,4) == 0xE85 && INSN1(11,8) == BITS4(1,1,1,1)) {
22883      UInt rN   = INSN0(3,0);
22884      UInt rT   = INSN1(15,12);
22885      UInt imm8 = INSN1(7,0);
22886      if (!isBadRegT(rT) && rN != 15) {
22887         IRTemp res;
22888         // go uncond
22889         mk_skip_over_T32_if_cond_is_false( condT );
22890         // now uncond
              /* res is created only after the skip-over has been
                 emitted. */
22891         res = newTemp(Ity_I32);
22892         stmt( IRStmt_LLSC(Iend_LE,
22893                           res,
22894                           binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
22895                           NULL/*this is a load*/ ));
22896         putIRegT(rT, mkexpr(res), IRTemp_INVALID);
22897         DIP("ldrex r%u, [r%u, #+%u]\n", rT, rN, imm8 * 4);
22898         goto decode_success;
22899      }
22900   }
22901
22902   /* --------------- (T1) LDREX{B,H} --------------- */
        /* Load-exclusive byte / halfword from [rN] (no offset).
           INSN1(11,0) == 0xF4F selects the byte form and 0xF5F the
           halfword form.  The loaded value (Ity_I8 or Ity_I16) is
           zero-extended to 32 bits before being written to rT.  The
           instruction is made unconditional first, as for LDREX.
           Not decoded if rT is rejected by isBadRegT or rN == 15. */
22903   if (INSN0(15,4) == 0xE8D
22904       && (INSN1(11,0) == 0xF4F || INSN1(11,0) == 0xF5F)) {
22905      UInt rN  = INSN0(3,0);
22906      UInt rT  = INSN1(15,12);
22907      Bool isH = INSN1(11,0) == 0xF5F;
22908      if (!isBadRegT(rT) && rN != 15) {
22909         IRTemp res;
22910         // go uncond
22911         mk_skip_over_T32_if_cond_is_false( condT );
22912         // now uncond
              /* The temp's type determines the width of the access. */
22913         res = newTemp(isH ? Ity_I16 : Ity_I8);
22914         stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
22915                           NULL/*this is a load*/ ));
22916         putIRegT(rT, unop(isH ? Iop_16Uto32 : Iop_8Uto32, mkexpr(res)),
22917                      IRTemp_INVALID);
22918         DIP("ldrex%c r%u, [r%u]\n", isH ? 'h' : 'b', rT, rN);
22919         goto decode_success;
22920      }
22921   }
22922
22923   /* --------------- (T1) LDREXD --------------- */
        /* Load-exclusive doubleword from [rN]: a single 64-bit LLSC
           load whose low 32 bits go to rT and high 32 bits to rT2.
           The instruction is made unconditional first.  Not decoded
           if rT or rT2 is rejected by isBadRegT, if rT == rT2, or if
           rN == 15. */
22924   if (INSN0(15,4) == 0xE8D && INSN1(7,0) == 0x7F) {
22925      UInt rN  = INSN0(3,0);
22926      UInt rT  = INSN1(15,12);
22927      UInt rT2 = INSN1(11,8);
22928      if (!isBadRegT(rT) && !isBadRegT(rT2) && rT != rT2 && rN != 15) {
22929         IRTemp res;
22930         // go uncond
22931         mk_skip_over_T32_if_cond_is_false( condT );
22932         // now uncond
22933         res = newTemp(Ity_I64);
22934         // FIXME: assumes little-endian guest
22935         stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
22936                           NULL/*this is a load*/ ));
22937         // FIXME: assumes little-endian guest
22938         putIRegT(rT,  unop(Iop_64to32,   mkexpr(res)), IRTemp_INVALID);
22939         putIRegT(rT2, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID);
22940         DIP("ldrexd r%u, r%u, [r%u]\n", rT, rT2, rN);
22941         goto decode_success;
22942      }
22943   }
22944
22945   /* ----------------- (T1) STREX ----------------- */
        /* Store-exclusive word: store-conditional of rT to
           [rN + imm8 * 4], with the status written to rD (0 on
           success, 1 on failure).  The instruction is made
           unconditional first.  Not decoded if rD or rT is rejected
           by isBadRegT, if rN == 15, or if rD equals rN or rT. */
22946   if (INSN0(15,4) == 0xE84) {
22947      UInt rN   = INSN0(3,0);
22948      UInt rT   = INSN1(15,12);
22949      UInt rD   = INSN1(11,8);
22950      UInt imm8 = INSN1(7,0);
22951      if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
22952          && rD != rN && rD != rT) {
22953         IRTemp resSC1, resSC32;
22954         // go uncond
22955         mk_skip_over_T32_if_cond_is_false( condT );
22956         // now uncond
22957         /* Ok, now we're unconditional.  Do the store. */
22958         resSC1 = newTemp(Ity_I1);
22959         stmt( IRStmt_LLSC(Iend_LE,
22960                           resSC1,
22961                           binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
22962                           getIRegT(rT)) );
22963         /* Set rD to 1 on failure, 0 on success.  Currently we have
22964            resSC1 == 0 on failure, 1 on success. */
22965         resSC32 = newTemp(Ity_I32);
22966         assign(resSC32,
22967                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
22968         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
22969         DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
22970         goto decode_success;
22971      }
22972   }
22973
22974   /* --------------- (T1) STREX{B,H} --------------- */
        /* Store-exclusive byte / halfword to [rN] (no offset).
           INSN1(11,4) == 0xF4 selects the byte form and 0xF5 the
           halfword form; rT is narrowed accordingly before the
           store-conditional.  rD (here INSN1(3,0)) receives 0 on
           success and 1 on failure.  The instruction is made
           unconditional first.  Not decoded if rD or rT is rejected
           by isBadRegT, if rN == 15, or if rD equals rN or rT. */
22975   if (INSN0(15,4) == 0xE8C
22976       && (INSN1(11,4) == 0xF4 || INSN1(11,4) == 0xF5)) {
22977      UInt rN  = INSN0(3,0);
22978      UInt rT  = INSN1(15,12);
22979      UInt rD  = INSN1(3,0);
22980      Bool isH = INSN1(11,4) == 0xF5;
22981      if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
22982          && rD != rN && rD != rT) {
22983         IRTemp resSC1, resSC32;
22984         // go uncond
22985         mk_skip_over_T32_if_cond_is_false( condT );
22986         // now uncond
22987         /* Ok, now we're unconditional.  Do the store. */
22988         resSC1 = newTemp(Ity_I1);
22989         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN),
22990                           unop(isH ? Iop_32to16 : Iop_32to8,
22991                                getIRegT(rT))) );
22992         /* Set rD to 1 on failure, 0 on success.  Currently we have
22993            resSC1 == 0 on failure, 1 on success. */
22994         resSC32 = newTemp(Ity_I32);
22995         assign(resSC32,
22996                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
22997         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
22998         DIP("strex%c r%u, r%u, [r%u]\n", isH ? 'h' : 'b', rD, rT, rN);
22999         goto decode_success;
23000      }
23001   }
23002
23003   /* ---------------- (T1) STREXD ---------------- */
        /* Store-exclusive doubleword to [rN]: a single 64-bit
           store-conditional whose data has rT in the low half and rT2
           in the high half.  rD receives 0 on success and 1 on
           failure.  The instruction is made unconditional first.  Not
           decoded if rD, rT or rT2 is rejected by isBadRegT, if
           rN == 15, or if rD equals rN, rT or rT2. */
23004   if (INSN0(15,4) == 0xE8C && INSN1(7,4) == BITS4(0,1,1,1)) {
23005      UInt rN  = INSN0(3,0);
23006      UInt rT  = INSN1(15,12);
23007      UInt rT2 = INSN1(11,8);
23008      UInt rD  = INSN1(3,0);
23009      if (!isBadRegT(rD) && !isBadRegT(rT) && !isBadRegT(rT2)
23010          && rN != 15 && rD != rN && rD != rT && rD != rT2) {
23011         IRTemp resSC1, resSC32, data;
23012         // go uncond
23013         mk_skip_over_T32_if_cond_is_false( condT );
23014         // now uncond
23015         /* Ok, now we're unconditional.  Do the store. */
23016         resSC1 = newTemp(Ity_I1);
23017         data = newTemp(Ity_I64);
23018         // FIXME: assumes little-endian guest
23019         assign(data, binop(Iop_32HLto64, getIRegT(rT2), getIRegT(rT)));
23020         // FIXME: assumes little-endian guest
23021         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN), mkexpr(data)));
23022         /* Set rD to 1 on failure, 0 on success.  Currently we have
23023            resSC1 == 0 on failure, 1 on success. */
23024         resSC32 = newTemp(Ity_I32);
23025         assign(resSC32,
23026                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
23027         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
23028         DIP("strexd r%u, r%u, r%u, [r%u]\n", rD, rT, rT2, rN);
23029         goto decode_success;
23030      }
23031   }
23032
23033   /* -------------- v7 barrier insns -------------- */
        /* DSB, DMB and ISB.  The outer test matches INSN0 == 0xF3BF
           with the top byte of INSN1 equal to 0x8F; INSN1(7,0) then
           selects the barrier kind and option.  Every recognised
           encoding is translated to the same Imbe_Fence statement,
           regardless of the option field.  Any other value of
           INSN1(7,0) reaches the default arm and decoding continues
           with the cases that follow (CLREX, INSN1 == 0x8F2F, also
           satisfies the outer test and is handled that way). */
23034   if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF00) == 0x8F00) {
23035      /* FIXME: should this be unconditional? */
23036      /* XXX this isn't really right, is it?  The generated IR does
23037         them unconditionally.  I guess it doesn't matter since it
23038         doesn't do any harm to do them even when the guarding
23039         condition is false -- it's just a performance loss. */
23040      switch (INSN1(7,0)) {
23041         case 0x4F: /* DSB sy */
23042         case 0x4E: /* DSB st */
23043         case 0x4B: /* DSB ish */
23044         case 0x4A: /* DSB ishst */
23045         case 0x47: /* DSB nsh */
23046         case 0x46: /* DSB nshst */
23047         case 0x43: /* DSB osh */
23048         case 0x42: /* DSB oshst */
23049            stmt( IRStmt_MBE(Imbe_Fence) );
23050            DIP("DSB\n");
23051            goto decode_success;
23052         case 0x5F: /* DMB sy */
23053         case 0x5E: /* DMB st */
23054         case 0x5B: /* DMB ish */
23055         case 0x5A: /* DMB ishst */
23056         case 0x57: /* DMB nsh */
23057         case 0x56: /* DMB nshst */
23058         case 0x53: /* DMB osh */
23059         case 0x52: /* DMB oshst */
23060            stmt( IRStmt_MBE(Imbe_Fence) );
23061            DIP("DMB\n");
23062            goto decode_success;
23063         case 0x6F: /* ISB */
23064            stmt( IRStmt_MBE(Imbe_Fence) );
23065            DIP("ISB\n");
23066            goto decode_success;
23067         default:
23068            break;
23069      }
23070   }
23071
23072   /* ---------------------- PLD{,W} ---------------------- */
        /* Preload hint with a 12-bit positive immediate offset.  The
           mask 0xFFD ignores INSN0 bit 5, which distinguishes PLD
           from PLDW.  No IR is generated for the hint: the
           instruction is only printed and accepted.  No register
           validity checks are made in this case. */
23073   if ((INSN0(15,4) & 0xFFD) == 0xF89 && INSN1(15,12) == 0xF) {
23074      /* FIXME: should this be unconditional? */
23075      /* PLD/PLDW immediate, encoding T1 */
23076      UInt rN    = INSN0(3,0);
23077      UInt bW    = INSN0(5,5);
23078      UInt imm12 = INSN1(11,0);
23079      DIP("pld%s [r%u, #%u]\n", bW ? "w" : "",  rN, imm12);
23080      goto decode_success;
23081   }
23082
        /* Preload hint with an 8-bit negative immediate offset (the
           trace prints the offset with a leading '-').  The mask
           0xFFD ignores INSN0 bit 5, which distinguishes PLD from
           PLDW.  No IR is generated for the hint, and no register
           validity checks are made in this case. */
23083   if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,8) == 0xFC) {
23084      /* FIXME: should this be unconditional? */
23085      /* PLD/PLDW immediate, encoding T2 */
23086      UInt rN    = INSN0(3,0);
23087      UInt bW    = INSN0(5,5);
23088      UInt imm8  = INSN1(7,0);
23089      DIP("pld%s [r%u, #-%u]\n", bW ? "w" : "",  rN, imm8);
23090      goto decode_success;
23091   }
23092
        /* Preload hint with a register offset, [rN, rM, lsl imm2].
           The mask 0xFFD ignores INSN0 bit 5, which distinguishes PLD
           from PLDW.  No IR is generated for the hint.  Only rM is
           checked with isBadRegT; if it is rejected the instruction
           is left for the cases that follow. */
23093   if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,6) == 0x3C0) {
23094      /* FIXME: should this be unconditional? */
23095      /* PLD/PLDW register, encoding T1 */
23096      UInt rN   = INSN0(3,0);
23097      UInt rM   = INSN1(3,0);
23098      UInt bW   = INSN0(5,5);
23099      UInt imm2 = INSN1(5,4);
23100      if (!isBadRegT(rM)) {
23101         DIP("pld%s [r%u, r%u, lsl %u]\n", bW ? "w" : "", rN, rM, imm2);
23102         goto decode_success;
23103      }
23104      /* fall through */
23105   }
23106
23107   /* -------------- read CP15 TPIDRURO register ------------- */
23108   /* mrc     p15, 0,  r0, c13, c0, 3  up to
23109      mrc     p15, 0, r14, c13, c0, 3
23110   */
23111   /* I don't know whether this is really v7-only.  But anyway, we
23112      have to support it since arm-linux uses TPIDRURO as a thread
23113      state register. */
        /* The value is read from the guest state at offset
           OFFB_TPIDRURO and written to rD (INSN1(15,12)) with condT
           as the guard.  Not decoded if rD is rejected by
           isBadRegT. */
23114   if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F70)) {
23115      UInt rD = INSN1(15,12);
23116      if (!isBadRegT(rD)) {
23117         putIRegT(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32), condT);
23118         DIP("mrc p15,0, r%u, c13, c0, 3\n", rD);
23119         goto decode_success;
23120      }
23121      /* fall through */
23122   }
23123
23124   /* -------------- read CP15 PMUSRENR register ------------- */
23125   /* mrc     p15, 0, r0,  c9, c14, 0  up to
23126      mrc     p15, 0, r14, c9, c14, 0
23127      See comment on the ARM equivalent of this (above) for details.
23128   */
        /* No guest state is consulted: the read always yields the
           constant 0, written to rD (INSN1(15,12)) with condT as the
           guard.  Not decoded if rD is rejected by isBadRegT. */
23129   if ((INSN0(15,0) == 0xEE19) && (INSN1(11,0) == 0x0F1E)) {
23130      UInt rD = INSN1(15,12);
23131      if (!isBadRegT(rD)) {
23132         putIRegT(rD, mkU32(0), condT);
23133         DIP("mrc p15,0, r%u, c9, c14, 0\n", rD);
23134         goto decode_success;
23135      }
23136      /* fall through */
23137   }
23138
23139   /* ------------------- CLREX ------------------ */
        /* Matched on the exact 32-bit encoding 0xF3BF 0x8F2F.  The
           instruction is first made unconditional via
           mk_skip_over_T32_if_cond_is_false and is then emitted as an
           Imbe_CancelReservation memory-bus event. */
23140   if (INSN0(15,0) == 0xF3BF && INSN1(15,0) == 0x8F2F) {
23141      /* AFAICS, this simply cancels a (all?) reservations made by a
23142         (any?) preceding LDREX(es).  Arrange to hand it through to
23143         the back end. */
23144      mk_skip_over_T32_if_cond_is_false( condT );
23145      stmt( IRStmt_MBE(Imbe_CancelReservation) );
23146      DIP("clrex\n");
23147      goto decode_success;
23148   }
23149
23150   /* ------------------- NOP ------------------ */
        /* Matched on the exact 32-bit encoding 0xF3AF 0x8000.  No IR
           is generated; the instruction is just printed and
           accepted. */
23151   if (INSN0(15,0) == 0xF3AF && INSN1(15,0) == 0x8000) {
23152      DIP("nop\n");
23153      goto decode_success;
23154   }
23155
23156   /* -------------- (T1) LDRT reg+#imm8 -------------- */
23157   /* Load Register Unprivileged:
23158      ldrt Rt, [Rn, #imm8]
23159   */
        /* Translated as a guarded 32-bit load from rN + imm8; nothing
           in this case distinguishes it from a normal (privileged)
           load.  Not decoded if rN == 15 or rT is rejected by
           isBadRegT. */
23160   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,1)
23161       && INSN1(11,8) == BITS4(1,1,1,0)) {
23162      UInt rT    = INSN1(15,12);
23163      UInt rN    = INSN0(3,0);
23164      UInt imm8  = INSN1(7,0);
23165      Bool valid = True;
23166      if (rN == 15 || isBadRegT(rT)) valid = False;
23167      if (valid) {
              /* The old ITSTATE is put back for the duration of the
                 memory access and the new one restored afterwards. */
23168         put_ITSTATE(old_itstate);
23169         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23170         IRTemp newRt = newTemp(Ity_I32);
              /* The current rT is passed as the alternative value
                 (presumably what newRt receives when condT is false --
                 confirm against loadGuardedLE), so the register write
                 below needs no guard of its own. */
23171         loadGuardedLE( newRt, ILGop_Ident32, ea, llGetIReg(rT), condT );
23172         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23173         put_ITSTATE(new_itstate);
23174         DIP("ldrt r%u, [r%u, #%u]\n", rT, rN, imm8);
23175         goto decode_success;
23176      }
23177   }
23178
23179   /* -------------- (T1) STRT reg+#imm8 -------------- */
23180   /* Store Register Unprivileged:
23181      strt Rt, [Rn, #imm8]
23182   */
        /* Translated as a guarded 32-bit store of rT to rN + imm8;
           nothing in this case distinguishes it from a normal
           (privileged) store.  Not decoded if rN == 15 or rT is
           rejected by isBadRegT. */
23183   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,0)
23184       && INSN1(11,8) == BITS4(1,1,1,0)) {
23185      UInt rT    = INSN1(15,12);
23186      UInt rN    = INSN0(3,0);
23187      UInt imm8  = INSN1(7,0);
23188      Bool valid = True;
23189      if (rN == 15 || isBadRegT(rT)) valid = False;
23190      if (valid) {
              /* The old ITSTATE is put back for the duration of the
                 memory access and the new one restored afterwards. */
23191         put_ITSTATE(old_itstate);
23192         IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23193         storeGuardedLE( address, llGetIReg(rT), condT );
23194         put_ITSTATE(new_itstate);
23195         DIP("strt r%u, [r%u, #%u]\n", rT, rN, imm8);
23196         goto decode_success;
23197      }
23198   }
23199
23200   /* -------------- (T1) STRBT reg+#imm8 -------------- */
23201   /* Store Register Byte Unprivileged:
23202      strbt Rt, [Rn, #imm8]
23203   */
        /* Translated as a guarded store of the low 8 bits of rT to
           rN + imm8; nothing in this case distinguishes it from a
           normal (privileged) store.  Not decoded if rN == 15 or rT
           is rejected by isBadRegT. */
23204   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,0)
23205       && INSN1(11,8) == BITS4(1,1,1,0)) {
23206      UInt rT    = INSN1(15,12);
23207      UInt rN    = INSN0(3,0);
23208      UInt imm8  = INSN1(7,0);
23209      Bool valid = True;
23210      if (rN == 15 || isBadRegT(rT)) valid = False;
23211      if (valid) {
              /* The old ITSTATE is put back for the duration of the
                 memory access and the new one restored afterwards. */
23212         put_ITSTATE(old_itstate);
23213         IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23214         IRExpr* data = unop(Iop_32to8, llGetIReg(rT));
23215         storeGuardedLE( address, data, condT );
23216         put_ITSTATE(new_itstate);
23217         DIP("strbt r%u, [r%u, #%u]\n", rT, rN, imm8);
23218         goto decode_success;
23219      }
23220   }
23221
23222   /* -------------- (T1) LDRHT reg+#imm8 -------------- */
23223   /* Load Register Halfword Unprivileged:
23224      ldrht Rt, [Rn, #imm8]
23225   */
        /* Translated as a guarded 16-bit load from rN + imm8,
           zero-extended to 32 bits (ILGop_16Uto32); nothing in this
           case distinguishes it from a normal (privileged) load.
           Not decoded if rN == 15 or rT is rejected by isBadRegT. */
23226   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,1)
23227       && INSN1(11,8) == BITS4(1,1,1,0)) {
23228      UInt rN    = INSN0(3,0);
23229      Bool valid = True;
23230      if (rN == 15) {
23231         /* In this case our instruction is LDRH (literal), in fact:
23232            LDRH (literal) was realized earlier, so we don't want to
23233            make it twice. */
23234         valid = False;
23235      }
23236      UInt rT    = INSN1(15,12);
23237      UInt imm8  = INSN1(7,0);
23238      if (isBadRegT(rT)) valid = False;
23239      if (valid) {
              /* The old ITSTATE is put back for the duration of the
                 memory access and the new one restored afterwards. */
23240         put_ITSTATE(old_itstate);
23241         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23242         IRTemp newRt = newTemp(Ity_I32);
              /* The current rT is passed as the alternative value
                 (presumably used when condT is false -- confirm
                 against loadGuardedLE). */
23243         loadGuardedLE( newRt, ILGop_16Uto32, ea, llGetIReg(rT), condT );
23244         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23245         put_ITSTATE(new_itstate);
23246         DIP("ldrht r%u, [r%u, #%u]\n", rT, rN, imm8);
23247         goto decode_success;
23248      }
23249   }
23250
23251   /* -------------- (T1) LDRSHT reg+#imm8 -------------- */
23252   /* Load Register Signed Halfword Unprivileged:
23253      ldrsht Rt, [Rn, #imm8]
23254   */
        /* Translated as a guarded 16-bit load from rN + imm8,
           sign-extended to 32 bits (ILGop_16Sto32); nothing in this
           case distinguishes it from a normal (privileged) load.
           Not decoded if rN == 15 or rT is rejected by isBadRegT. */
23255   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(1,1)
23256       && INSN1(11,8) == BITS4(1,1,1,0)) {
23257      UInt rN    = INSN0(3,0);
23258      Bool valid = True;
23259      if (rN == 15) {
23260         /* In this case our instruction is LDRSH (literal), in fact:
23261            LDRSH (literal) was realized earlier, so we don't want to
23262            make it twice. */
23263         valid = False;
23264      }
23265      UInt rT    = INSN1(15,12);
23266      UInt imm8  = INSN1(7,0);
23267      if (isBadRegT(rT)) valid = False;
23268      if (valid) {
              /* The old ITSTATE is put back for the duration of the
                 memory access and the new one restored afterwards. */
23269         put_ITSTATE(old_itstate);
23270         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23271         IRTemp newRt = newTemp(Ity_I32);
              /* The current rT is passed as the alternative value
                 (presumably used when condT is false -- confirm
                 against loadGuardedLE). */
23272         loadGuardedLE( newRt, ILGop_16Sto32, ea, llGetIReg(rT), condT );
23273         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23274         put_ITSTATE(new_itstate);
23275         DIP("ldrsht r%u, [r%u, #%u]\n", rT, rN, imm8);
23276         goto decode_success;
23277      }
23278   }
23279
23280   /* -------------- (T1) STRHT reg+#imm8 -------------- */
23281   /* Store Register Halfword Unprivileged:
23282      strht Rt, [Rn, #imm8]
23283   */
23284   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,0)
23285       && INSN1(11,8) == BITS4(1,1,1,0)) {
23286      UInt rT    = INSN1(15,12);
23287      UInt rN    = INSN0(3,0);
23288      UInt imm8  = INSN1(7,0);
23289      Bool valid = True;
23290      if (rN == 15 || isBadRegT(rT)) valid = False;
23291      if (valid) {
23292         put_ITSTATE(old_itstate);
23293         IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23294         IRExpr* data = unop(Iop_32to16, llGetIReg(rT));
23295         storeGuardedLE( address, data, condT );
23296         put_ITSTATE(new_itstate);
23297         DIP("strht r%u, [r%u, #%u]\n", rT, rN, imm8);
23298         goto decode_success;
23299      }
23300   }
23301
23302   /* -------------- (T1) LDRBT reg+#imm8 -------------- */
23303   /* Load Register Byte Unprivileged:
23304      ldrbt Rt, [Rn, #imm8]
23305   */
23306   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,1)
23307       && INSN1(11,8) == BITS4(1,1,1,0)) {
23308      UInt rN    = INSN0(3,0);
23309      UInt rT    = INSN1(15,12);
23310      UInt imm8  = INSN1(7,0);
23311      Bool valid = True;
23312      if (rN == 15 /* insn is LDRB (literal) */) valid = False;
23313      if (isBadRegT(rT)) valid = False;
23314      if (valid) {
23315         put_ITSTATE(old_itstate);
23316         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23317         IRTemp newRt = newTemp(Ity_I32);
23318         loadGuardedLE( newRt, ILGop_8Uto32, ea, llGetIReg(rT), condT );
23319         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23320         put_ITSTATE(new_itstate);
23321         DIP("ldrbt r%u, [r%u, #%u]\n", rT, rN, imm8);
23322         goto decode_success;
23323      }
23324   }
23325
23326   /* -------------- (T1) LDRSBT reg+#imm8 -------------- */
23327   /* Load Register Signed Byte Unprivileged:
23328      ldrsbt Rt, [Rn, #imm8]
23329   */
23330   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
23331       && INSN1(11,8) == BITS4(1,1,1,0)) {
23332      UInt rN    = INSN0(3,0);
23333      Bool valid = True;
23334      UInt rT    = INSN1(15,12);
23335      UInt imm8  = INSN1(7,0);
23336      if (rN == 15 /* insn is LDRSB (literal) */) valid = False;
23337      if (isBadRegT(rT)) valid = False;
23338      if (valid) {
23339         put_ITSTATE(old_itstate);
23340         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23341         IRTemp newRt = newTemp(Ity_I32);
23342         loadGuardedLE( newRt, ILGop_8Sto32, ea, llGetIReg(rT), condT );
23343         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23344         put_ITSTATE(new_itstate);
23345         DIP("ldrsbt r%u, [r%u, #%u]\n", rT, rN, imm8);
23346         goto decode_success;
23347      }
23348   }
23349
23350   /* -------------- (T1) PLI reg+#imm12 -------------- */
23351   /* Preload Instruction:
23352      pli [Rn, #imm12]
23353   */
23354   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,1,0) && INSN0(5,4) == BITS2(0,1)
23355       && INSN1(15,12) == BITS4(1,1,1,1)) {
23356      UInt rN    = INSN0(3,0);
23357      UInt imm12 = INSN1(11,0);
23358      if (rN != 15) {
23359         DIP("pli [r%u, #%u]\n", rN, imm12);
23360         goto decode_success;
23361      }
23362   }
23363
23364   /* -------------- (T2) PLI reg-#imm8 -------------- */
23365   /* Preload Instruction:
23366      pli [Rn, #-imm8]
23367   */
23368   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
23369       && INSN1(15,8) == BITS8(1,1,1,1,1,1,0,0)) {
23370      UInt rN   = INSN0(3,0);
23371      UInt imm8 = INSN1(7,0);
23372      if (rN != 15) {
23373         DIP("pli [r%u, #-%u]\n", rN, imm8);
23374         goto decode_success;
23375      }
23376   }
23377
23378   /* -------------- (T3) PLI PC+/-#imm12 -------------- */
23379   /* Preload Instruction:
23380      pli [PC, #+/-imm12]
23381   */
23382   if (INSN0(15,8) == BITS8(1,1,1,1,1,0,0,1)
23383       && INSN0(6,0) == BITS7(0,0,1,1,1,1,1)
23384       && INSN1(15,12) == BITS4(1,1,1,1)) {
23385      UInt imm12 = INSN1(11,0);
23386      UInt bU    = INSN0(7,7);
23387      DIP("pli [pc, #%c%u]\n", bU == 1 ? '+' : '-', imm12);
23388      goto decode_success;
23389   }
23390
23391   /* ----------------------------------------------------------- */
23392   /* -- VFP (CP 10, CP 11) instructions (in Thumb mode)       -- */
23393   /* ----------------------------------------------------------- */
23394
23395   if (INSN0(15,12) == BITS4(1,1,1,0)) {
23396      UInt insn28 = (INSN0(11,0) << 16) | INSN1(15,0);
23397      Bool ok_vfp = decode_CP10_CP11_instruction (
23398                       &dres, insn28, condT, ARMCondAL/*bogus*/,
23399                       True/*isT*/
23400                    );
23401      if (ok_vfp)
23402         goto decode_success;
23403   }
23404
23405   /* ----------------------------------------------------------- */
23406   /* -- NEON instructions (only v7 and below, in Thumb mode)  -- */
23407   /* ----------------------------------------------------------- */
23408
23409   if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
23410      UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
23411      Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
23412                        &dres, insn32, condT, True/*isT*/
23413                     );
23414      if (ok_neon)
23415         goto decode_success;
23416   }
23417
23418   /* ----------------------------------------------------------- */
23419   /* -- v6 media instructions (in Thumb mode)                 -- */
23420   /* ----------------------------------------------------------- */
23421
23422   { UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
23423     Bool ok_v6m = decode_V6MEDIA_instruction(
23424                      &dres, insn32, condT, ARMCondAL/*bogus*/,
23425                      True/*isT*/
23426                   );
23427     if (ok_v6m)
23428        goto decode_success;
23429   }
23430
23431   /* ----------------------------------------------------------- */
23432   /* -- v8 instructions (in Thumb mode)                       -- */
23433   /* ----------------------------------------------------------- */
23434
23435   /* If we get here, it means that all attempts to decode the
23436      instruction as ARMv7 or earlier have failed.  So, if we're doing
23437      ARMv8 or later, here is the point to try for it. */
23438
23439   if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
23440      UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
23441      Bool ok_v8
23442         = decode_V8_instruction( &dres, insn32, condT, True/*isT*/,
23443                                  old_itstate, new_itstate );
23444      if (ok_v8)
23445         goto decode_success;
23446   }
23447
23448   /* ----------------------------------------------------------- */
23449   /* -- Undecodable                                           -- */
23450   /* ----------------------------------------------------------- */
23451
23452   goto decode_failure;
23453   /*NOTREACHED*/
23454
23455  decode_failure:
23456   /* All decode failures end up here. */
23457   if (sigill_diag)
23458      vex_printf("disInstr(thumb): unhandled instruction: "
23459                 "0x%04x 0x%04x\n", (UInt)insn0, (UInt)insn1);
23460
23461   /* Back up ITSTATE to the initial value for this instruction.
23462      If we don't do that, any subsequent restart of the instruction
23463      will restart with the wrong value. */
23464   if (old_itstate != IRTemp_INVALID)
23465      put_ITSTATE(old_itstate);
23466
   /* Tell the dispatcher that this insn cannot be decoded, and so has
      not been executed, and (is currently) the next to be executed.
      R15 should be up-to-date since it is made so at the start of each
      insn, but nevertheless be paranoid and update it again right
      now. */
23472   vassert(0 == (guest_R15_curr_instr_notENC & 1));
23473   llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
23474   dres.len         = 0;
23475   dres.whatNext    = Dis_StopHere;
23476   dres.jk_StopHere = Ijk_NoDecode;
23477   dres.continueAt  = 0;
23478   return dres;
23479
23480  decode_success:
23481   /* All decode successes end up here. */
23482   vassert(dres.len == 4 || dres.len == 2 || dres.len == 20);
23483   switch (dres.whatNext) {
23484      case Dis_Continue:
23485         llPutIReg(15, mkU32(dres.len + (guest_R15_curr_instr_notENC | 1)));
23486         break;
23487      case Dis_ResteerU:
23488      case Dis_ResteerC:
23489         llPutIReg(15, mkU32(dres.continueAt));
23490         break;
23491      case Dis_StopHere:
23492         break;
23493      default:
23494         vassert(0);
23495   }
23496
23497   DIP("\n");
23498
23499   return dres;
23500
23501#  undef INSN0
23502#  undef INSN1
23503}
23504
23505#undef DIP
23506#undef DIS
23507
23508
23509/* Helper table for figuring out how many insns an IT insn
23510   conditionalises.
23511
23512   An ITxyz instruction of the format "1011 1111 firstcond mask"
23513   conditionalises some number of instructions, as indicated by the
23514   following table.  A value of zero indicates the instruction is
23515   invalid in some way.
23516
23517   mask = 0 means this isn't an IT instruction
23518   fc = 15 (NV) means unpredictable
23519
23520   The line fc = 14 (AL) is different from the others; there are
23521   additional constraints in this case.
23522
23523          mask(0 ..                   15)
23524        +--------------------------------
23525   fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23526   ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23527        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23528        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23529        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23530        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23531        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23532        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23533        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23534        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23535        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23536        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23537        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23538        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23539        | 0 4 3 0 2 0 0 0 1 0 0 0 0 0 0 0
23540   15)  | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
23541
23542   To be conservative with the analysis, let's rule out the mask = 0
23543   case, since that isn't an IT insn at all.  But for all the other
23544   cases where the table contains zero, that means unpredictable, so
23545   let's say 4 to be conservative.  Hence we have a safe value for any
23546   IT (mask,fc) pair that the CPU would actually identify as an IT
23547   instruction.  The final table is
23548
23549          mask(0 ..                   15)
23550        +--------------------------------
23551   fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23552   ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23553        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23554        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23555        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23556        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23557        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23558        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23559        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23560        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23561        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23562        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23563        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23564        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23565        | 0 4 3 4 2 4 4 4 1 4 4 4 4 4 4 4
23566   15)  | 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
23567*/
23568static const UChar it_length_table[256]
23569   = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23570       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23571       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23572       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23573       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23574       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23575       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23576       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23577       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23578       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23579       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23580       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23581       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23582       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23583       0, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
23584       0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
23585     };
23586
23587
23588/*------------------------------------------------------------*/
23589/*--- Top-level fn                                         ---*/
23590/*------------------------------------------------------------*/
23591
23592/* Disassemble a single instruction into IR.  The instruction
23593   is located in host memory at &guest_code[delta]. */
23594
23595DisResult disInstr_ARM ( IRSB*        irsb_IN,
23596                         Bool         (*resteerOkFn) ( void*, Addr ),
23597                         Bool         resteerCisOk,
23598                         void*        callback_opaque,
23599                         const UChar* guest_code_IN,
23600                         Long         delta_ENCODED,
23601                         Addr         guest_IP_ENCODED,
23602                         VexArch      guest_arch,
23603                         const VexArchInfo* archinfo,
23604                         const VexAbiInfo*  abiinfo,
23605                         VexEndness   host_endness_IN,
23606                         Bool         sigill_diag_IN )
23607{
23608   DisResult dres;
23609   Bool isThumb = (Bool)(guest_IP_ENCODED & 1);
23610
23611   /* Set globals (see top of this file) */
23612   vassert(guest_arch == VexArchARM);
23613
23614   irsb            = irsb_IN;
23615   host_endness    = host_endness_IN;
23616   __curr_is_Thumb = isThumb;
23617
23618   if (isThumb) {
23619      guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED - 1;
23620   } else {
23621      guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED;
23622   }
23623
23624   if (isThumb) {
23625      dres = disInstr_THUMB_WRK ( resteerOkFn,
23626                                  resteerCisOk, callback_opaque,
23627                                  &guest_code_IN[delta_ENCODED - 1],
23628                                  archinfo, abiinfo, sigill_diag_IN );
23629   } else {
23630      dres = disInstr_ARM_WRK ( resteerOkFn,
23631                                resteerCisOk, callback_opaque,
23632                                &guest_code_IN[delta_ENCODED],
23633                                archinfo, abiinfo, sigill_diag_IN );
23634   }
23635
23636   return dres;
23637}
23638
23639/* Test program for the conversion of IRCmpF64Result values to VFP
23640   nzcv values.  See handling of FCMPD et al above. */
23641/*
23642UInt foo ( UInt x )
23643{
23644   UInt ix    = ((x >> 5) & 3) | (x & 1);
23645   UInt termL = (((((ix ^ 1) << 30) - 1) >> 29) + 1);
23646   UInt termR = (ix & (ix >> 1) & 1);
23647   return termL  -  termR;
23648}
23649
23650void try ( char* s, UInt ir, UInt req )
23651{
23652   UInt act = foo(ir);
23653   printf("%s 0x%02x -> req %d%d%d%d act %d%d%d%d (0x%x)\n",
23654          s, ir, (req >> 3) & 1, (req >> 2) & 1,
23655                 (req >> 1) & 1, (req >> 0) & 1,
23656                 (act >> 3) & 1, (act >> 2) & 1,
23657                 (act >> 1) & 1, (act >> 0) & 1, act);
23658
23659}
23660
23661int main ( void )
23662{
23663   printf("\n");
23664   try("UN", 0x45, 0b0011);
23665   try("LT", 0x01, 0b1000);
23666   try("GT", 0x00, 0b0010);
23667   try("EQ", 0x40, 0b0110);
23668   printf("\n");
23669   return 0;
23670}
23671*/
23672
23673/* Spare code for doing reference implementations of various 64-bit
23674   SIMD interleaves/deinterleaves/concatenation ops. */
23675/*
23676// Split a 64 bit value into 4 16 bit ones, in 32-bit IRTemps with
23677// the top halves guaranteed to be zero.
23678static void break64to16s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
23679                           IRTemp* out0, IRTemp v64 )
23680{
23681  if (out3) *out3 = newTemp(Ity_I32);
23682  if (out2) *out2 = newTemp(Ity_I32);
23683  if (out1) *out1 = newTemp(Ity_I32);
23684  if (out0) *out0 = newTemp(Ity_I32);
23685  IRTemp hi32 = newTemp(Ity_I32);
23686  IRTemp lo32 = newTemp(Ity_I32);
23687  assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
23688  assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
23689  if (out3) assign(*out3, binop(Iop_Shr32, mkexpr(hi32), mkU8(16)));
23690  if (out2) assign(*out2, binop(Iop_And32, mkexpr(hi32), mkU32(0xFFFF)));
23691  if (out1) assign(*out1, binop(Iop_Shr32, mkexpr(lo32), mkU8(16)));
23692  if (out0) assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFFFF)));
23693}
23694
23695// Make a 64 bit value from 4 16 bit ones, each of which is in a 32 bit
23696// IRTemp.
23697static IRTemp mk64from16s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
23698{
23699  IRTemp hi32 = newTemp(Ity_I32);
23700  IRTemp lo32 = newTemp(Ity_I32);
23701  assign(hi32,
23702         binop(Iop_Or32,
23703               binop(Iop_Shl32, mkexpr(in3), mkU8(16)),
23704               binop(Iop_And32, mkexpr(in2), mkU32(0xFFFF))));
23705  assign(lo32,
23706         binop(Iop_Or32,
23707               binop(Iop_Shl32, mkexpr(in1), mkU8(16)),
23708               binop(Iop_And32, mkexpr(in0), mkU32(0xFFFF))));
23709  IRTemp res = newTemp(Ity_I64);
23710  assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
23711  return res;
23712}
23713
23714static IRExpr* mk_InterleaveLO16x4 ( IRTemp a3210, IRTemp b3210 )
23715{
23716  // returns a1 b1 a0 b0
23717  IRTemp a1, a0, b1, b0;
23718  break64to16s(NULL, NULL, &a1, &a0, a3210);
23719  break64to16s(NULL, NULL, &b1, &b0, b3210);
23720  return mkexpr(mk64from16s(a1, b1, a0, b0));
23721}
23722
23723static IRExpr* mk_InterleaveHI16x4 ( IRTemp a3210, IRTemp b3210 )
23724{
23725  // returns a3 b3 a2 b2
23726  IRTemp a3, a2, b3, b2;
23727  break64to16s(&a3, &a2, NULL, NULL, a3210);
23728  break64to16s(&b3, &b2, NULL, NULL, b3210);
23729  return mkexpr(mk64from16s(a3, b3, a2, b2));
23730}
23731
23732static IRExpr* mk_CatEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23733{
23734  // returns a2 a0 b2 b0
23735  IRTemp a2, a0, b2, b0;
23736  break64to16s(NULL, &a2, NULL, &a0, a3210);
23737  break64to16s(NULL, &b2, NULL, &b0, b3210);
23738  return mkexpr(mk64from16s(a2, a0, b2, b0));
23739}
23740
23741static IRExpr* mk_CatOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23742{
23743  // returns a3 a1 b3 b1
23744  IRTemp a3, a1, b3, b1;
23745  break64to16s(&a3, NULL, &a1, NULL, a3210);
23746  break64to16s(&b3, NULL, &b1, NULL, b3210);
23747  return mkexpr(mk64from16s(a3, a1, b3, b1));
23748}
23749
23750static IRExpr* mk_InterleaveOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23751{
23752  // returns a3 b3 a1 b1
23753  IRTemp a3, b3, a1, b1;
23754  break64to16s(&a3, NULL, &a1, NULL, a3210);
23755  break64to16s(&b3, NULL, &b1, NULL, b3210);
23756  return mkexpr(mk64from16s(a3, b3, a1, b1));
23757}
23758
23759static IRExpr* mk_InterleaveEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23760{
23761  // returns a2 b2 a0 b0
23762  IRTemp a2, b2, a0, b0;
23763  break64to16s(NULL, &a2, NULL, &a0, a3210);
23764  break64to16s(NULL, &b2, NULL, &b0, b3210);
23765  return mkexpr(mk64from16s(a2, b2, a0, b0));
23766}
23767
23768static void break64to8s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
23769                          IRTemp* out4, IRTemp* out3, IRTemp* out2,
23770                          IRTemp* out1,IRTemp* out0, IRTemp v64 )
23771{
23772  if (out7) *out7 = newTemp(Ity_I32);
23773  if (out6) *out6 = newTemp(Ity_I32);
23774  if (out5) *out5 = newTemp(Ity_I32);
23775  if (out4) *out4 = newTemp(Ity_I32);
23776  if (out3) *out3 = newTemp(Ity_I32);
23777  if (out2) *out2 = newTemp(Ity_I32);
23778  if (out1) *out1 = newTemp(Ity_I32);
23779  if (out0) *out0 = newTemp(Ity_I32);
23780  IRTemp hi32 = newTemp(Ity_I32);
23781  IRTemp lo32 = newTemp(Ity_I32);
23782  assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
23783  assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
23784  if (out7)
23785    assign(*out7, binop(Iop_And32,
23786                        binop(Iop_Shr32, mkexpr(hi32), mkU8(24)),
23787                        mkU32(0xFF)));
23788  if (out6)
23789    assign(*out6, binop(Iop_And32,
23790                        binop(Iop_Shr32, mkexpr(hi32), mkU8(16)),
23791                        mkU32(0xFF)));
23792  if (out5)
23793    assign(*out5, binop(Iop_And32,
23794                        binop(Iop_Shr32, mkexpr(hi32), mkU8(8)),
23795                        mkU32(0xFF)));
23796  if (out4)
23797    assign(*out4, binop(Iop_And32, mkexpr(hi32), mkU32(0xFF)));
23798  if (out3)
23799    assign(*out3, binop(Iop_And32,
23800                        binop(Iop_Shr32, mkexpr(lo32), mkU8(24)),
23801                        mkU32(0xFF)));
23802  if (out2)
23803    assign(*out2, binop(Iop_And32,
23804                        binop(Iop_Shr32, mkexpr(lo32), mkU8(16)),
23805                        mkU32(0xFF)));
23806  if (out1)
23807    assign(*out1, binop(Iop_And32,
23808                        binop(Iop_Shr32, mkexpr(lo32), mkU8(8)),
23809                        mkU32(0xFF)));
23810  if (out0)
23811    assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFF)));
23812}
23813
23814static IRTemp mk64from8s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
23815                           IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
23816{
23817  IRTemp hi32 = newTemp(Ity_I32);
23818  IRTemp lo32 = newTemp(Ity_I32);
23819  assign(hi32,
23820         binop(Iop_Or32,
23821               binop(Iop_Or32,
23822                     binop(Iop_Shl32,
23823                           binop(Iop_And32, mkexpr(in7), mkU32(0xFF)),
23824                           mkU8(24)),
23825                     binop(Iop_Shl32,
23826                           binop(Iop_And32, mkexpr(in6), mkU32(0xFF)),
23827                           mkU8(16))),
23828               binop(Iop_Or32,
23829                     binop(Iop_Shl32,
23830                           binop(Iop_And32, mkexpr(in5), mkU32(0xFF)), mkU8(8)),
23831                     binop(Iop_And32,
23832                           mkexpr(in4), mkU32(0xFF)))));
23833  assign(lo32,
23834         binop(Iop_Or32,
23835               binop(Iop_Or32,
23836                     binop(Iop_Shl32,
23837                           binop(Iop_And32, mkexpr(in3), mkU32(0xFF)),
23838                           mkU8(24)),
23839                     binop(Iop_Shl32,
23840                           binop(Iop_And32, mkexpr(in2), mkU32(0xFF)),
23841                           mkU8(16))),
23842               binop(Iop_Or32,
23843                     binop(Iop_Shl32,
23844                           binop(Iop_And32, mkexpr(in1), mkU32(0xFF)), mkU8(8)),
23845                     binop(Iop_And32,
23846                           mkexpr(in0), mkU32(0xFF)))));
23847  IRTemp res = newTemp(Ity_I64);
23848  assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
23849  return res;
23850}
23851
23852static IRExpr* mk_InterleaveLO8x8 ( IRTemp a76543210, IRTemp b76543210 )
23853{
23854  // returns a3 b3 a2 b2 a1 b1 a0 b0
23855  IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
23856  break64to8s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
23857  break64to8s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
23858  return mkexpr(mk64from8s(a3, b3, a2, b2, a1, b1, a0, b0));
23859}
23860
23861static IRExpr* mk_InterleaveHI8x8 ( IRTemp a76543210, IRTemp b76543210 )
23862{
23863  // returns a7 b7 a6 b6 a5 b5 a4 b4
23864  IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
23865  break64to8s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
23866  break64to8s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
23867  return mkexpr(mk64from8s(a7, b7, a6, b6, a5, b5, a4, b4));
23868}
23869
23870static IRExpr* mk_CatEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23871{
23872  // returns a6 a4 a2 a0 b6 b4 b2 b0
23873  IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
23874  break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
23875  break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
23876  return mkexpr(mk64from8s(a6, a4, a2, a0, b6, b4, b2, b0));
23877}
23878
23879static IRExpr* mk_CatOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23880{
23881  // returns a7 a5 a3 a1 b7 b5 b3 b1
23882  IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
23883  break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
23884  break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
23885  return mkexpr(mk64from8s(a7, a5, a3, a1, b7, b5, b3, b1));
23886}
23887
23888static IRExpr* mk_InterleaveEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23889{
23890  // returns a6 b6 a4 b4 a2 b2 a0 b0
23891  IRTemp a6, b6, a4, b4, a2, b2, a0, b0;
23892  break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
23893  break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
23894  return mkexpr(mk64from8s(a6, b6, a4, b4, a2, b2, a0, b0));
23895}
23896
23897static IRExpr* mk_InterleaveOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23898{
23899  // returns a7 b7 a5 b5 a3 b3 a1 b1
23900  IRTemp a7, b7, a5, b5, a3, b3, a1, b1;
23901  break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
23902  break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
23903  return mkexpr(mk64from8s(a7, b7, a5, b5, a3, b3, a1, b1));
23904}
23905
23906static IRExpr* mk_InterleaveLO32x2 ( IRTemp a10, IRTemp b10 )
23907{
23908  // returns a0 b0
23909  return binop(Iop_32HLto64, unop(Iop_64to32, mkexpr(a10)),
23910                             unop(Iop_64to32, mkexpr(b10)));
23911}
23912
23913static IRExpr* mk_InterleaveHI32x2 ( IRTemp a10, IRTemp b10 )
23914{
23915  // returns a1 b1
23916  return binop(Iop_32HLto64, unop(Iop_64HIto32, mkexpr(a10)),
23917                             unop(Iop_64HIto32, mkexpr(b10)));
23918}
23919*/
23920
23921/*--------------------------------------------------------------------*/
23922/*--- end                                         guest_arm_toIR.c ---*/
23923/*--------------------------------------------------------------------*/
23924