1
2/*--------------------------------------------------------------------*/
3/*--- begin                                       guest_arm_toIR.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2013 OpenWorks LLP
11      info@open-works.net
12
13   NEON support is
14   Copyright (C) 2010-2013 Samsung Electronics
15   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16              and Kirill Batuzov <batuzovk@ispras.ru>
17
18   This program is free software; you can redistribute it and/or
19   modify it under the terms of the GNU General Public License as
20   published by the Free Software Foundation; either version 2 of the
21   License, or (at your option) any later version.
22
23   This program is distributed in the hope that it will be useful, but
24   WITHOUT ANY WARRANTY; without even the implied warranty of
25   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26   General Public License for more details.
27
28   You should have received a copy of the GNU General Public License
29   along with this program; if not, write to the Free Software
30   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31   02110-1301, USA.
32
33   The GNU General Public License is contained in the file COPYING.
34*/
35
36/* XXXX thumb to check:
37   that all cases where putIRegT writes r15, we generate a jump.
38
39   All uses of newTemp assign to an IRTemp and not a UInt
40
41   For all thumb loads and stores, including VFP ones, new-ITSTATE is
42   backed out before the memory op, and restored afterwards.  This
43   needs to happen even after we go uncond.  (and for sure it doesn't
44   happen for VFP loads/stores right now).
45
46   VFP on thumb: check that we exclude all r13/r15 cases that we
47   should.
48
49   XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
50   taking into account the number of insns guarded by an IT.
51
52   remove the nasty hack, in the spechelper, of looking for Or32(...,
53   0xE0) in as the first arg to armg_calculate_condition, and instead
54   use Slice44 as specified in comments in the spechelper.
55
56   add specialisations for armg_calculate_flag_c and _v, as they
57   are moderately often needed in Thumb code.
58
59   Correctness: ITSTATE handling in Thumb SVCs is wrong.
60
61   Correctness (obscure): in m_transtab, when invalidating code
62   address ranges, invalidate up to 18 bytes after the end of the
63   range.  This is because the ITSTATE optimisation at the top of
64   _THUMB_WRK below analyses up to 18 bytes before the start of any
65   given instruction, and so might depend on the invalidated area.
66*/
67
68/* Limitations, etc
69
70   - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
71     These instructions are non-restartable in the case where the
72     transfer(s) fault.
73
74   - SWP: the restart jump back is Ijk_Boring; it should be
75     Ijk_NoRedir but that's expensive.  See comments on casLE() in
76     guest_x86_toIR.c.
77*/
78
79/* "Special" instructions.
80
81   This instruction decoder can decode four special instructions
82   which mean nothing natively (are no-ops as far as regs/mem are
83   concerned) but have meaning for supporting Valgrind.  A special
84   instruction is flagged by a 16-byte preamble:
85
86      E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
87      (mov r12, r12, ROR #3;   mov r12, r12, ROR #13;
88       mov r12, r12, ROR #29;  mov r12, r12, ROR #19)
89
   Following that, one of the following 4 is allowed
   (standard interpretation in parentheses):
92
93      E18AA00A (orr r10,r10,r10)   R3 = client_request ( R4 )
94      E18BB00B (orr r11,r11,r11)   R3 = guest_NRADDR
95      E18CC00C (orr r12,r12,r12)   branch-and-link-to-noredir R4
96      E1899009 (orr r9,r9,r9)      IR injection
97
98   Any other bytes following the 16-byte preamble are illegal and
99   constitute a failure in instruction decoding.  This all assumes
100   that the preamble will never occur except in specific code
101   fragments designed for Valgrind to catch.
102*/
103
104/* Translates ARM(v5) code to IR. */
105
106#include "libvex_basictypes.h"
107#include "libvex_ir.h"
108#include "libvex.h"
109#include "libvex_guest_arm.h"
110
111#include "main_util.h"
112#include "main_globals.h"
113#include "guest_generic_bb_to_IR.h"
114#include "guest_arm_defs.h"
115
116
117/*------------------------------------------------------------*/
118/*--- Globals                                              ---*/
119/*------------------------------------------------------------*/
120
/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction.
*/
125
126/* CONST: is the host bigendian?  This has to do with float vs double
127   register accesses on VFP, but it's complex and not properly thought
128   out. */
129static Bool host_is_bigendian;
130
131/* CONST: The guest address for the instruction currently being
132   translated.  This is the real, "decoded" address (not subject
133   to the CPSR.T kludge). */
134static Addr32 guest_R15_curr_instr_notENC;
135
136/* CONST, FOR ASSERTIONS ONLY.  Indicates whether currently processed
137   insn is Thumb (True) or ARM (False). */
138static Bool __curr_is_Thumb;
139
140/* MOD: The IRSB* into which we're generating code. */
141static IRSB* irsb;
142
143/* These are to do with handling writes to r15.  They are initially
144   set at the start of disInstr_ARM_WRK to indicate no update,
145   possibly updated during the routine, and examined again at the end.
146   If they have been set to indicate a r15 update then a jump is
147   generated.  Note, "explicit" jumps (b, bx, etc) are generated
148   directly, not using this mechanism -- this is intended to handle
149   the implicit-style jumps resulting from (eg) assigning to r15 as
150   the result of insns we wouldn't normally consider branchy. */
151
152/* MOD.  Initially False; set to True iff abovementioned handling is
153   required. */
154static Bool r15written;
155
156/* MOD.  Initially IRTemp_INVALID.  If the r15 branch to be generated
157   is conditional, this holds the gating IRTemp :: Ity_I32.  If the
158   branch to be generated is unconditional, this remains
159   IRTemp_INVALID. */
160static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
161
162/* MOD.  Initially Ijk_Boring.  If an r15 branch is to be generated,
163   this holds the jump kind. */
164static IRTemp r15kind;
165
166
167/*------------------------------------------------------------*/
168/*--- Debugging output                                     ---*/
169/*------------------------------------------------------------*/
170
/* Front-end tracing helpers.  Both do nothing unless the
   VEX_TRACE_FE bit is set in vex_traceflags.

   DIP(format, args...)       prints via vex_printf.
   DIS(buf, format, args...)  formats into 'buf' via vex_sprintf.

   Each body is wrapped in do { } while (0) so that the macro expands
   to exactly one statement.  Without the wrapper, a use such as
      if (c) DIP("x"); else foo();
   would silently attach the 'else' to the macro's internal 'if'. */
#define DIP(format, args...)                 \
   do {                                      \
      if (vex_traceflags & VEX_TRACE_FE)     \
         vex_printf(format, ## args);        \
   } while (0)

#define DIS(buf, format, args...)            \
   do {                                      \
      if (vex_traceflags & VEX_TRACE_FE)     \
         vex_sprintf(buf, format, ## args);  \
   } while (0)
178
/* Sanity checks on which instruction set the insn currently being
   translated belongs to, as recorded in __curr_is_Thumb.  Each
   expands to a single statement. */
#define ASSERT_IS_THUMB              \
   do {                              \
      vassert(__curr_is_Thumb);      \
   } while (0)

#define ASSERT_IS_ARM                \
   do {                              \
      vassert(! __curr_is_Thumb);    \
   } while (0)
184
185
186/*------------------------------------------------------------*/
187/*--- Helper bits and pieces for deconstructing the        ---*/
188/*--- arm insn stream.                                     ---*/
189/*------------------------------------------------------------*/
190
191/* Do a little-endian load of a 32-bit word, regardless of the
192   endianness of the underlying host. */
193static inline UInt getUIntLittleEndianly ( UChar* p )
194{
195   UInt w = 0;
196   w = (w << 8) | p[3];
197   w = (w << 8) | p[2];
198   w = (w << 8) | p[1];
199   w = (w << 8) | p[0];
200   return w;
201}
202
203/* Do a little-endian load of a 16-bit word, regardless of the
204   endianness of the underlying host. */
205static inline UShort getUShortLittleEndianly ( UChar* p )
206{
207   UShort w = 0;
208   w = (w << 8) | p[1];
209   w = (w << 8) | p[0];
210   return w;
211}
212
213static UInt ROR32 ( UInt x, UInt sh ) {
214   vassert(sh >= 0 && sh < 32);
215   if (sh == 0)
216      return x;
217   else
218      return (x << (32-sh)) | (x >> sh);
219}
220
221static Int popcount32 ( UInt x )
222{
223   Int res = 0, i;
224   for (i = 0; i < 32; i++) {
225      res += (x & 1);
226      x >>= 1;
227   }
228   return res;
229}
230
231static UInt setbit32 ( UInt x, Int ix, UInt b )
232{
233   UInt mask = 1 << ix;
234   x &= ~mask;
235   x |= ((b << ix) & mask);
236   return x;
237}
238
/* BITSn(b[n-1], ..., b0) packs its n arguments into an integer,
   leftmost argument in the most significant position, so that bit
   patterns can be written out one bit at a time.  Each wider macro
   is the next narrower one with one more bit OR-ed in on top. */
#define BITS2(_b1,_b0) \
   ((_b0) | ((_b1) << 1))

#define BITS3(_b2,_b1,_b0) \
   ((_b0) | ((_b1) << 1) | ((_b2) << 2))

#define BITS4(_b3,_b2,_b1,_b0) \
   ((_b0) | ((_b1) << 1) | ((_b2) << 2) | ((_b3) << 3))

#define BITS5(_b4,_b3,_b2,_b1,_b0) \
   (BITS4((_b3),(_b2),(_b1),(_b0)) | ((_b4) << 4))

#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
   (BITS5((_b4),(_b3),(_b2),(_b1),(_b0)) | ((_b5) << 5))

#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (BITS6((_b5),(_b4),(_b3),(_b2),(_b1),(_b0)) | ((_b6) << 6))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (BITS7((_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)) | ((_b7) << 7))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)) \
    | ((_b8) << 8))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (BITS9((_b8),(_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)) \
    | ((_b9) << 9))
266
/* Extract the bit field _uint[_bMax:_bMin] (both ends inclusive) and
   return it, right-justified, as a UInt.  The mask is computed in 64
   bits so that a full 32-bit wide field (31:0) works too. */
#define SLICE_UInt(_uint,_bMax,_bMin)                              \
   ((UInt)( (((unsigned long long)(UInt)(_uint)) >> (_bMin))       \
            & ((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL) ))
271
272
273/*------------------------------------------------------------*/
274/*--- Helper bits and pieces for creating IR fragments.    ---*/
275/*------------------------------------------------------------*/
276
277static IRExpr* mkU64 ( ULong i )
278{
279   return IRExpr_Const(IRConst_U64(i));
280}
281
282static IRExpr* mkU32 ( UInt i )
283{
284   return IRExpr_Const(IRConst_U32(i));
285}
286
287static IRExpr* mkU8 ( UInt i )
288{
289   vassert(i < 256);
290   return IRExpr_Const(IRConst_U8( (UChar)i ));
291}
292
293static IRExpr* mkexpr ( IRTemp tmp )
294{
295   return IRExpr_RdTmp(tmp);
296}
297
298static IRExpr* unop ( IROp op, IRExpr* a )
299{
300   return IRExpr_Unop(op, a);
301}
302
303static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
304{
305   return IRExpr_Binop(op, a1, a2);
306}
307
308static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
309{
310   return IRExpr_Triop(op, a1, a2, a3);
311}
312
313static IRExpr* loadLE ( IRType ty, IRExpr* addr )
314{
315   return IRExpr_Load(Iend_LE, ty, addr);
316}
317
318/* Add a statement to the list held by "irbb". */
319static void stmt ( IRStmt* st )
320{
321   addStmtToIRSB( irsb, st );
322}
323
324static void assign ( IRTemp dst, IRExpr* e )
325{
326   stmt( IRStmt_WrTmp(dst, e) );
327}
328
329static void storeLE ( IRExpr* addr, IRExpr* data )
330{
331   stmt( IRStmt_Store(Iend_LE, addr, data) );
332}
333
334static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
335{
336   if (guardT == IRTemp_INVALID) {
337      /* unconditional */
338      storeLE(addr, data);
339   } else {
340      stmt( IRStmt_StoreG(Iend_LE, addr, data,
341                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
342   }
343}
344
345static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
346                            IRExpr* addr, IRExpr* alt,
347                            IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
348{
349   if (guardT == IRTemp_INVALID) {
350      /* unconditional */
351      IRExpr* loaded = NULL;
352      switch (cvt) {
353         case ILGop_Ident32:
354            loaded = loadLE(Ity_I32, addr); break;
355         case ILGop_8Uto32:
356            loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
357         case ILGop_8Sto32:
358            loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
359         case ILGop_16Uto32:
360            loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
361         case ILGop_16Sto32:
362            loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
363         default:
364            vassert(0);
365      }
366      vassert(loaded != NULL);
367      assign(dst, loaded);
368   } else {
369      /* Generate a guarded load into 'dst', but apply 'cvt' to the
370         loaded data before putting the data in 'dst'.  If the load
371         does not take place, 'alt' is placed directly in 'dst'. */
372      stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
373                         binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
374   }
375}
376
377/* Generate a new temporary of the given type. */
378static IRTemp newTemp ( IRType ty )
379{
380   vassert(isPlausibleIRType(ty));
381   return newIRTemp( irsb->tyenv, ty );
382}
383
384/* Produces a value in 0 .. 3, which is encoded as per the type
385   IRRoundingMode. */
386static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
387{
388   return mkU32(Irrm_NEAREST);
389}
390
391/* Generate an expression for SRC rotated right by ROT. */
392static IRExpr* genROR32( IRTemp src, Int rot )
393{
394   vassert(rot >= 0 && rot < 32);
395   if (rot == 0)
396      return mkexpr(src);
397   return
398      binop(Iop_Or32,
399            binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
400            binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
401}
402
403static IRExpr* mkU128 ( ULong i )
404{
405   return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
406}
407
408/* Generate a 4-aligned version of the given expression if
409   the given condition is true.  Else return it unchanged. */
410static IRExpr* align4if ( IRExpr* e, Bool b )
411{
412   if (b)
413      return binop(Iop_And32, e, mkU32(~3));
414   else
415      return e;
416}
417
418
419/*------------------------------------------------------------*/
420/*--- Helpers for accessing guest registers.               ---*/
421/*------------------------------------------------------------*/
422
/* Byte offsets of fields within the guest state structure
   VexGuestARMState.  ARM_GST_OFFB is a private helper that maps a
   field name to its offset; the OFFB_* names are what the rest of
   the file uses. */
#define ARM_GST_OFFB(_field)  offsetof(VexGuestARMState,_field)

/* Integer registers. */
#define OFFB_R0       ARM_GST_OFFB(guest_R0)
#define OFFB_R1       ARM_GST_OFFB(guest_R1)
#define OFFB_R2       ARM_GST_OFFB(guest_R2)
#define OFFB_R3       ARM_GST_OFFB(guest_R3)
#define OFFB_R4       ARM_GST_OFFB(guest_R4)
#define OFFB_R5       ARM_GST_OFFB(guest_R5)
#define OFFB_R6       ARM_GST_OFFB(guest_R6)
#define OFFB_R7       ARM_GST_OFFB(guest_R7)
#define OFFB_R8       ARM_GST_OFFB(guest_R8)
#define OFFB_R9       ARM_GST_OFFB(guest_R9)
#define OFFB_R10      ARM_GST_OFFB(guest_R10)
#define OFFB_R11      ARM_GST_OFFB(guest_R11)
#define OFFB_R12      ARM_GST_OFFB(guest_R12)
#define OFFB_R13      ARM_GST_OFFB(guest_R13)
#define OFFB_R14      ARM_GST_OFFB(guest_R14)
#define OFFB_R15T     ARM_GST_OFFB(guest_R15T)

/* Condition-code thunk fields and the NRADDR pseudo-register. */
#define OFFB_CC_OP    ARM_GST_OFFB(guest_CC_OP)
#define OFFB_CC_DEP1  ARM_GST_OFFB(guest_CC_DEP1)
#define OFFB_CC_DEP2  ARM_GST_OFFB(guest_CC_DEP2)
#define OFFB_CC_NDEP  ARM_GST_OFFB(guest_CC_NDEP)
#define OFFB_NRADDR   ARM_GST_OFFB(guest_NRADDR)

/* D registers. */
#define OFFB_D0       ARM_GST_OFFB(guest_D0)
#define OFFB_D1       ARM_GST_OFFB(guest_D1)
#define OFFB_D2       ARM_GST_OFFB(guest_D2)
#define OFFB_D3       ARM_GST_OFFB(guest_D3)
#define OFFB_D4       ARM_GST_OFFB(guest_D4)
#define OFFB_D5       ARM_GST_OFFB(guest_D5)
#define OFFB_D6       ARM_GST_OFFB(guest_D6)
#define OFFB_D7       ARM_GST_OFFB(guest_D7)
#define OFFB_D8       ARM_GST_OFFB(guest_D8)
#define OFFB_D9       ARM_GST_OFFB(guest_D9)
#define OFFB_D10      ARM_GST_OFFB(guest_D10)
#define OFFB_D11      ARM_GST_OFFB(guest_D11)
#define OFFB_D12      ARM_GST_OFFB(guest_D12)
#define OFFB_D13      ARM_GST_OFFB(guest_D13)
#define OFFB_D14      ARM_GST_OFFB(guest_D14)
#define OFFB_D15      ARM_GST_OFFB(guest_D15)
#define OFFB_D16      ARM_GST_OFFB(guest_D16)
#define OFFB_D17      ARM_GST_OFFB(guest_D17)
#define OFFB_D18      ARM_GST_OFFB(guest_D18)
#define OFFB_D19      ARM_GST_OFFB(guest_D19)
#define OFFB_D20      ARM_GST_OFFB(guest_D20)
#define OFFB_D21      ARM_GST_OFFB(guest_D21)
#define OFFB_D22      ARM_GST_OFFB(guest_D22)
#define OFFB_D23      ARM_GST_OFFB(guest_D23)
#define OFFB_D24      ARM_GST_OFFB(guest_D24)
#define OFFB_D25      ARM_GST_OFFB(guest_D25)
#define OFFB_D26      ARM_GST_OFFB(guest_D26)
#define OFFB_D27      ARM_GST_OFFB(guest_D27)
#define OFFB_D28      ARM_GST_OFFB(guest_D28)
#define OFFB_D29      ARM_GST_OFFB(guest_D29)
#define OFFB_D30      ARM_GST_OFFB(guest_D30)
#define OFFB_D31      ARM_GST_OFFB(guest_D31)

/* Miscellaneous state. */
#define OFFB_FPSCR    ARM_GST_OFFB(guest_FPSCR)
#define OFFB_TPIDRURO ARM_GST_OFFB(guest_TPIDRURO)
#define OFFB_ITSTATE  ARM_GST_OFFB(guest_ITSTATE)
#define OFFB_QFLAG32  ARM_GST_OFFB(guest_QFLAG32)
#define OFFB_GEFLAG0  ARM_GST_OFFB(guest_GEFLAG0)
#define OFFB_GEFLAG1  ARM_GST_OFFB(guest_GEFLAG1)
#define OFFB_GEFLAG2  ARM_GST_OFFB(guest_GEFLAG2)
#define OFFB_GEFLAG3  ARM_GST_OFFB(guest_GEFLAG3)

#define OFFB_CMSTART  ARM_GST_OFFB(guest_CMSTART)
#define OFFB_CMLEN    ARM_GST_OFFB(guest_CMLEN)
490
491
492/* ---------------- Integer registers ---------------- */
493
494static Int integerGuestRegOffset ( UInt iregNo )
495{
496   /* Do we care about endianness here?  We do if sub-parts of integer
497      registers are accessed, but I don't think that ever happens on
498      ARM. */
499   switch (iregNo) {
500      case 0:  return OFFB_R0;
501      case 1:  return OFFB_R1;
502      case 2:  return OFFB_R2;
503      case 3:  return OFFB_R3;
504      case 4:  return OFFB_R4;
505      case 5:  return OFFB_R5;
506      case 6:  return OFFB_R6;
507      case 7:  return OFFB_R7;
508      case 8:  return OFFB_R8;
509      case 9:  return OFFB_R9;
510      case 10: return OFFB_R10;
511      case 11: return OFFB_R11;
512      case 12: return OFFB_R12;
513      case 13: return OFFB_R13;
514      case 14: return OFFB_R14;
515      case 15: return OFFB_R15T;
516      default: vassert(0);
517   }
518}
519
520/* Plain ("low level") read from a reg; no +8 offset magic for r15. */
521static IRExpr* llGetIReg ( UInt iregNo )
522{
523   vassert(iregNo < 16);
524   return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
525}
526
527/* Architected read from a reg in ARM mode.  This automagically adds 8
528   to all reads of r15. */
529static IRExpr* getIRegA ( UInt iregNo )
530{
531   IRExpr* e;
532   ASSERT_IS_ARM;
533   vassert(iregNo < 16);
534   if (iregNo == 15) {
535      /* If asked for r15, don't read the guest state value, as that
536         may not be up to date in the case where loop unrolling has
537         happened, because the first insn's write to the block is
538         omitted; hence in the 2nd and subsequent unrollings we don't
539         have a correct value in guest r15.  Instead produce the
540         constant that we know would be produced at this point. */
541      vassert(0 == (guest_R15_curr_instr_notENC & 3));
542      e = mkU32(guest_R15_curr_instr_notENC + 8);
543   } else {
544      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
545   }
546   return e;
547}
548
549/* Architected read from a reg in Thumb mode.  This automagically adds
550   4 to all reads of r15. */
551static IRExpr* getIRegT ( UInt iregNo )
552{
553   IRExpr* e;
554   ASSERT_IS_THUMB;
555   vassert(iregNo < 16);
556   if (iregNo == 15) {
557      /* Ditto comment in getIReg. */
558      vassert(0 == (guest_R15_curr_instr_notENC & 1));
559      e = mkU32(guest_R15_curr_instr_notENC + 4);
560   } else {
561      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
562   }
563   return e;
564}
565
566/* Plain ("low level") write to a reg; no jump or alignment magic for
567   r15. */
568static void llPutIReg ( UInt iregNo, IRExpr* e )
569{
570   vassert(iregNo < 16);
571   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
572   stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
573}
574
575/* Architected write to an integer register in ARM mode.  If it is to
576   r15, record info so at the end of this insn's translation, a branch
577   to it can be made.  Also handles conditional writes to the
578   register: if guardT == IRTemp_INVALID then the write is
579   unconditional.  If writing r15, also 4-align it. */
580static void putIRegA ( UInt       iregNo,
581                       IRExpr*    e,
582                       IRTemp     guardT /* :: Ity_I32, 0 or 1 */,
583                       IRJumpKind jk /* if a jump is generated */ )
584{
585   /* if writing r15, force e to be 4-aligned. */
586   // INTERWORKING FIXME.  this needs to be relaxed so that
587   // puts caused by LDMxx which load r15 interwork right.
588   // but is no aligned too relaxed?
589   //if (iregNo == 15)
590   //   e = binop(Iop_And32, e, mkU32(~3));
591   ASSERT_IS_ARM;
592   /* So, generate either an unconditional or a conditional write to
593      the reg. */
594   if (guardT == IRTemp_INVALID) {
595      /* unconditional write */
596      llPutIReg( iregNo, e );
597   } else {
598      llPutIReg( iregNo,
599                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
600                             e, llGetIReg(iregNo) ));
601   }
602   if (iregNo == 15) {
603      // assert against competing r15 updates.  Shouldn't
604      // happen; should be ruled out by the instr matching
605      // logic.
606      vassert(r15written == False);
607      vassert(r15guard   == IRTemp_INVALID);
608      vassert(r15kind    == Ijk_Boring);
609      r15written = True;
610      r15guard   = guardT;
611      r15kind    = jk;
612   }
613}
614
615
616/* Architected write to an integer register in Thumb mode.  Writes to
617   r15 are not allowed.  Handles conditional writes to the register:
618   if guardT == IRTemp_INVALID then the write is unconditional. */
619static void putIRegT ( UInt       iregNo,
620                       IRExpr*    e,
621                       IRTemp     guardT /* :: Ity_I32, 0 or 1 */ )
622{
623   /* So, generate either an unconditional or a conditional write to
624      the reg. */
625   ASSERT_IS_THUMB;
626   vassert(iregNo >= 0 && iregNo <= 14);
627   if (guardT == IRTemp_INVALID) {
628      /* unconditional write */
629      llPutIReg( iregNo, e );
630   } else {
631      llPutIReg( iregNo,
632                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
633                             e, llGetIReg(iregNo) ));
634   }
635}
636
637
638/* Thumb16 and Thumb32 only.
639   Returns true if reg is 13 or 15.  Implements the BadReg
640   predicate in the ARM ARM. */
641static Bool isBadRegT ( UInt r )
642{
643   vassert(r <= 15);
644   ASSERT_IS_THUMB;
645   return r == 13 || r == 15;
646}
647
648
649/* ---------------- Double registers ---------------- */
650
651static Int doubleGuestRegOffset ( UInt dregNo )
652{
653   /* Do we care about endianness here?  Probably do if we ever get
654      into the situation of dealing with the single-precision VFP
655      registers. */
656   switch (dregNo) {
657      case 0:  return OFFB_D0;
658      case 1:  return OFFB_D1;
659      case 2:  return OFFB_D2;
660      case 3:  return OFFB_D3;
661      case 4:  return OFFB_D4;
662      case 5:  return OFFB_D5;
663      case 6:  return OFFB_D6;
664      case 7:  return OFFB_D7;
665      case 8:  return OFFB_D8;
666      case 9:  return OFFB_D9;
667      case 10: return OFFB_D10;
668      case 11: return OFFB_D11;
669      case 12: return OFFB_D12;
670      case 13: return OFFB_D13;
671      case 14: return OFFB_D14;
672      case 15: return OFFB_D15;
673      case 16: return OFFB_D16;
674      case 17: return OFFB_D17;
675      case 18: return OFFB_D18;
676      case 19: return OFFB_D19;
677      case 20: return OFFB_D20;
678      case 21: return OFFB_D21;
679      case 22: return OFFB_D22;
680      case 23: return OFFB_D23;
681      case 24: return OFFB_D24;
682      case 25: return OFFB_D25;
683      case 26: return OFFB_D26;
684      case 27: return OFFB_D27;
685      case 28: return OFFB_D28;
686      case 29: return OFFB_D29;
687      case 30: return OFFB_D30;
688      case 31: return OFFB_D31;
689      default: vassert(0);
690   }
691}
692
693/* Plain ("low level") read from a VFP Dreg. */
694static IRExpr* llGetDReg ( UInt dregNo )
695{
696   vassert(dregNo < 32);
697   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
698}
699
700/* Architected read from a VFP Dreg. */
701static IRExpr* getDReg ( UInt dregNo ) {
702   return llGetDReg( dregNo );
703}
704
705/* Plain ("low level") write to a VFP Dreg. */
706static void llPutDReg ( UInt dregNo, IRExpr* e )
707{
708   vassert(dregNo < 32);
709   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
710   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
711}
712
713/* Architected write to a VFP Dreg.  Handles conditional writes to the
714   register: if guardT == IRTemp_INVALID then the write is
715   unconditional. */
716static void putDReg ( UInt    dregNo,
717                      IRExpr* e,
718                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
719{
720   /* So, generate either an unconditional or a conditional write to
721      the reg. */
722   if (guardT == IRTemp_INVALID) {
723      /* unconditional write */
724      llPutDReg( dregNo, e );
725   } else {
726      llPutDReg( dregNo,
727                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
728                             e, llGetDReg(dregNo) ));
729   }
730}
731
732/* And now exactly the same stuff all over again, but this time
733   taking/returning I64 rather than F64, to support 64-bit Neon
734   ops. */
735
736/* Plain ("low level") read from a Neon Integer Dreg. */
737static IRExpr* llGetDRegI64 ( UInt dregNo )
738{
739   vassert(dregNo < 32);
740   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
741}
742
743/* Architected read from a Neon Integer Dreg. */
744static IRExpr* getDRegI64 ( UInt dregNo ) {
745   return llGetDRegI64( dregNo );
746}
747
748/* Plain ("low level") write to a Neon Integer Dreg. */
749static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
750{
751   vassert(dregNo < 32);
752   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
753   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
754}
755
756/* Architected write to a Neon Integer Dreg.  Handles conditional
757   writes to the register: if guardT == IRTemp_INVALID then the write
758   is unconditional. */
759static void putDRegI64 ( UInt    dregNo,
760                         IRExpr* e,
761                         IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
762{
763   /* So, generate either an unconditional or a conditional write to
764      the reg. */
765   if (guardT == IRTemp_INVALID) {
766      /* unconditional write */
767      llPutDRegI64( dregNo, e );
768   } else {
769      llPutDRegI64( dregNo,
770                    IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
771                                e, llGetDRegI64(dregNo) ));
772   }
773}
774
775/* ---------------- Quad registers ---------------- */
776
777static Int quadGuestRegOffset ( UInt qregNo )
778{
779   /* Do we care about endianness here?  Probably do if we ever get
780      into the situation of dealing with the 64 bit Neon registers. */
781   switch (qregNo) {
782      case 0:  return OFFB_D0;
783      case 1:  return OFFB_D2;
784      case 2:  return OFFB_D4;
785      case 3:  return OFFB_D6;
786      case 4:  return OFFB_D8;
787      case 5:  return OFFB_D10;
788      case 6:  return OFFB_D12;
789      case 7:  return OFFB_D14;
790      case 8:  return OFFB_D16;
791      case 9:  return OFFB_D18;
792      case 10: return OFFB_D20;
793      case 11: return OFFB_D22;
794      case 12: return OFFB_D24;
795      case 13: return OFFB_D26;
796      case 14: return OFFB_D28;
797      case 15: return OFFB_D30;
798      default: vassert(0);
799   }
800}
801
802/* Plain ("low level") read from a Neon Qreg. */
803static IRExpr* llGetQReg ( UInt qregNo )
804{
805   vassert(qregNo < 16);
806   return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
807}
808
809/* Architected read from a Neon Qreg. */
810static IRExpr* getQReg ( UInt qregNo ) {
811   return llGetQReg( qregNo );
812}
813
814/* Plain ("low level") write to a Neon Qreg. */
815static void llPutQReg ( UInt qregNo, IRExpr* e )
816{
817   vassert(qregNo < 16);
818   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
819   stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
820}
821
822/* Architected write to a Neon Qreg.  Handles conditional writes to the
823   register: if guardT == IRTemp_INVALID then the write is
824   unconditional. */
825static void putQReg ( UInt    qregNo,
826                      IRExpr* e,
827                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
828{
829   /* So, generate either an unconditional or a conditional write to
830      the reg. */
831   if (guardT == IRTemp_INVALID) {
832      /* unconditional write */
833      llPutQReg( qregNo, e );
834   } else {
835      llPutQReg( qregNo,
836                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
837                             e, llGetQReg(qregNo) ));
838   }
839}
840
841
842/* ---------------- Float registers ---------------- */
843
844static Int floatGuestRegOffset ( UInt fregNo )
845{
846   /* Start with the offset of the containing double, and then correct
847      for endianness.  Actually this is completely bogus and needs
848      careful thought. */
849   Int off;
850   vassert(fregNo < 32);
851   off = doubleGuestRegOffset(fregNo >> 1);
852   if (host_is_bigendian) {
853      vassert(0);
854   } else {
855      if (fregNo & 1)
856         off += 4;
857   }
858   return off;
859}
860
861/* Plain ("low level") read from a VFP Freg. */
862static IRExpr* llGetFReg ( UInt fregNo )
863{
864   vassert(fregNo < 32);
865   return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
866}
867
868/* Architected read from a VFP Freg. */
869static IRExpr* getFReg ( UInt fregNo ) {
870   return llGetFReg( fregNo );
871}
872
873/* Plain ("low level") write to a VFP Freg. */
874static void llPutFReg ( UInt fregNo, IRExpr* e )
875{
876   vassert(fregNo < 32);
877   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
878   stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
879}
880
881/* Architected write to a VFP Freg.  Handles conditional writes to the
882   register: if guardT == IRTemp_INVALID then the write is
883   unconditional. */
884static void putFReg ( UInt    fregNo,
885                      IRExpr* e,
886                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
887{
888   /* So, generate either an unconditional or a conditional write to
889      the reg. */
890   if (guardT == IRTemp_INVALID) {
891      /* unconditional write */
892      llPutFReg( fregNo, e );
893   } else {
894      llPutFReg( fregNo,
895                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
896                             e, llGetFReg(fregNo) ));
897   }
898}
899
900
901/* ---------------- Misc registers ---------------- */
902
903static void putMiscReg32 ( UInt    gsoffset,
904                           IRExpr* e, /* :: Ity_I32 */
905                           IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
906{
907   switch (gsoffset) {
908      case OFFB_FPSCR:   break;
909      case OFFB_QFLAG32: break;
910      case OFFB_GEFLAG0: break;
911      case OFFB_GEFLAG1: break;
912      case OFFB_GEFLAG2: break;
913      case OFFB_GEFLAG3: break;
914      default: vassert(0); /* awaiting more cases */
915   }
916   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
917
918   if (guardT == IRTemp_INVALID) {
919      /* unconditional write */
920      stmt(IRStmt_Put(gsoffset, e));
921   } else {
922      stmt(IRStmt_Put(
923         gsoffset,
924         IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
925                     e, IRExpr_Get(gsoffset, Ity_I32) )
926      ));
927   }
928}
929
930static IRTemp get_ITSTATE ( void )
931{
932   ASSERT_IS_THUMB;
933   IRTemp t = newTemp(Ity_I32);
934   assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
935   return t;
936}
937
938static void put_ITSTATE ( IRTemp t )
939{
940   ASSERT_IS_THUMB;
941   stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
942}
943
944static IRTemp get_QFLAG32 ( void )
945{
946   IRTemp t = newTemp(Ity_I32);
947   assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
948   return t;
949}
950
951static void put_QFLAG32 ( IRTemp t, IRTemp condT )
952{
953   putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
954}
955
956/* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
957   Status Register) to indicate that overflow or saturation occurred.
958   Nb: t must be zero to denote no saturation, and any nonzero
959   value to indicate saturation. */
960static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
961{
962   IRTemp old = get_QFLAG32();
963   IRTemp nyu = newTemp(Ity_I32);
964   assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
965   put_QFLAG32(nyu, condT);
966}
967
968/* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
969   flagNo: which flag bit to set [3...0]
970   lowbits_to_ignore:  0 = look at all 32 bits
971                       8 = look at top 24 bits only
972                      16 = look at top 16 bits only
973                      31 = look at the top bit only
974   e: input value to be evaluated.
975   The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
976   masked out.  If the resulting value is zero then the GE flag is
977   set to 0; any other value sets the flag to 1. */
978static void put_GEFLAG32 ( Int flagNo,            /* 0, 1, 2 or 3 */
979                           Int lowbits_to_ignore, /* 0, 8, 16 or 31   */
980                           IRExpr* e,             /* Ity_I32 */
981                           IRTemp condT )
982{
983   vassert( flagNo >= 0 && flagNo <= 3 );
984   vassert( lowbits_to_ignore == 0  ||
985            lowbits_to_ignore == 8  ||
986            lowbits_to_ignore == 16 ||
987            lowbits_to_ignore == 31 );
988   IRTemp masked = newTemp(Ity_I32);
989   assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
990
991   switch (flagNo) {
992      case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
993      case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
994      case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
995      case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
996      default: vassert(0);
997   }
998}
999
1000/* Return the (32-bit, zero-or-nonzero representation scheme) of
1001   the specified GE flag. */
1002static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
1003{
1004   switch (flagNo) {
1005      case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
1006      case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
1007      case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
1008      case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
1009      default: vassert(0);
1010   }
1011}
1012
1013/* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
1014   2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
1015   15 of the value.  All other bits are ignored. */
1016static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
1017{
1018   IRTemp ge10 = newTemp(Ity_I32);
1019   IRTemp ge32 = newTemp(Ity_I32);
1020   assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1021   assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1022   put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
1023   put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
1024   put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
1025   put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
1026}
1027
1028
1029/* Set all 4 GE flags from the given 32-bit value as follows: GE 3
1030   from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
1031   bit 7.  All other bits are ignored. */
1032static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
1033{
1034   IRTemp ge0 = newTemp(Ity_I32);
1035   IRTemp ge1 = newTemp(Ity_I32);
1036   IRTemp ge2 = newTemp(Ity_I32);
1037   IRTemp ge3 = newTemp(Ity_I32);
1038   assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
1039   assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1040   assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
1041   assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1042   put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
1043   put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
1044   put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
1045   put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
1046}
1047
1048
1049/* ---------------- FPSCR stuff ---------------- */
1050
1051/* Generate IR to get hold of the rounding mode bits in FPSCR, and
1052   convert them to IR format.  Bind the final result to the
1053   returned temp. */
1054static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1055{
1056   /* The ARMvfp encoding for rounding mode bits is:
1057         00  to nearest
1058         01  to +infinity
1059         10  to -infinity
1060         11  to zero
1061      We need to convert that to the IR encoding:
1062         00  to nearest (the default)
1063         10  to +infinity
1064         01  to -infinity
1065         11  to zero
1066      Which can be done by swapping bits 0 and 1.
1067      The rmode bits are at 23:22 in FPSCR.
1068   */
1069   IRTemp armEncd = newTemp(Ity_I32);
1070   IRTemp swapped = newTemp(Ity_I32);
1071   /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
1072      we don't zero out bits 24 and above, since the assignment to
1073      'swapped' will mask them out anyway. */
1074   assign(armEncd,
1075          binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
1076   /* Now swap them. */
1077   assign(swapped,
1078          binop(Iop_Or32,
1079                binop(Iop_And32,
1080                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1081                      mkU32(2)),
1082                binop(Iop_And32,
1083                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1084                      mkU32(1))
1085         ));
1086   return swapped;
1087}
1088
1089
1090/*------------------------------------------------------------*/
1091/*--- Helpers for flag handling and conditional insns      ---*/
1092/*------------------------------------------------------------*/
1093
1094static const HChar* name_ARMCondcode ( ARMCondcode cond )
1095{
1096   switch (cond) {
1097      case ARMCondEQ:  return "{eq}";
1098      case ARMCondNE:  return "{ne}";
1099      case ARMCondHS:  return "{hs}";  // or 'cs'
1100      case ARMCondLO:  return "{lo}";  // or 'cc'
1101      case ARMCondMI:  return "{mi}";
1102      case ARMCondPL:  return "{pl}";
1103      case ARMCondVS:  return "{vs}";
1104      case ARMCondVC:  return "{vc}";
1105      case ARMCondHI:  return "{hi}";
1106      case ARMCondLS:  return "{ls}";
1107      case ARMCondGE:  return "{ge}";
1108      case ARMCondLT:  return "{lt}";
1109      case ARMCondGT:  return "{gt}";
1110      case ARMCondLE:  return "{le}";
1111      case ARMCondAL:  return ""; // {al}: is the default
1112      case ARMCondNV:  return "{nv}";
1113      default: vpanic("name_ARMCondcode");
1114   }
1115}
1116/* and a handy shorthand for it */
1117static const HChar* nCC ( ARMCondcode cond ) {
1118   return name_ARMCondcode(cond);
1119}
1120
1121
1122/* Build IR to calculate some particular condition from stored
1123   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1124   Ity_I32, suitable for narrowing.  Although the return type is
1125   Ity_I32, the returned value is either 0 or 1.  'cond' must be
1126   :: Ity_I32 and must denote the condition to compute in
1127   bits 7:4, and be zero everywhere else.
1128*/
1129static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
1130{
1131   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
1132   /* And 'cond' had better produce a value in which only bits 7:4 are
1133      nonzero.  However, obviously we can't assert for that. */
1134
1135   /* So what we're constructing for the first argument is
1136      "(cond << 4) | stored-operation".
1137      However, as per comments above, 'cond' must be supplied
1138      pre-shifted to this function.
1139
1140      This pairing scheme requires that the ARM_CC_OP_ values all fit
1141      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
1142      8 bits of the first argument. */
1143   IRExpr** args
1144      = mkIRExprVec_4(
1145           binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
1146           IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1147           IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1148           IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
1149        );
1150   IRExpr* call
1151      = mkIRExprCCall(
1152           Ity_I32,
1153           0/*regparm*/,
1154           "armg_calculate_condition", &armg_calculate_condition,
1155           args
1156        );
1157
1158   /* Exclude the requested condition, OP and NDEP from definedness
1159      checking.  We're only interested in DEP1 and DEP2. */
1160   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1161   return call;
1162}
1163
1164
1165/* Build IR to calculate some particular condition from stored
1166   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1167   Ity_I32, suitable for narrowing.  Although the return type is
1168   Ity_I32, the returned value is either 0 or 1.
1169*/
1170static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
1171{
1172  /* First arg is "(cond << 4) | condition".  This requires that the
1173     ARM_CC_OP_ values all fit in 4 bits.  Hence we are passing a
1174     (COND, OP) pair in the lowest 8 bits of the first argument. */
1175   vassert(cond >= 0 && cond <= 15);
1176   return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
1177}
1178
1179
1180/* Build IR to calculate just the carry flag from stored
1181   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1182   Ity_I32. */
1183static IRExpr* mk_armg_calculate_flag_c ( void )
1184{
1185   IRExpr** args
1186      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1187                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1188                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1189                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1190   IRExpr* call
1191      = mkIRExprCCall(
1192           Ity_I32,
1193           0/*regparm*/,
1194           "armg_calculate_flag_c", &armg_calculate_flag_c,
1195           args
1196        );
1197   /* Exclude OP and NDEP from definedness checking.  We're only
1198      interested in DEP1 and DEP2. */
1199   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1200   return call;
1201}
1202
1203
1204/* Build IR to calculate just the overflow flag from stored
1205   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1206   Ity_I32. */
1207static IRExpr* mk_armg_calculate_flag_v ( void )
1208{
1209   IRExpr** args
1210      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1211                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1212                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1213                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1214   IRExpr* call
1215      = mkIRExprCCall(
1216           Ity_I32,
1217           0/*regparm*/,
1218           "armg_calculate_flag_v", &armg_calculate_flag_v,
1219           args
1220        );
1221   /* Exclude OP and NDEP from definedness checking.  We're only
1222      interested in DEP1 and DEP2. */
1223   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1224   return call;
1225}
1226
1227
1228/* Build IR to calculate N Z C V in bits 31:28 of the
1229   returned word. */
1230static IRExpr* mk_armg_calculate_flags_nzcv ( void )
1231{
1232   IRExpr** args
1233      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1234                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1235                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1236                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1237   IRExpr* call
1238      = mkIRExprCCall(
1239           Ity_I32,
1240           0/*regparm*/,
1241           "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
1242           args
1243        );
1244   /* Exclude OP and NDEP from definedness checking.  We're only
1245      interested in DEP1 and DEP2. */
1246   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1247   return call;
1248}
1249
1250static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
1251{
1252   IRExpr** args1;
1253   IRExpr** args2;
1254   IRExpr *call1, *call2, *res;
1255
1256   if (Q) {
1257      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
1258                              binop(Iop_GetElem32x4, resL, mkU8(1)),
1259                              binop(Iop_GetElem32x4, resR, mkU8(0)),
1260                              binop(Iop_GetElem32x4, resR, mkU8(1)) );
1261      args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
1262                              binop(Iop_GetElem32x4, resL, mkU8(3)),
1263                              binop(Iop_GetElem32x4, resR, mkU8(2)),
1264                              binop(Iop_GetElem32x4, resR, mkU8(3)) );
1265   } else {
1266      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
1267                              binop(Iop_GetElem32x2, resL, mkU8(1)),
1268                              binop(Iop_GetElem32x2, resR, mkU8(0)),
1269                              binop(Iop_GetElem32x2, resR, mkU8(1)) );
1270   }
1271
1272   call1 = mkIRExprCCall(
1273             Ity_I32,
1274             0/*regparm*/,
1275             "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1276             args1
1277          );
1278   if (Q) {
1279      call2 = mkIRExprCCall(
1280                Ity_I32,
1281                0/*regparm*/,
1282                "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1283                args2
1284             );
1285   }
1286   if (Q) {
1287      res = binop(Iop_Or32, call1, call2);
1288   } else {
1289      res = call1;
1290   }
1291   return res;
1292}
1293
1294// FIXME: this is named wrongly .. looks like a sticky set of
1295// QC, not a write to it.
1296static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
1297                         IRTemp condT )
1298{
1299   putMiscReg32 (OFFB_FPSCR,
1300                 binop(Iop_Or32,
1301                       IRExpr_Get(OFFB_FPSCR, Ity_I32),
1302                       binop(Iop_Shl32,
1303                             mk_armg_calculate_flag_qc(resL, resR, Q),
1304                             mkU8(27))),
1305                 condT);
1306}
1307
1308/* Build IR to conditionally set the flags thunk.  As with putIReg, if
1309   guard is IRTemp_INVALID then it's unconditional, else it holds a
1310   condition :: Ity_I32. */
1311static
1312void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
1313                         IRTemp t_dep2, IRTemp t_ndep,
1314                         IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1315{
1316   vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
1317   vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
1318   vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
1319   vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
1320   if (guardT == IRTemp_INVALID) {
1321      /* unconditional */
1322      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(cc_op) ));
1323      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1324      stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1325      stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1326   } else {
1327      /* conditional */
1328      IRTemp c1 = newTemp(Ity_I1);
1329      assign( c1, binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)) );
1330      stmt( IRStmt_Put(
1331               OFFB_CC_OP,
1332               IRExpr_ITE( mkexpr(c1),
1333                           mkU32(cc_op),
1334                           IRExpr_Get(OFFB_CC_OP, Ity_I32) ) ));
1335      stmt( IRStmt_Put(
1336               OFFB_CC_DEP1,
1337               IRExpr_ITE( mkexpr(c1),
1338                           mkexpr(t_dep1),
1339                           IRExpr_Get(OFFB_CC_DEP1, Ity_I32) ) ));
1340      stmt( IRStmt_Put(
1341               OFFB_CC_DEP2,
1342               IRExpr_ITE( mkexpr(c1),
1343                           mkexpr(t_dep2),
1344                           IRExpr_Get(OFFB_CC_DEP2, Ity_I32) ) ));
1345      stmt( IRStmt_Put(
1346               OFFB_CC_NDEP,
1347               IRExpr_ITE( mkexpr(c1),
1348                           mkexpr(t_ndep),
1349                           IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ) ));
1350   }
1351}
1352
1353
1354/* Minor variant of the above that sets NDEP to zero (if it
1355   sets it at all) */
1356static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1357                             IRTemp t_dep2,
1358                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1359{
1360   IRTemp z32 = newTemp(Ity_I32);
1361   assign( z32, mkU32(0) );
1362   setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1363}
1364
1365
1366/* Minor variant of the above that sets DEP2 to zero (if it
1367   sets it at all) */
1368static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1369                             IRTemp t_ndep,
1370                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1371{
1372   IRTemp z32 = newTemp(Ity_I32);
1373   assign( z32, mkU32(0) );
1374   setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1375}
1376
1377
1378/* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1379   sets them at all) */
1380static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1381                          IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1382{
1383   IRTemp z32 = newTemp(Ity_I32);
1384   assign( z32, mkU32(0) );
1385   setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1386}
1387
1388
1389/* ARM only */
1390/* Generate a side-exit to the next instruction, if the given guard
1391   expression :: Ity_I32 is 0 (note!  the side exit is taken if the
1392   condition is false!)  This is used to skip over conditional
1393   instructions which we can't generate straight-line code for, either
1394   because they are too complex or (more likely) they potentially
1395   generate exceptions.
1396*/
1397static void mk_skip_over_A32_if_cond_is_false (
1398               IRTemp guardT /* :: Ity_I32, 0 or 1 */
1399            )
1400{
1401   ASSERT_IS_ARM;
1402   vassert(guardT != IRTemp_INVALID);
1403   vassert(0 == (guest_R15_curr_instr_notENC & 3));
1404   stmt( IRStmt_Exit(
1405            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1406            Ijk_Boring,
1407            IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
1408            OFFB_R15T
1409       ));
1410}
1411
1412/* Thumb16 only */
1413/* ditto, but jump over a 16-bit thumb insn */
1414static void mk_skip_over_T16_if_cond_is_false (
1415               IRTemp guardT /* :: Ity_I32, 0 or 1 */
1416            )
1417{
1418   ASSERT_IS_THUMB;
1419   vassert(guardT != IRTemp_INVALID);
1420   vassert(0 == (guest_R15_curr_instr_notENC & 1));
1421   stmt( IRStmt_Exit(
1422            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1423            Ijk_Boring,
1424            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
1425            OFFB_R15T
1426       ));
1427}
1428
1429
1430/* Thumb32 only */
1431/* ditto, but jump over a 32-bit thumb insn */
1432static void mk_skip_over_T32_if_cond_is_false (
1433               IRTemp guardT /* :: Ity_I32, 0 or 1 */
1434            )
1435{
1436   ASSERT_IS_THUMB;
1437   vassert(guardT != IRTemp_INVALID);
1438   vassert(0 == (guest_R15_curr_instr_notENC & 1));
1439   stmt( IRStmt_Exit(
1440            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1441            Ijk_Boring,
1442            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
1443            OFFB_R15T
1444       ));
1445}
1446
1447
1448/* Thumb16 and Thumb32 only
1449   Generate a SIGILL followed by a restart of the current instruction
1450   if the given temp is nonzero. */
1451static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
1452{
1453   ASSERT_IS_THUMB;
1454   vassert(t != IRTemp_INVALID);
1455   vassert(0 == (guest_R15_curr_instr_notENC & 1));
1456   stmt(
1457      IRStmt_Exit(
1458         binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
1459         Ijk_NoDecode,
1460         IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
1461         OFFB_R15T
1462      )
1463   );
1464}
1465
1466
1467/* Inspect the old_itstate, and generate a SIGILL if it indicates that
1468   we are currently in an IT block and are not the last in the block.
1469   This also rolls back guest_ITSTATE to its old value before the exit
1470   and restores it to its new value afterwards.  This is so that if
1471   the exit is taken, we have an up to date version of ITSTATE
1472   available.  Without doing that, we have no hope of making precise
1473   exceptions work. */
1474static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
1475               IRTemp old_itstate /* :: Ity_I32 */,
1476               IRTemp new_itstate /* :: Ity_I32 */
1477            )
1478{
1479   ASSERT_IS_THUMB;
1480   put_ITSTATE(old_itstate); // backout
1481   IRTemp guards_for_next3 = newTemp(Ity_I32);
1482   assign(guards_for_next3,
1483          binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
1484   gen_SIGILL_T_if_nonzero(guards_for_next3);
1485   put_ITSTATE(new_itstate); //restore
1486}
1487
1488
1489/* Simpler version of the above, which generates a SIGILL if
1490   we're anywhere within an IT block. */
1491static void gen_SIGILL_T_if_in_ITBlock (
1492               IRTemp old_itstate /* :: Ity_I32 */,
1493               IRTemp new_itstate /* :: Ity_I32 */
1494            )
1495{
1496   put_ITSTATE(old_itstate); // backout
1497   gen_SIGILL_T_if_nonzero(old_itstate);
1498   put_ITSTATE(new_itstate); //restore
1499}
1500
1501
1502/* Generate an APSR value, from the NZCV thunk, and
1503   from QFLAG32 and GEFLAG0 .. GEFLAG3. */
1504static IRTemp synthesise_APSR ( void )
1505{
1506   IRTemp res1 = newTemp(Ity_I32);
1507   // Get NZCV
1508   assign( res1, mk_armg_calculate_flags_nzcv() );
1509   // OR in the Q value
1510   IRTemp res2 = newTemp(Ity_I32);
1511   assign(
1512      res2,
1513      binop(Iop_Or32,
1514            mkexpr(res1),
1515            binop(Iop_Shl32,
1516                  unop(Iop_1Uto32,
1517                       binop(Iop_CmpNE32,
1518                             mkexpr(get_QFLAG32()),
1519                             mkU32(0))),
1520                  mkU8(ARMG_CC_SHIFT_Q)))
1521   );
1522   // OR in GE0 .. GE3
1523   IRExpr* ge0
1524      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
1525   IRExpr* ge1
1526      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
1527   IRExpr* ge2
1528      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
1529   IRExpr* ge3
1530      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
1531   IRTemp res3 = newTemp(Ity_I32);
1532   assign(res3,
1533          binop(Iop_Or32,
1534                mkexpr(res2),
1535                binop(Iop_Or32,
1536                      binop(Iop_Or32,
1537                            binop(Iop_Shl32, ge0, mkU8(16)),
1538                            binop(Iop_Shl32, ge1, mkU8(17))),
1539                      binop(Iop_Or32,
1540                            binop(Iop_Shl32, ge2, mkU8(18)),
1541                            binop(Iop_Shl32, ge3, mkU8(19))) )));
1542   return res3;
1543}
1544
1545
1546/* and the inverse transformation: given an APSR value,
1547   set the NZCV thunk, the Q flag, and the GE flags. */
1548static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
1549                                IRTemp apsrT, IRTemp condT )
1550{
1551   vassert(write_nzcvq || write_ge);
1552   if (write_nzcvq) {
1553      // Do NZCV
1554      IRTemp immT = newTemp(Ity_I32);
1555      assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
1556      setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
1557      // Do Q
1558      IRTemp qnewT = newTemp(Ity_I32);
1559      assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
1560      put_QFLAG32(qnewT, condT);
1561   }
1562   if (write_ge) {
1563      // Do GE3..0
1564      put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
1565                   condT);
1566      put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
1567                   condT);
1568      put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
1569                   condT);
1570      put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
1571                   condT);
1572   }
1573}
1574
1575
1576/*------------------------------------------------------------*/
1577/*--- Helpers for saturation                               ---*/
1578/*------------------------------------------------------------*/
1579
/* FIXME: absolutely the only difference between (a) armUnsignedSatQ
   and (b) armSignedSatQ is that in (a) the floor is set to 0, whereas
   in (b) the floor is computed from the value of imm5.  These two
   functions should be commoned up. */
1584
1585/* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
1586   Optionally return flag resQ saying whether saturation occurred.
1587   See definition in manual, section A2.2.1, page 41
1588   (bits(N), boolean) UnsignedSatQ( integer i, integer N )
1589   {
1590     if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
1591     elsif ( i < 0 )    { result = 0; saturated = TRUE; }
1592     else               { result = i; saturated = FALSE; }
1593     return ( result<N-1:0>, saturated );
1594   }
1595*/
1596static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
1597                             IRTemp* resQ, /* OUT - Ity_I32  */
1598                             IRTemp regT,  /* value to clamp - Ity_I32 */
1599                             UInt imm5 )   /* saturation ceiling */
1600{
1601   UInt ceil  = (1 << imm5) - 1;    // (2^imm5)-1
1602   UInt floor = 0;
1603
1604   IRTemp nd0 = newTemp(Ity_I32);
1605   IRTemp nd1 = newTemp(Ity_I32);
1606   IRTemp nd2 = newTemp(Ity_I1);
1607   IRTemp nd3 = newTemp(Ity_I32);
1608   IRTemp nd4 = newTemp(Ity_I32);
1609   IRTemp nd5 = newTemp(Ity_I1);
1610   IRTemp nd6 = newTemp(Ity_I32);
1611
1612   assign( nd0, mkexpr(regT) );
1613   assign( nd1, mkU32(ceil) );
1614   assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1615   assign( nd3, IRExpr_ITE(mkexpr(nd2), mkexpr(nd1), mkexpr(nd0)) );
1616   assign( nd4, mkU32(floor) );
1617   assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1618   assign( nd6, IRExpr_ITE(mkexpr(nd5), mkexpr(nd4), mkexpr(nd3)) );
1619   assign( *res, mkexpr(nd6) );
1620
1621   /* if saturation occurred, then resQ is set to some nonzero value
1622      if sat did not occur, resQ is guaranteed to be zero. */
1623   if (resQ) {
1624      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1625   }
1626}
1627
1628
1629/* SignedSatQ(): 'clamp' each value so it lies between  -2^N <= x <= (2^N) - 1
1630   Optionally return flag resQ saying whether saturation occurred.
1631   - see definition in manual, section A2.2.1, page 41
1632   (bits(N), boolean ) SignedSatQ( integer i, integer N )
1633   {
1634     if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
1635     elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = FALSE; }
1636     else                      { result = i;           saturated = FALSE; }
1637     return ( result[N-1:0], saturated );
1638   }
1639*/
1640static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
1641                           UInt imm5,      /* saturation ceiling */
1642                           IRTemp* res,    /* OUT - Ity_I32 */
1643                           IRTemp* resQ )  /* OUT - Ity_I32  */
1644{
1645   Int ceil  =  (1 << (imm5-1)) - 1;  //  (2^(imm5-1))-1
1646   Int floor = -(1 << (imm5-1));      // -(2^(imm5-1))
1647
1648   IRTemp nd0 = newTemp(Ity_I32);
1649   IRTemp nd1 = newTemp(Ity_I32);
1650   IRTemp nd2 = newTemp(Ity_I1);
1651   IRTemp nd3 = newTemp(Ity_I32);
1652   IRTemp nd4 = newTemp(Ity_I32);
1653   IRTemp nd5 = newTemp(Ity_I1);
1654   IRTemp nd6 = newTemp(Ity_I32);
1655
1656   assign( nd0, mkexpr(regT) );
1657   assign( nd1, mkU32(ceil) );
1658   assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1659   assign( nd3, IRExpr_ITE( mkexpr(nd2), mkexpr(nd1), mkexpr(nd0) ) );
1660   assign( nd4, mkU32(floor) );
1661   assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1662   assign( nd6, IRExpr_ITE( mkexpr(nd5), mkexpr(nd4), mkexpr(nd3) ) );
1663   assign( *res, mkexpr(nd6) );
1664
1665   /* if saturation occurred, then resQ is set to some nonzero value
1666      if sat did not occur, resQ is guaranteed to be zero. */
1667   if (resQ) {
1668     assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1669   }
1670}
1671
1672
1673/* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
1674   overflow occurred for 32-bit addition.  Needs both args and the
1675   result.  HD p27. */
1676static
1677IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
1678                                      IRTemp argL, IRTemp argR )
1679{
1680   IRTemp res = newTemp(Ity_I32);
1681   assign(res, resE);
1682   return
1683      binop( Iop_Shr32,
1684             binop( Iop_And32,
1685                    binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
1686                    binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
1687             mkU8(31) );
1688}
1689
1690/* Similarly .. also from HD p27 .. */
1691static
1692IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
1693                                      IRTemp argL, IRTemp argR )
1694{
1695   IRTemp res = newTemp(Ity_I32);
1696   assign(res, resE);
1697   return
1698      binop( Iop_Shr32,
1699             binop( Iop_And32,
1700                    binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
1701                    binop( Iop_Xor32, mkexpr(res),  mkexpr(argL) )),
1702             mkU8(31) );
1703}
1704
1705
1706/*------------------------------------------------------------*/
1707/*--- Larger helpers                                       ---*/
1708/*------------------------------------------------------------*/
1709
1710/* Compute both the result and new C flag value for a LSL by an imm5
1711   or by a register operand.  May generate reads of the old C value
1712   (hence only safe to use before any writes to guest state happen).
   These helpers are factored out so they can be used by both ARM
   and Thumb.

   Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by_{imm5,reg},
1716   "res" (the result)  is a.k.a. "shop", shifter operand
1717   "newC" (the new C)  is a.k.a. "shco", shifter carry out
1718
1719   The calling convention for res and newC is a bit funny.  They could
1720   be passed by value, but instead are passed by ref.
1721
1722   The C (shco) value computed must be zero in bits 31:1, as the IR
1723   optimisations for flag handling (guest_arm_spechelper) rely on
1724   that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
1725   for it.  Same applies to all these functions that compute shco
1726   after a shift or rotate, not just this one.
1727*/
1728
1729static void compute_result_and_C_after_LSL_by_imm5 (
1730               /*OUT*/HChar* buf,
1731               IRTemp* res,
1732               IRTemp* newC,
1733               IRTemp rMt, UInt shift_amt, /* operands */
1734               UInt rM      /* only for debug printing */
1735            )
1736{
1737   if (shift_amt == 0) {
1738      if (newC) {
1739         assign( *newC, mk_armg_calculate_flag_c() );
1740      }
1741      assign( *res, mkexpr(rMt) );
1742      DIS(buf, "r%u", rM);
1743   } else {
1744      vassert(shift_amt >= 1 && shift_amt <= 31);
1745      if (newC) {
1746         assign( *newC,
1747                 binop(Iop_And32,
1748                       binop(Iop_Shr32, mkexpr(rMt),
1749                                        mkU8(32 - shift_amt)),
1750                       mkU32(1)));
1751      }
1752      assign( *res,
1753              binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
1754      DIS(buf, "r%u, LSL #%u", rM, shift_amt);
1755   }
1756}
1757
1758
1759static void compute_result_and_C_after_LSL_by_reg (
1760               /*OUT*/HChar* buf,
1761               IRTemp* res,
1762               IRTemp* newC,
1763               IRTemp rMt, IRTemp rSt,  /* operands */
1764               UInt rM,    UInt rS      /* only for debug printing */
1765            )
1766{
1767   // shift left in range 0 .. 255
1768   // amt  = rS & 255
1769   // res  = amt < 32 ?  Rm << amt  : 0
1770   // newC = amt == 0     ? oldC  :
1771   //        amt in 1..32 ?  Rm[32-amt]  : 0
1772   IRTemp amtT = newTemp(Ity_I32);
1773   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1774   if (newC) {
1775      /* mux0X(amt == 0,
1776               mux0X(amt < 32,
1777                     0,
1778                     Rm[(32-amt) & 31]),
1779               oldC)
1780      */
1781      /* About the best you can do is pray that iropt is able
1782         to nuke most or all of the following junk. */
1783      IRTemp oldC = newTemp(Ity_I32);
1784      assign(oldC, mk_armg_calculate_flag_c() );
1785      assign(
1786         *newC,
1787         IRExpr_ITE(
1788            binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1789            mkexpr(oldC),
1790            IRExpr_ITE(
1791               binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
1792               binop(Iop_And32,
1793                     binop(Iop_Shr32,
1794                           mkexpr(rMt),
1795                           unop(Iop_32to8,
1796                                binop(Iop_And32,
1797                                      binop(Iop_Sub32,
1798                                            mkU32(32),
1799                                            mkexpr(amtT)),
1800                                      mkU32(31)
1801                                )
1802                           )
1803                     ),
1804                     mkU32(1)
1805                     ),
1806               mkU32(0)
1807            )
1808         )
1809      );
1810   }
1811   // (Rm << (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
1812   // Lhs of the & limits the shift to 31 bits, so as to
1813   // give known IR semantics.  Rhs of the & is all 1s for
1814   // Rs <= 31 and all 0s for Rs >= 32.
1815   assign(
1816      *res,
1817      binop(
1818         Iop_And32,
1819         binop(Iop_Shl32,
1820               mkexpr(rMt),
1821               unop(Iop_32to8,
1822                    binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1823         binop(Iop_Sar32,
1824               binop(Iop_Sub32,
1825                     mkexpr(amtT),
1826                     mkU32(32)),
1827               mkU8(31))));
1828    DIS(buf, "r%u, LSL r%u", rM, rS);
1829}
1830
1831
1832static void compute_result_and_C_after_LSR_by_imm5 (
1833               /*OUT*/HChar* buf,
1834               IRTemp* res,
1835               IRTemp* newC,
1836               IRTemp rMt, UInt shift_amt, /* operands */
1837               UInt rM      /* only for debug printing */
1838            )
1839{
1840   if (shift_amt == 0) {
1841      // conceptually a 32-bit shift, however:
1842      // res  = 0
1843      // newC = Rm[31]
1844      if (newC) {
1845         assign( *newC,
1846                 binop(Iop_And32,
1847                       binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1848                       mkU32(1)));
1849      }
1850      assign( *res, mkU32(0) );
1851      DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
1852   } else {
1853      // shift in range 1..31
1854      // res  = Rm >>u shift_amt
1855      // newC = Rm[shift_amt - 1]
1856      vassert(shift_amt >= 1 && shift_amt <= 31);
1857      if (newC) {
1858         assign( *newC,
1859                 binop(Iop_And32,
1860                       binop(Iop_Shr32, mkexpr(rMt),
1861                                        mkU8(shift_amt - 1)),
1862                       mkU32(1)));
1863      }
1864      assign( *res,
1865              binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
1866      DIS(buf, "r%u, LSR #%u", rM, shift_amt);
1867   }
1868}
1869
1870
1871static void compute_result_and_C_after_LSR_by_reg (
1872               /*OUT*/HChar* buf,
1873               IRTemp* res,
1874               IRTemp* newC,
1875               IRTemp rMt, IRTemp rSt,  /* operands */
1876               UInt rM,    UInt rS      /* only for debug printing */
1877            )
1878{
1879   // shift right in range 0 .. 255
1880   // amt = rS & 255
1881   // res  = amt < 32 ?  Rm >>u amt  : 0
1882   // newC = amt == 0     ? oldC  :
1883   //        amt in 1..32 ?  Rm[amt-1]  : 0
1884   IRTemp amtT = newTemp(Ity_I32);
1885   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1886   if (newC) {
1887      /* mux0X(amt == 0,
1888               mux0X(amt < 32,
1889                     0,
1890                     Rm[(amt-1) & 31]),
1891               oldC)
1892      */
1893      IRTemp oldC = newTemp(Ity_I32);
1894      assign(oldC, mk_armg_calculate_flag_c() );
1895      assign(
1896         *newC,
1897         IRExpr_ITE(
1898            binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1899            mkexpr(oldC),
1900            IRExpr_ITE(
1901               binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
1902               binop(Iop_And32,
1903                     binop(Iop_Shr32,
1904                           mkexpr(rMt),
1905                           unop(Iop_32to8,
1906                                binop(Iop_And32,
1907                                      binop(Iop_Sub32,
1908                                            mkexpr(amtT),
1909                                            mkU32(1)),
1910                                      mkU32(31)
1911                                )
1912                           )
1913                     ),
1914                     mkU32(1)
1915                     ),
1916               mkU32(0)
1917            )
1918         )
1919      );
1920   }
1921   // (Rm >>u (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
1922   // Lhs of the & limits the shift to 31 bits, so as to
1923   // give known IR semantics.  Rhs of the & is all 1s for
1924   // Rs <= 31 and all 0s for Rs >= 32.
1925   assign(
1926      *res,
1927      binop(
1928         Iop_And32,
1929         binop(Iop_Shr32,
1930               mkexpr(rMt),
1931               unop(Iop_32to8,
1932                    binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1933         binop(Iop_Sar32,
1934               binop(Iop_Sub32,
1935                     mkexpr(amtT),
1936                     mkU32(32)),
1937               mkU8(31))));
1938    DIS(buf, "r%u, LSR r%u", rM, rS);
1939}
1940
1941
1942static void compute_result_and_C_after_ASR_by_imm5 (
1943               /*OUT*/HChar* buf,
1944               IRTemp* res,
1945               IRTemp* newC,
1946               IRTemp rMt, UInt shift_amt, /* operands */
1947               UInt rM      /* only for debug printing */
1948            )
1949{
1950   if (shift_amt == 0) {
1951      // conceptually a 32-bit shift, however:
1952      // res  = Rm >>s 31
1953      // newC = Rm[31]
1954      if (newC) {
1955         assign( *newC,
1956                 binop(Iop_And32,
1957                       binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1958                       mkU32(1)));
1959      }
1960      assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
1961      DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
1962   } else {
1963      // shift in range 1..31
1964      // res = Rm >>s shift_amt
1965      // newC = Rm[shift_amt - 1]
1966      vassert(shift_amt >= 1 && shift_amt <= 31);
1967      if (newC) {
1968         assign( *newC,
1969                 binop(Iop_And32,
1970                       binop(Iop_Shr32, mkexpr(rMt),
1971                                        mkU8(shift_amt - 1)),
1972                       mkU32(1)));
1973      }
1974      assign( *res,
1975              binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
1976      DIS(buf, "r%u, ASR #%u", rM, shift_amt);
1977   }
1978}
1979
1980
1981static void compute_result_and_C_after_ASR_by_reg (
1982               /*OUT*/HChar* buf,
1983               IRTemp* res,
1984               IRTemp* newC,
1985               IRTemp rMt, IRTemp rSt,  /* operands */
1986               UInt rM,    UInt rS      /* only for debug printing */
1987            )
1988{
1989   // arithmetic shift right in range 0 .. 255
1990   // amt = rS & 255
1991   // res  = amt < 32 ?  Rm >>s amt  : Rm >>s 31
1992   // newC = amt == 0     ? oldC  :
1993   //        amt in 1..32 ?  Rm[amt-1]  : Rm[31]
1994   IRTemp amtT = newTemp(Ity_I32);
1995   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1996   if (newC) {
1997      /* mux0X(amt == 0,
1998               mux0X(amt < 32,
1999                     Rm[31],
2000                     Rm[(amt-1) & 31])
2001               oldC)
2002      */
2003      IRTemp oldC = newTemp(Ity_I32);
2004      assign(oldC, mk_armg_calculate_flag_c() );
2005      assign(
2006         *newC,
2007         IRExpr_ITE(
2008            binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
2009            mkexpr(oldC),
2010            IRExpr_ITE(
2011               binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
2012               binop(Iop_And32,
2013                     binop(Iop_Shr32,
2014                           mkexpr(rMt),
2015                           unop(Iop_32to8,
2016                                binop(Iop_And32,
2017                                      binop(Iop_Sub32,
2018                                            mkexpr(amtT),
2019                                            mkU32(1)),
2020                                      mkU32(31)
2021                                )
2022                           )
2023                     ),
2024                     mkU32(1)
2025                     ),
2026               binop(Iop_And32,
2027                     binop(Iop_Shr32,
2028                           mkexpr(rMt),
2029                           mkU8(31)
2030                     ),
2031                     mkU32(1)
2032               )
2033            )
2034         )
2035      );
2036   }
2037   // (Rm >>s (amt <u 32 ? amt : 31))
2038   assign(
2039      *res,
2040      binop(
2041         Iop_Sar32,
2042         mkexpr(rMt),
2043         unop(
2044            Iop_32to8,
2045            IRExpr_ITE(
2046               binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32)),
2047               mkexpr(amtT),
2048               mkU32(31)))));
2049    DIS(buf, "r%u, ASR r%u", rM, rS);
2050}
2051
2052
2053static void compute_result_and_C_after_ROR_by_reg (
2054               /*OUT*/HChar* buf,
2055               IRTemp* res,
2056               IRTemp* newC,
2057               IRTemp rMt, IRTemp rSt,  /* operands */
2058               UInt rM,    UInt rS      /* only for debug printing */
2059            )
2060{
2061   // rotate right in range 0 .. 255
2062   // amt = rS & 255
2063   // shop =  Rm `ror` (amt & 31)
2064   // shco =  amt == 0 ? oldC : Rm[(amt-1) & 31]
2065   IRTemp amtT = newTemp(Ity_I32);
2066   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2067   IRTemp amt5T = newTemp(Ity_I32);
2068   assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
2069   IRTemp oldC = newTemp(Ity_I32);
2070   assign(oldC, mk_armg_calculate_flag_c() );
2071   if (newC) {
2072      assign(
2073         *newC,
2074         IRExpr_ITE(
2075            binop(Iop_CmpNE32, mkexpr(amtT), mkU32(0)),
2076            binop(Iop_And32,
2077                  binop(Iop_Shr32,
2078                        mkexpr(rMt),
2079                        unop(Iop_32to8,
2080                             binop(Iop_And32,
2081                                   binop(Iop_Sub32,
2082                                         mkexpr(amtT),
2083                                         mkU32(1)
2084                                   ),
2085                                   mkU32(31)
2086                             )
2087                        )
2088                  ),
2089                  mkU32(1)
2090            ),
2091            mkexpr(oldC)
2092         )
2093      );
2094   }
2095   assign(
2096      *res,
2097      IRExpr_ITE(
2098         binop(Iop_CmpNE32, mkexpr(amt5T), mkU32(0)),
2099         binop(Iop_Or32,
2100               binop(Iop_Shr32,
2101                     mkexpr(rMt),
2102                     unop(Iop_32to8, mkexpr(amt5T))
2103               ),
2104               binop(Iop_Shl32,
2105                     mkexpr(rMt),
2106                     unop(Iop_32to8,
2107                          binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
2108                     )
2109               )
2110               ),
2111         mkexpr(rMt)
2112      )
2113   );
2114   DIS(buf, "r%u, ROR r#%u", rM, rS);
2115}
2116
2117
2118/* Generate an expression corresponding to the immediate-shift case of
2119   a shifter operand.  This is used both for ARM and Thumb2.
2120
2121   Bind it to a temporary, and return that via *res.  If newC is
2122   non-NULL, also compute a value for the shifter's carry out (in the
2123   LSB of a word), bind it to a temporary, and return that via *shco.
2124
2125   Generates GETs from the guest state and is therefore not safe to
2126   use once we start doing PUTs to it, for any given instruction.
2127
2128   'how' is encoded thusly:
2129      00b LSL,  01b LSR,  10b ASR,  11b ROR
2130   Most but not all ARM and Thumb integer insns use this encoding.
2131   Be careful to ensure the right value is passed here.
2132*/
2133static void compute_result_and_C_after_shift_by_imm5 (
2134               /*OUT*/HChar* buf,
2135               /*OUT*/IRTemp* res,
2136               /*OUT*/IRTemp* newC,
2137               IRTemp  rMt,       /* reg to shift */
2138               UInt    how,       /* what kind of shift */
2139               UInt    shift_amt, /* shift amount (0..31) */
2140               UInt    rM         /* only for debug printing */
2141            )
2142{
2143   vassert(shift_amt < 32);
2144   vassert(how < 4);
2145
2146   switch (how) {
2147
2148      case 0:
2149         compute_result_and_C_after_LSL_by_imm5(
2150            buf, res, newC, rMt, shift_amt, rM
2151         );
2152         break;
2153
2154      case 1:
2155         compute_result_and_C_after_LSR_by_imm5(
2156            buf, res, newC, rMt, shift_amt, rM
2157         );
2158         break;
2159
2160      case 2:
2161         compute_result_and_C_after_ASR_by_imm5(
2162            buf, res, newC, rMt, shift_amt, rM
2163         );
2164         break;
2165
2166      case 3:
2167         if (shift_amt == 0) {
2168            IRTemp oldcT = newTemp(Ity_I32);
2169            // rotate right 1 bit through carry (?)
2170            // RRX -- described at ARM ARM A5-17
2171            // res  = (oldC << 31) | (Rm >>u 1)
2172            // newC = Rm[0]
2173            if (newC) {
2174               assign( *newC,
2175                       binop(Iop_And32, mkexpr(rMt), mkU32(1)));
2176            }
2177            assign( oldcT, mk_armg_calculate_flag_c() );
2178            assign( *res,
2179                    binop(Iop_Or32,
2180                          binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
2181                          binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
2182            DIS(buf, "r%u, RRX", rM);
2183         } else {
2184            // rotate right in range 1..31
2185            // res  = Rm `ror` shift_amt
2186            // newC = Rm[shift_amt - 1]
2187            vassert(shift_amt >= 1 && shift_amt <= 31);
2188            if (newC) {
2189               assign( *newC,
2190                       binop(Iop_And32,
2191                             binop(Iop_Shr32, mkexpr(rMt),
2192                                              mkU8(shift_amt - 1)),
2193                             mkU32(1)));
2194            }
2195            assign( *res,
2196                    binop(Iop_Or32,
2197                          binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
2198                          binop(Iop_Shl32, mkexpr(rMt),
2199                                           mkU8(32-shift_amt))));
2200            DIS(buf, "r%u, ROR #%u", rM, shift_amt);
2201         }
2202         break;
2203
2204      default:
2205         /*NOTREACHED*/
2206         vassert(0);
2207   }
2208}
2209
2210
2211/* Generate an expression corresponding to the register-shift case of
2212   a shifter operand.  This is used both for ARM and Thumb2.
2213
2214   Bind it to a temporary, and return that via *res.  If newC is
2215   non-NULL, also compute a value for the shifter's carry out (in the
2216   LSB of a word), bind it to a temporary, and return that via *shco.
2217
2218   Generates GETs from the guest state and is therefore not safe to
2219   use once we start doing PUTs to it, for any given instruction.
2220
2221   'how' is encoded thusly:
2222      00b LSL,  01b LSR,  10b ASR,  11b ROR
2223   Most but not all ARM and Thumb integer insns use this encoding.
2224   Be careful to ensure the right value is passed here.
2225*/
2226static void compute_result_and_C_after_shift_by_reg (
2227               /*OUT*/HChar*  buf,
2228               /*OUT*/IRTemp* res,
2229               /*OUT*/IRTemp* newC,
2230               IRTemp  rMt,       /* reg to shift */
2231               UInt    how,       /* what kind of shift */
2232               IRTemp  rSt,       /* shift amount */
2233               UInt    rM,        /* only for debug printing */
2234               UInt    rS         /* only for debug printing */
2235            )
2236{
2237   vassert(how < 4);
2238   switch (how) {
2239      case 0: { /* LSL */
2240         compute_result_and_C_after_LSL_by_reg(
2241            buf, res, newC, rMt, rSt, rM, rS
2242         );
2243         break;
2244      }
2245      case 1: { /* LSR */
2246         compute_result_and_C_after_LSR_by_reg(
2247            buf, res, newC, rMt, rSt, rM, rS
2248         );
2249         break;
2250      }
2251      case 2: { /* ASR */
2252         compute_result_and_C_after_ASR_by_reg(
2253            buf, res, newC, rMt, rSt, rM, rS
2254         );
2255         break;
2256      }
2257      case 3: { /* ROR */
2258         compute_result_and_C_after_ROR_by_reg(
2259             buf, res, newC, rMt, rSt, rM, rS
2260         );
2261         break;
2262      }
2263      default:
2264         /*NOTREACHED*/
2265         vassert(0);
2266   }
2267}
2268
2269
2270/* Generate an expression corresponding to a shifter_operand, bind it
2271   to a temporary, and return that via *shop.  If shco is non-NULL,
2272   also compute a value for the shifter's carry out (in the LSB of a
2273   word), bind it to a temporary, and return that via *shco.
2274
2275   If for some reason we can't come up with a shifter operand (missing
2276   case?  not really a shifter operand?) return False.
2277
2278   Generates GETs from the guest state and is therefore not safe to
2279   use once we start doing PUTs to it, for any given instruction.
2280
2281   For ARM insns only; not for Thumb.
2282*/
2283static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
2284                                 /*OUT*/IRTemp* shop,
2285                                 /*OUT*/IRTemp* shco,
2286                                 /*OUT*/HChar* buf )
2287{
2288   UInt insn_4 = (insn_11_0 >> 4) & 1;
2289   UInt insn_7 = (insn_11_0 >> 7) & 1;
2290   vassert(insn_25 <= 0x1);
2291   vassert(insn_11_0 <= 0xFFF);
2292
2293   vassert(shop && *shop == IRTemp_INVALID);
2294   *shop = newTemp(Ity_I32);
2295
2296   if (shco) {
2297      vassert(*shco == IRTemp_INVALID);
2298      *shco = newTemp(Ity_I32);
2299   }
2300
2301   /* 32-bit immediate */
2302
2303   if (insn_25 == 1) {
2304      /* immediate: (7:0) rotated right by 2 * (11:8) */
2305      UInt imm = (insn_11_0 >> 0) & 0xFF;
2306      UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
2307      vassert(rot <= 30);
2308      imm = ROR32(imm, rot);
2309      if (shco) {
2310         if (rot == 0) {
2311            assign( *shco, mk_armg_calculate_flag_c() );
2312         } else {
2313            assign( *shco, mkU32( (imm >> 31) & 1 ) );
2314         }
2315      }
2316      DIS(buf, "#0x%x", imm);
2317      assign( *shop, mkU32(imm) );
2318      return True;
2319   }
2320
2321   /* Shift/rotate by immediate */
2322
2323   if (insn_25 == 0 && insn_4 == 0) {
2324      /* Rm (3:0) shifted (6:5) by immediate (11:7) */
2325      UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
2326      UInt rM        = (insn_11_0 >> 0) & 0xF;
2327      UInt how       = (insn_11_0 >> 5) & 3;
2328      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
2329      IRTemp rMt = newTemp(Ity_I32);
2330      assign(rMt, getIRegA(rM));
2331
2332      vassert(shift_amt <= 31);
2333
2334      compute_result_and_C_after_shift_by_imm5(
2335         buf, shop, shco, rMt, how, shift_amt, rM
2336      );
2337      return True;
2338   }
2339
2340   /* Shift/rotate by register */
2341   if (insn_25 == 0 && insn_4 == 1) {
2342      /* Rm (3:0) shifted (6:5) by Rs (11:8) */
2343      UInt rM  = (insn_11_0 >> 0) & 0xF;
2344      UInt rS  = (insn_11_0 >> 8) & 0xF;
2345      UInt how = (insn_11_0 >> 5) & 3;
2346      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
2347      IRTemp rMt = newTemp(Ity_I32);
2348      IRTemp rSt = newTemp(Ity_I32);
2349
2350      if (insn_7 == 1)
2351         return False; /* not really a shifter operand */
2352
2353      assign(rMt, getIRegA(rM));
2354      assign(rSt, getIRegA(rS));
2355
2356      compute_result_and_C_after_shift_by_reg(
2357         buf, shop, shco, rMt, how, rSt, rM, rS
2358      );
2359      return True;
2360   }
2361
2362   vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
2363   return False;
2364}
2365
2366
2367/* ARM only */
2368static
2369IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
2370                                    /*OUT*/HChar* buf )
2371{
2372   vassert(rN < 16);
2373   vassert(bU < 2);
2374   vassert(imm12 < 0x1000);
2375   HChar opChar = bU == 1 ? '+' : '-';
2376   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
2377   return
2378      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2379             getIRegA(rN),
2380             mkU32(imm12) );
2381}
2382
2383
2384/* ARM only.
2385   NB: This is "DecodeImmShift" in newer versions of the the ARM ARM.
2386*/
2387static
2388IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
2389                                          UInt sh2, UInt imm5,
2390                                          /*OUT*/HChar* buf )
2391{
2392   vassert(rN < 16);
2393   vassert(bU < 2);
2394   vassert(rM < 16);
2395   vassert(sh2 < 4);
2396   vassert(imm5 < 32);
2397   HChar   opChar = bU == 1 ? '+' : '-';
2398   IRExpr* index  = NULL;
2399   switch (sh2) {
2400      case 0: /* LSL */
2401         /* imm5 can be in the range 0 .. 31 inclusive. */
2402         index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
2403         DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
2404         break;
2405      case 1: /* LSR */
2406         if (imm5 == 0) {
2407            index = mkU32(0);
2408            vassert(0); // ATC
2409         } else {
2410            index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
2411         }
2412         DIS(buf, "[r%u, %cr%u, LSR #%u]",
2413                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2414         break;
2415      case 2: /* ASR */
2416         /* Doesn't this just mean that the behaviour with imm5 == 0
2417            is the same as if it had been 31 ? */
2418         if (imm5 == 0) {
2419            index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
2420            vassert(0); // ATC
2421         } else {
2422            index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
2423         }
2424         DIS(buf, "[r%u, %cr%u, ASR #%u]",
2425                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2426         break;
2427      case 3: /* ROR or RRX */
2428         if (imm5 == 0) {
2429            IRTemp rmT    = newTemp(Ity_I32);
2430            IRTemp cflagT = newTemp(Ity_I32);
2431            assign(rmT, getIRegA(rM));
2432            assign(cflagT, mk_armg_calculate_flag_c());
2433            index = binop(Iop_Or32,
2434                          binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
2435                          binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
2436            DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
2437         } else {
2438            IRTemp rmT = newTemp(Ity_I32);
2439            assign(rmT, getIRegA(rM));
2440            vassert(imm5 >= 1 && imm5 <= 31);
2441            index = binop(Iop_Or32,
2442                          binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
2443                          binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
2444            DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
2445         }
2446         break;
2447      default:
2448         vassert(0);
2449   }
2450   vassert(index);
2451   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2452                getIRegA(rN), index);
2453}
2454
2455
2456/* ARM only */
2457static
2458IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
2459                                   /*OUT*/HChar* buf )
2460{
2461   vassert(rN < 16);
2462   vassert(bU < 2);
2463   vassert(imm8 < 0x100);
2464   HChar opChar = bU == 1 ? '+' : '-';
2465   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
2466   return
2467      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2468             getIRegA(rN),
2469             mkU32(imm8) );
2470}
2471
2472
2473/* ARM only */
2474static
2475IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
2476                                  /*OUT*/HChar* buf )
2477{
2478   vassert(rN < 16);
2479   vassert(bU < 2);
2480   vassert(rM < 16);
2481   HChar   opChar = bU == 1 ? '+' : '-';
2482   IRExpr* index  = getIRegA(rM);
2483   DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
2484   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2485                getIRegA(rN), index);
2486}
2487
2488
2489/* irRes :: Ity_I32 holds a floating point comparison result encoded
2490   as an IRCmpF64Result.  Generate code to convert it to an
2491   ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
2492   Assign a new temp to hold that value, and return the temp. */
2493static
2494IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
2495{
2496   IRTemp ix       = newTemp(Ity_I32);
2497   IRTemp termL    = newTemp(Ity_I32);
2498   IRTemp termR    = newTemp(Ity_I32);
2499   IRTemp nzcv     = newTemp(Ity_I32);
2500
2501   /* This is where the fun starts.  We have to convert 'irRes' from
2502      an IR-convention return result (IRCmpF64Result) to an
2503      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
2504      4 bits of 'nzcv'. */
2505   /* Map compare result from IR to ARM(nzcv) */
2506   /*
2507      FP cmp result | IR   | ARM(nzcv)
2508      --------------------------------
2509      UN              0x45   0011
2510      LT              0x01   1000
2511      GT              0x00   0010
2512      EQ              0x40   0110
2513   */
2514   /* Now since you're probably wondering WTF ..
2515
2516      ix fishes the useful bits out of the IR value, bits 6 and 0, and
2517      places them side by side, giving a number which is 0, 1, 2 or 3.
2518
2519      termL is a sequence cooked up by GNU superopt.  It converts ix
2520         into an almost correct value NZCV value (incredibly), except
2521         for the case of UN, where it produces 0100 instead of the
2522         required 0011.
2523
2524      termR is therefore a correction term, also computed from ix.  It
2525         is 1 in the UN case and 0 for LT, GT and UN.  Hence, to get
2526         the final correct value, we subtract termR from termL.
2527
2528      Don't take my word for it.  There's a test program at the bottom
2529      of this file, to try this out with.
2530   */
2531   assign(
2532      ix,
2533      binop(Iop_Or32,
2534            binop(Iop_And32,
2535                  binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
2536                  mkU32(3)),
2537            binop(Iop_And32, mkexpr(irRes), mkU32(1))));
2538
2539   assign(
2540      termL,
2541      binop(Iop_Add32,
2542            binop(Iop_Shr32,
2543                  binop(Iop_Sub32,
2544                        binop(Iop_Shl32,
2545                              binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
2546                              mkU8(30)),
2547                        mkU32(1)),
2548                  mkU8(29)),
2549            mkU32(1)));
2550
2551   assign(
2552      termR,
2553      binop(Iop_And32,
2554            binop(Iop_And32,
2555                  mkexpr(ix),
2556                  binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
2557            mkU32(1)));
2558
2559   assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
2560   return nzcv;
2561}
2562
2563
2564/* Thumb32 only.  This is "ThumbExpandImm" in the ARM ARM.  If
2565   updatesC is non-NULL, a boolean is written to it indicating whether
2566   or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
2567*/
2568static UInt thumbExpandImm ( Bool* updatesC,
2569                             UInt imm1, UInt imm3, UInt imm8 )
2570{
2571   vassert(imm1 < (1<<1));
2572   vassert(imm3 < (1<<3));
2573   vassert(imm8 < (1<<8));
2574   UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
2575   UInt abcdefgh = imm8;
2576   UInt lbcdefgh = imm8 | 0x80;
2577   if (updatesC) {
2578      *updatesC = i_imm3_a >= 8;
2579   }
2580   switch (i_imm3_a) {
2581      case 0: case 1:
2582         return abcdefgh;
2583      case 2: case 3:
2584         return (abcdefgh << 16) | abcdefgh;
2585      case 4: case 5:
2586         return (abcdefgh << 24) | (abcdefgh << 8);
2587      case 6: case 7:
2588         return (abcdefgh << 24) | (abcdefgh << 16)
2589                | (abcdefgh << 8) | abcdefgh;
2590      case 8 ... 31:
2591         return lbcdefgh << (32 - i_imm3_a);
2592      default:
2593         break;
2594   }
2595   /*NOTREACHED*/vassert(0);
2596}
2597
2598
2599/* Version of thumbExpandImm where we simply feed it the
2600   instruction halfwords (the lowest addressed one is I0). */
2601static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
2602                                        UShort i0s, UShort i1s )
2603{
2604   UInt i0    = (UInt)i0s;
2605   UInt i1    = (UInt)i1s;
2606   UInt imm1  = SLICE_UInt(i0,10,10);
2607   UInt imm3  = SLICE_UInt(i1,14,12);
2608   UInt imm8  = SLICE_UInt(i1,7,0);
2609   return thumbExpandImm(updatesC, imm1, imm3, imm8);
2610}
2611
2612
2613/* Thumb16 only.  Given the firstcond and mask fields from an IT
2614   instruction, compute the 32-bit ITSTATE value implied, as described
2615   in libvex_guest_arm.h.  This is not the ARM ARM representation.
2616   Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
2617   disassembly printing.  Returns False if firstcond or mask
2618   denote something invalid.
2619
2620   The number and conditions for the instructions to be
2621   conditionalised depend on firstcond and mask:
2622
2623   mask      cond 1    cond 2      cond 3      cond 4
2624
2625   1000      fc[3:0]
2626   x100      fc[3:0]   fc[3:1]:x
2627   xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
2628   xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z
2629
2630   The condition fields are assembled in *itstate backwards (cond 4 at
2631   the top, cond 1 at the bottom).  Conditions are << 4'd and then
2632   ^0xE'd, and those fields that correspond to instructions in the IT
2633   block are tagged with a 1 bit.
2634*/
2635static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
2636                              /*OUT*/HChar* ch1,
2637                              /*OUT*/HChar* ch2,
2638                              /*OUT*/HChar* ch3,
2639                              UInt firstcond, UInt mask )
2640{
2641   vassert(firstcond <= 0xF);
2642   vassert(mask <= 0xF);
2643   *itstate = 0;
2644   *ch1 = *ch2 = *ch3 = '.';
2645   if (mask == 0)
2646      return False; /* the logic below actually ensures this anyway,
2647                       but clearer to make it explicit. */
2648   if (firstcond == 0xF)
2649      return False; /* NV is not allowed */
2650   if (firstcond == 0xE && popcount32(mask) != 1)
2651      return False; /* if firstcond is AL then all the rest must be too */
2652
2653   UInt m3 = (mask >> 3) & 1;
2654   UInt m2 = (mask >> 2) & 1;
2655   UInt m1 = (mask >> 1) & 1;
2656   UInt m0 = (mask >> 0) & 1;
2657
2658   UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
2659   UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;
2660
2661   if (m3 == 1 && (m2|m1|m0) == 0) {
2662      *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
2663      *itstate ^= 0xE0E0E0E0;
2664      return True;
2665   }
2666
2667   if (m2 == 1 && (m1|m0) == 0) {
2668      *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
2669      *itstate ^= 0xE0E0E0E0;
2670      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2671      return True;
2672   }
2673
2674   if (m1 == 1 && m0 == 0) {
2675      *itstate = (ni << 24)
2676                 | (setbit32(fc, 4, m2) << 16)
2677                 | (setbit32(fc, 4, m3) << 8) | fc;
2678      *itstate ^= 0xE0E0E0E0;
2679      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2680      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2681      return True;
2682   }
2683
2684   if (m0 == 1) {
2685      *itstate = (setbit32(fc, 4, m1) << 24)
2686                 | (setbit32(fc, 4, m2) << 16)
2687                 | (setbit32(fc, 4, m3) << 8) | fc;
2688      *itstate ^= 0xE0E0E0E0;
2689      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2690      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2691      *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
2692      return True;
2693   }
2694
2695   return False;
2696}
2697
2698
2699/* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
2700   Chapter 7 Section 1. */
2701static IRTemp gen_BITREV ( IRTemp x0 )
2702{
2703   IRTemp x1 = newTemp(Ity_I32);
2704   IRTemp x2 = newTemp(Ity_I32);
2705   IRTemp x3 = newTemp(Ity_I32);
2706   IRTemp x4 = newTemp(Ity_I32);
2707   IRTemp x5 = newTemp(Ity_I32);
2708   UInt   c1 = 0x55555555;
2709   UInt   c2 = 0x33333333;
2710   UInt   c3 = 0x0F0F0F0F;
2711   UInt   c4 = 0x00FF00FF;
2712   UInt   c5 = 0x0000FFFF;
2713   assign(x1,
2714          binop(Iop_Or32,
2715                binop(Iop_Shl32,
2716                      binop(Iop_And32, mkexpr(x0), mkU32(c1)),
2717                      mkU8(1)),
2718                binop(Iop_Shr32,
2719                      binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
2720                      mkU8(1))
2721   ));
2722   assign(x2,
2723          binop(Iop_Or32,
2724                binop(Iop_Shl32,
2725                      binop(Iop_And32, mkexpr(x1), mkU32(c2)),
2726                      mkU8(2)),
2727                binop(Iop_Shr32,
2728                      binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
2729                      mkU8(2))
2730   ));
2731   assign(x3,
2732          binop(Iop_Or32,
2733                binop(Iop_Shl32,
2734                      binop(Iop_And32, mkexpr(x2), mkU32(c3)),
2735                      mkU8(4)),
2736                binop(Iop_Shr32,
2737                      binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
2738                      mkU8(4))
2739   ));
2740   assign(x4,
2741          binop(Iop_Or32,
2742                binop(Iop_Shl32,
2743                      binop(Iop_And32, mkexpr(x3), mkU32(c4)),
2744                      mkU8(8)),
2745                binop(Iop_Shr32,
2746                      binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
2747                      mkU8(8))
2748   ));
2749   assign(x5,
2750          binop(Iop_Or32,
2751                binop(Iop_Shl32,
2752                      binop(Iop_And32, mkexpr(x4), mkU32(c5)),
2753                      mkU8(16)),
2754                binop(Iop_Shr32,
2755                      binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
2756                      mkU8(16))
2757   ));
2758   return x5;
2759}
2760
2761
2762/* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2763   0:1:2:3 (aka byte-swap). */
2764static IRTemp gen_REV ( IRTemp arg )
2765{
2766   IRTemp res = newTemp(Ity_I32);
2767   assign(res,
2768          binop(Iop_Or32,
2769                binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
2770          binop(Iop_Or32,
2771                binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2772                                 mkU32(0x00FF0000)),
2773          binop(Iop_Or32,
2774                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2775                                       mkU32(0x0000FF00)),
2776                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
2777                                       mkU32(0x000000FF) )
2778   ))));
2779   return res;
2780}
2781
2782
2783/* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2784   2:3:0:1 (swap within lo and hi halves). */
2785static IRTemp gen_REV16 ( IRTemp arg )
2786{
2787   IRTemp res = newTemp(Ity_I32);
2788   assign(res,
2789          binop(Iop_Or32,
2790                binop(Iop_And32,
2791                      binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2792                      mkU32(0xFF00FF00)),
2793                binop(Iop_And32,
2794                      binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2795                      mkU32(0x00FF00FF))));
2796   return res;
2797}
2798
2799
2800/*------------------------------------------------------------*/
2801/*--- Advanced SIMD (NEON) instructions                    ---*/
2802/*------------------------------------------------------------*/
2803
2804/*------------------------------------------------------------*/
2805/*--- NEON data processing                                 ---*/
2806/*------------------------------------------------------------*/
2807
2808/* For all NEON DP ops, we use the normal scheme to handle conditional
2809   writes to registers -- pass in condT and hand that on to the
2810   put*Reg functions.  In ARM mode condT is always IRTemp_INVALID
2811   since NEON is unconditional for ARM.  In Thumb mode condT is
2812   derived from the ITSTATE shift register in the normal way. */
2813
2814static
2815UInt get_neon_d_regno(UInt theInstr)
2816{
2817   UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
2818   if (theInstr & 0x40) {
2819      if (x & 1) {
2820         x = x + 0x100;
2821      } else {
2822         x = x >> 1;
2823      }
2824   }
2825   return x;
2826}
2827
2828static
2829UInt get_neon_n_regno(UInt theInstr)
2830{
2831   UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
2832   if (theInstr & 0x40) {
2833      if (x & 1) {
2834         x = x + 0x100;
2835      } else {
2836         x = x >> 1;
2837      }
2838   }
2839   return x;
2840}
2841
2842static
2843UInt get_neon_m_regno(UInt theInstr)
2844{
2845   UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
2846   if (theInstr & 0x40) {
2847      if (x & 1) {
2848         x = x + 0x100;
2849      } else {
2850         x = x >> 1;
2851      }
2852   }
2853   return x;
2854}
2855
2856static
2857Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
2858{
2859   UInt dreg = get_neon_d_regno(theInstr);
2860   UInt mreg = get_neon_m_regno(theInstr);
2861   UInt nreg = get_neon_n_regno(theInstr);
2862   UInt imm4 = (theInstr >> 8) & 0xf;
2863   UInt Q = (theInstr >> 6) & 1;
2864   HChar reg_t = Q ? 'q' : 'd';
2865
2866   if (Q) {
2867      putQReg(dreg, triop(Iop_ExtractV128, getQReg(nreg),
2868               getQReg(mreg), mkU8(imm4)), condT);
2869   } else {
2870      putDRegI64(dreg, triop(Iop_Extract64, getDRegI64(nreg),
2871                 getDRegI64(mreg), mkU8(imm4)), condT);
2872   }
2873   DIP("vext.8 %c%d, %c%d, %c%d, #%d\n", reg_t, dreg, reg_t, nreg,
2874                                         reg_t, mreg, imm4);
2875   return True;
2876}
2877
2878/* Generate specific vector FP binary ops, possibly with a fake
2879   rounding mode as required by the primop. */
2880static
2881IRExpr* binop_w_fake_RM ( IROp op, IRExpr* argL, IRExpr* argR )
2882{
2883   switch (op) {
2884      case Iop_Add32Fx4:
2885      case Iop_Sub32Fx4:
2886      case Iop_Mul32Fx4:
2887         return triop(op, get_FAKE_roundingmode(), argL, argR );
2888      case Iop_Add32x4: case Iop_Add16x8:
2889      case Iop_Sub32x4: case Iop_Sub16x8:
2890      case Iop_Mul32x4: case Iop_Mul16x8:
2891      case Iop_Mul32x2: case Iop_Mul16x4:
2892      case Iop_Add32Fx2:
2893      case Iop_Sub32Fx2:
2894      case Iop_Mul32Fx2:
2895      case Iop_PwAdd32Fx2:
2896         return binop(op, argL, argR);
2897      default:
2898        ppIROp(op);
2899        vassert(0);
2900   }
2901}
2902
2903/* VTBL, VTBX */
2904static
2905Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
2906{
2907   UInt op = (theInstr >> 6) & 1;
2908   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
2909   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
2910   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
2911   UInt len = (theInstr >> 8) & 3;
2912   Int i;
2913   IROp cmp;
2914   ULong imm;
2915   IRTemp arg_l;
2916   IRTemp old_mask, new_mask, cur_mask;
2917   IRTemp old_res, new_res;
2918   IRTemp old_arg, new_arg;
2919
2920   if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
2921      return False;
2922   if (nreg + len > 31)
2923      return False;
2924
2925   cmp = Iop_CmpGT8Ux8;
2926
2927   old_mask = newTemp(Ity_I64);
2928   old_res = newTemp(Ity_I64);
2929   old_arg = newTemp(Ity_I64);
2930   assign(old_mask, mkU64(0));
2931   assign(old_res, mkU64(0));
2932   assign(old_arg, getDRegI64(mreg));
2933   imm = 8;
2934   imm = (imm <<  8) | imm;
2935   imm = (imm << 16) | imm;
2936   imm = (imm << 32) | imm;
2937
2938   for (i = 0; i <= len; i++) {
2939      arg_l = newTemp(Ity_I64);
2940      new_mask = newTemp(Ity_I64);
2941      cur_mask = newTemp(Ity_I64);
2942      new_res = newTemp(Ity_I64);
2943      new_arg = newTemp(Ity_I64);
2944      assign(arg_l, getDRegI64(nreg+i));
2945      assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
2946      assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
2947      assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
2948      assign(new_res, binop(Iop_Or64,
2949                            mkexpr(old_res),
2950                            binop(Iop_And64,
2951                                  binop(Iop_Perm8x8,
2952                                        mkexpr(arg_l),
2953                                        binop(Iop_And64,
2954                                              mkexpr(old_arg),
2955                                              mkexpr(cur_mask))),
2956                                  mkexpr(cur_mask))));
2957
2958      old_arg = new_arg;
2959      old_mask = new_mask;
2960      old_res = new_res;
2961   }
2962   if (op) {
2963      new_res = newTemp(Ity_I64);
2964      assign(new_res, binop(Iop_Or64,
2965                            binop(Iop_And64,
2966                                  getDRegI64(dreg),
2967                                  unop(Iop_Not64, mkexpr(old_mask))),
2968                            mkexpr(old_res)));
2969      old_res = new_res;
2970   }
2971
2972   putDRegI64(dreg, mkexpr(old_res), condT);
2973   DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
2974   if (len > 0) {
2975      DIP("d%u-d%u", nreg, nreg + len);
2976   } else {
2977      DIP("d%u", nreg);
2978   }
2979   DIP("}, d%u\n", mreg);
2980   return True;
2981}
2982
2983/* VDUP (scalar)  */
2984static
2985Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
2986{
2987   UInt Q = (theInstr >> 6) & 1;
2988   UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
2989   UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
2990   UInt imm4 = (theInstr >> 16) & 0xF;
2991   UInt index;
2992   UInt size;
2993   IRTemp arg_m;
2994   IRTemp res;
2995   IROp op, op2;
2996
2997   if ((imm4 == 0) || (imm4 == 8))
2998      return False;
2999   if ((Q == 1) && ((dreg & 1) == 1))
3000      return False;
3001   if (Q)
3002      dreg >>= 1;
3003   arg_m = newTemp(Ity_I64);
3004   assign(arg_m, getDRegI64(mreg));
3005   if (Q)
3006      res = newTemp(Ity_V128);
3007   else
3008      res = newTemp(Ity_I64);
3009   if ((imm4 & 1) == 1) {
3010      op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
3011      op2 = Iop_GetElem8x8;
3012      index = imm4 >> 1;
3013      size = 8;
3014   } else if ((imm4 & 3) == 2) {
3015      op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
3016      op2 = Iop_GetElem16x4;
3017      index = imm4 >> 2;
3018      size = 16;
3019   } else if ((imm4 & 7) == 4) {
3020      op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
3021      op2 = Iop_GetElem32x2;
3022      index = imm4 >> 3;
3023      size = 32;
3024   } else {
3025      return False; // can this ever happen?
3026   }
3027   assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
3028   if (Q) {
3029      putQReg(dreg, mkexpr(res), condT);
3030   } else {
3031      putDRegI64(dreg, mkexpr(res), condT);
3032   }
3033   DIP("vdup.%d %c%d, d%d[%d]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
3034   return True;
3035}
3036
3037/* A7.4.1 Three registers of the same length */
3038static
3039Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
3040{
3041   UInt Q = (theInstr >> 6) & 1;
3042   UInt dreg = get_neon_d_regno(theInstr);
3043   UInt nreg = get_neon_n_regno(theInstr);
3044   UInt mreg = get_neon_m_regno(theInstr);
3045   UInt A = (theInstr >> 8) & 0xF;
3046   UInt B = (theInstr >> 4) & 1;
3047   UInt C = (theInstr >> 20) & 0x3;
3048   UInt U = (theInstr >> 24) & 1;
3049   UInt size = C;
3050
3051   IRTemp arg_n;
3052   IRTemp arg_m;
3053   IRTemp res;
3054
3055   if (Q) {
3056      arg_n = newTemp(Ity_V128);
3057      arg_m = newTemp(Ity_V128);
3058      res = newTemp(Ity_V128);
3059      assign(arg_n, getQReg(nreg));
3060      assign(arg_m, getQReg(mreg));
3061   } else {
3062      arg_n = newTemp(Ity_I64);
3063      arg_m = newTemp(Ity_I64);
3064      res = newTemp(Ity_I64);
3065      assign(arg_n, getDRegI64(nreg));
3066      assign(arg_m, getDRegI64(mreg));
3067   }
3068
3069   switch(A) {
3070      case 0:
3071         if (B == 0) {
3072            /* VHADD */
3073            ULong imm = 0;
3074            IRExpr *imm_val;
3075            IROp addOp;
3076            IROp andOp;
3077            IROp shOp;
3078            HChar regType = Q ? 'q' : 'd';
3079
3080            if (size == 3)
3081               return False;
3082            switch(size) {
3083               case 0: imm = 0x101010101010101LL; break;
3084               case 1: imm = 0x1000100010001LL; break;
3085               case 2: imm = 0x100000001LL; break;
3086               default: vassert(0);
3087            }
3088            if (Q) {
3089               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3090               andOp = Iop_AndV128;
3091            } else {
3092               imm_val = mkU64(imm);
3093               andOp = Iop_And64;
3094            }
3095            if (U) {
3096               switch(size) {
3097                  case 0:
3098                     addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3099                     shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3100                     break;
3101                  case 1:
3102                     addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3103                     shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3104                     break;
3105                  case 2:
3106                     addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3107                     shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3108                     break;
3109                  default:
3110                     vassert(0);
3111               }
3112            } else {
3113               switch(size) {
3114                  case 0:
3115                     addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3116                     shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3117                     break;
3118                  case 1:
3119                     addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3120                     shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3121                     break;
3122                  case 2:
3123                     addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3124                     shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3125                     break;
3126                  default:
3127                     vassert(0);
3128               }
3129            }
3130            assign(res,
3131                   binop(addOp,
3132                         binop(addOp,
3133                               binop(shOp, mkexpr(arg_m), mkU8(1)),
3134                               binop(shOp, mkexpr(arg_n), mkU8(1))),
3135                         binop(shOp,
3136                               binop(addOp,
3137                                     binop(andOp, mkexpr(arg_m), imm_val),
3138                                     binop(andOp, mkexpr(arg_n), imm_val)),
3139                               mkU8(1))));
3140            DIP("vhadd.%c%d %c%d, %c%d, %c%d\n",
3141                U ? 'u' : 's', 8 << size, regType,
3142                dreg, regType, nreg, regType, mreg);
3143         } else {
3144            /* VQADD */
3145            IROp op, op2;
3146            IRTemp tmp;
3147            HChar reg_t = Q ? 'q' : 'd';
3148            if (Q) {
3149               switch (size) {
3150                  case 0:
3151                     op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
3152                     op2 = Iop_Add8x16;
3153                     break;
3154                  case 1:
3155                     op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
3156                     op2 = Iop_Add16x8;
3157                     break;
3158                  case 2:
3159                     op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
3160                     op2 = Iop_Add32x4;
3161                     break;
3162                  case 3:
3163                     op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
3164                     op2 = Iop_Add64x2;
3165                     break;
3166                  default:
3167                     vassert(0);
3168               }
3169            } else {
3170               switch (size) {
3171                  case 0:
3172                     op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
3173                     op2 = Iop_Add8x8;
3174                     break;
3175                  case 1:
3176                     op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
3177                     op2 = Iop_Add16x4;
3178                     break;
3179                  case 2:
3180                     op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
3181                     op2 = Iop_Add32x2;
3182                     break;
3183                  case 3:
3184                     op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
3185                     op2 = Iop_Add64;
3186                     break;
3187                  default:
3188                     vassert(0);
3189               }
3190            }
3191            if (Q) {
3192               tmp = newTemp(Ity_V128);
3193            } else {
3194               tmp = newTemp(Ity_I64);
3195            }
3196            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3197            assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3198            setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3199            DIP("vqadd.%c%d %c%d, %c%d, %c%d\n",
3200                U ? 'u' : 's',
3201                8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3202         }
3203         break;
3204      case 1:
3205         if (B == 0) {
3206            /* VRHADD */
3207            /* VRHADD C, A, B ::=
3208                 C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
3209            IROp shift_op, add_op;
3210            IRTemp cc;
3211            ULong one = 1;
3212            HChar reg_t = Q ? 'q' : 'd';
3213            switch (size) {
3214               case 0: one = (one <<  8) | one; /* fall through */
3215               case 1: one = (one << 16) | one; /* fall through */
3216               case 2: one = (one << 32) | one; break;
3217               case 3: return False;
3218               default: vassert(0);
3219            }
3220            if (Q) {
3221               switch (size) {
3222                  case 0:
3223                     shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
3224                     add_op = Iop_Add8x16;
3225                     break;
3226                  case 1:
3227                     shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
3228                     add_op = Iop_Add16x8;
3229                     break;
3230                  case 2:
3231                     shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
3232                     add_op = Iop_Add32x4;
3233                     break;
3234                  case 3:
3235                     return False;
3236                  default:
3237                     vassert(0);
3238               }
3239            } else {
3240               switch (size) {
3241                  case 0:
3242                     shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
3243                     add_op = Iop_Add8x8;
3244                     break;
3245                  case 1:
3246                     shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
3247                     add_op = Iop_Add16x4;
3248                     break;
3249                  case 2:
3250                     shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
3251                     add_op = Iop_Add32x2;
3252                     break;
3253                  case 3:
3254                     return False;
3255                  default:
3256                     vassert(0);
3257               }
3258            }
3259            if (Q) {
3260               cc = newTemp(Ity_V128);
3261               assign(cc, binop(shift_op,
3262                                binop(add_op,
3263                                      binop(add_op,
3264                                            binop(Iop_AndV128,
3265                                                  mkexpr(arg_n),
3266                                                  binop(Iop_64HLtoV128,
3267                                                        mkU64(one),
3268                                                        mkU64(one))),
3269                                            binop(Iop_AndV128,
3270                                                  mkexpr(arg_m),
3271                                                  binop(Iop_64HLtoV128,
3272                                                        mkU64(one),
3273                                                        mkU64(one)))),
3274                                      binop(Iop_64HLtoV128,
3275                                            mkU64(one),
3276                                            mkU64(one))),
3277                                mkU8(1)));
3278               assign(res, binop(add_op,
3279                                 binop(add_op,
3280                                       binop(shift_op,
3281                                             mkexpr(arg_n),
3282                                             mkU8(1)),
3283                                       binop(shift_op,
3284                                             mkexpr(arg_m),
3285                                             mkU8(1))),
3286                                 mkexpr(cc)));
3287            } else {
3288               cc = newTemp(Ity_I64);
3289               assign(cc, binop(shift_op,
3290                                binop(add_op,
3291                                      binop(add_op,
3292                                            binop(Iop_And64,
3293                                                  mkexpr(arg_n),
3294                                                  mkU64(one)),
3295                                            binop(Iop_And64,
3296                                                  mkexpr(arg_m),
3297                                                  mkU64(one))),
3298                                      mkU64(one)),
3299                                mkU8(1)));
3300               assign(res, binop(add_op,
3301                                 binop(add_op,
3302                                       binop(shift_op,
3303                                             mkexpr(arg_n),
3304                                             mkU8(1)),
3305                                       binop(shift_op,
3306                                             mkexpr(arg_m),
3307                                             mkU8(1))),
3308                                 mkexpr(cc)));
3309            }
3310            DIP("vrhadd.%c%d %c%d, %c%d, %c%d\n",
3311                U ? 'u' : 's',
3312                8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3313         } else {
3314            if (U == 0)  {
3315               switch(C) {
3316                  case 0: {
3317                     /* VAND  */
3318                     HChar reg_t = Q ? 'q' : 'd';
3319                     if (Q) {
3320                        assign(res, binop(Iop_AndV128, mkexpr(arg_n),
3321                                                       mkexpr(arg_m)));
3322                     } else {
3323                        assign(res, binop(Iop_And64, mkexpr(arg_n),
3324                                                     mkexpr(arg_m)));
3325                     }
3326                     DIP("vand %c%d, %c%d, %c%d\n",
3327                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3328                     break;
3329                  }
3330                  case 1: {
3331                     /* VBIC  */
3332                     HChar reg_t = Q ? 'q' : 'd';
3333                     if (Q) {
3334                        assign(res, binop(Iop_AndV128,mkexpr(arg_n),
3335                               unop(Iop_NotV128, mkexpr(arg_m))));
3336                     } else {
3337                        assign(res, binop(Iop_And64, mkexpr(arg_n),
3338                               unop(Iop_Not64, mkexpr(arg_m))));
3339                     }
3340                     DIP("vbic %c%d, %c%d, %c%d\n",
3341                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3342                     break;
3343                  }
3344                  case 2:
3345                     if ( nreg != mreg) {
3346                        /* VORR  */
3347                        HChar reg_t = Q ? 'q' : 'd';
3348                        if (Q) {
3349                           assign(res, binop(Iop_OrV128, mkexpr(arg_n),
3350                                                         mkexpr(arg_m)));
3351                        } else {
3352                           assign(res, binop(Iop_Or64, mkexpr(arg_n),
3353                                                       mkexpr(arg_m)));
3354                        }
3355                        DIP("vorr %c%d, %c%d, %c%d\n",
3356                            reg_t, dreg, reg_t, nreg, reg_t, mreg);
3357                     } else {
3358                        /* VMOV  */
3359                        HChar reg_t = Q ? 'q' : 'd';
3360                        assign(res, mkexpr(arg_m));
3361                        DIP("vmov %c%d, %c%d\n", reg_t, dreg, reg_t, mreg);
3362                     }
3363                     break;
3364                  case 3:{
3365                     /* VORN  */
3366                     HChar reg_t = Q ? 'q' : 'd';
3367                     if (Q) {
3368                        assign(res, binop(Iop_OrV128,mkexpr(arg_n),
3369                               unop(Iop_NotV128, mkexpr(arg_m))));
3370                     } else {
3371                        assign(res, binop(Iop_Or64, mkexpr(arg_n),
3372                               unop(Iop_Not64, mkexpr(arg_m))));
3373                     }
3374                     DIP("vorn %c%d, %c%d, %c%d\n",
3375                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3376                     break;
3377                  }
3378               }
3379            } else {
3380               switch(C) {
3381                  case 0:
3382                     /* VEOR (XOR)  */
3383                     if (Q) {
3384                        assign(res, binop(Iop_XorV128, mkexpr(arg_n),
3385                                                       mkexpr(arg_m)));
3386                     } else {
3387                        assign(res, binop(Iop_Xor64, mkexpr(arg_n),
3388                                                     mkexpr(arg_m)));
3389                     }
3390                     DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
3391                           Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3392                     break;
3393                  case 1:
3394                     /* VBSL  */
3395                     if (Q) {
3396                        IRTemp reg_d = newTemp(Ity_V128);
3397                        assign(reg_d, getQReg(dreg));
3398                        assign(res,
3399                               binop(Iop_OrV128,
3400                                     binop(Iop_AndV128, mkexpr(arg_n),
3401                                                        mkexpr(reg_d)),
3402                                     binop(Iop_AndV128,
3403                                           mkexpr(arg_m),
3404                                           unop(Iop_NotV128,
3405                                                 mkexpr(reg_d)) ) ) );
3406                     } else {
3407                        IRTemp reg_d = newTemp(Ity_I64);
3408                        assign(reg_d, getDRegI64(dreg));
3409                        assign(res,
3410                               binop(Iop_Or64,
3411                                     binop(Iop_And64, mkexpr(arg_n),
3412                                                      mkexpr(reg_d)),
3413                                     binop(Iop_And64,
3414                                           mkexpr(arg_m),
3415                                           unop(Iop_Not64, mkexpr(reg_d)))));
3416                     }
3417                     DIP("vbsl %c%u, %c%u, %c%u\n",
3418                         Q ? 'q' : 'd', dreg,
3419                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3420                     break;
3421                  case 2:
3422                     /* VBIT  */
3423                     if (Q) {
3424                        IRTemp reg_d = newTemp(Ity_V128);
3425                        assign(reg_d, getQReg(dreg));
3426                        assign(res,
3427                               binop(Iop_OrV128,
3428                                     binop(Iop_AndV128, mkexpr(arg_n),
3429                                                        mkexpr(arg_m)),
3430                                     binop(Iop_AndV128,
3431                                           mkexpr(reg_d),
3432                                           unop(Iop_NotV128, mkexpr(arg_m)))));
3433                     } else {
3434                        IRTemp reg_d = newTemp(Ity_I64);
3435                        assign(reg_d, getDRegI64(dreg));
3436                        assign(res,
3437                               binop(Iop_Or64,
3438                                     binop(Iop_And64, mkexpr(arg_n),
3439                                                      mkexpr(arg_m)),
3440                                     binop(Iop_And64,
3441                                           mkexpr(reg_d),
3442                                           unop(Iop_Not64, mkexpr(arg_m)))));
3443                     }
3444                     DIP("vbit %c%u, %c%u, %c%u\n",
3445                         Q ? 'q' : 'd', dreg,
3446                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3447                     break;
3448                  case 3:
3449                     /* VBIF  */
3450                     if (Q) {
3451                        IRTemp reg_d = newTemp(Ity_V128);
3452                        assign(reg_d, getQReg(dreg));
3453                        assign(res,
3454                               binop(Iop_OrV128,
3455                                     binop(Iop_AndV128, mkexpr(reg_d),
3456                                                        mkexpr(arg_m)),
3457                                     binop(Iop_AndV128,
3458                                           mkexpr(arg_n),
3459                                           unop(Iop_NotV128, mkexpr(arg_m)))));
3460                     } else {
3461                        IRTemp reg_d = newTemp(Ity_I64);
3462                        assign(reg_d, getDRegI64(dreg));
3463                        assign(res,
3464                               binop(Iop_Or64,
3465                                     binop(Iop_And64, mkexpr(reg_d),
3466                                                      mkexpr(arg_m)),
3467                                     binop(Iop_And64,
3468                                           mkexpr(arg_n),
3469                                           unop(Iop_Not64, mkexpr(arg_m)))));
3470                     }
3471                     DIP("vbif %c%u, %c%u, %c%u\n",
3472                         Q ? 'q' : 'd', dreg,
3473                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3474                     break;
3475               }
3476            }
3477         }
3478         break;
3479      case 2:
3480         if (B == 0) {
3481            /* VHSUB */
3482            /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1)   */
3483            ULong imm = 0;
3484            IRExpr *imm_val;
3485            IROp subOp;
3486            IROp notOp;
3487            IROp andOp;
3488            IROp shOp;
3489            if (size == 3)
3490               return False;
3491            switch(size) {
3492               case 0: imm = 0x101010101010101LL; break;
3493               case 1: imm = 0x1000100010001LL; break;
3494               case 2: imm = 0x100000001LL; break;
3495               default: vassert(0);
3496            }
3497            if (Q) {
3498               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3499               andOp = Iop_AndV128;
3500               notOp = Iop_NotV128;
3501            } else {
3502               imm_val = mkU64(imm);
3503               andOp = Iop_And64;
3504               notOp = Iop_Not64;
3505            }
3506            if (U) {
3507               switch(size) {
3508                  case 0:
3509                     subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3510                     shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3511                     break;
3512                  case 1:
3513                     subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3514                     shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3515                     break;
3516                  case 2:
3517                     subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3518                     shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3519                     break;
3520                  default:
3521                     vassert(0);
3522               }
3523            } else {
3524               switch(size) {
3525                  case 0:
3526                     subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3527                     shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3528                     break;
3529                  case 1:
3530                     subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3531                     shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3532                     break;
3533                  case 2:
3534                     subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3535                     shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3536                     break;
3537                  default:
3538                     vassert(0);
3539               }
3540            }
3541            assign(res,
3542                   binop(subOp,
3543                         binop(subOp,
3544                               binop(shOp, mkexpr(arg_n), mkU8(1)),
3545                               binop(shOp, mkexpr(arg_m), mkU8(1))),
3546                         binop(andOp,
3547                               binop(andOp,
3548                                     unop(notOp, mkexpr(arg_n)),
3549                                     mkexpr(arg_m)),
3550                               imm_val)));
3551            DIP("vhsub.%c%u %c%u, %c%u, %c%u\n",
3552                U ? 'u' : 's', 8 << size,
3553                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3554                mreg);
3555         } else {
3556            /* VQSUB */
3557            IROp op, op2;
3558            IRTemp tmp;
3559            if (Q) {
3560               switch (size) {
3561                  case 0:
3562                     op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
3563                     op2 = Iop_Sub8x16;
3564                     break;
3565                  case 1:
3566                     op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
3567                     op2 = Iop_Sub16x8;
3568                     break;
3569                  case 2:
3570                     op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
3571                     op2 = Iop_Sub32x4;
3572                     break;
3573                  case 3:
3574                     op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
3575                     op2 = Iop_Sub64x2;
3576                     break;
3577                  default:
3578                     vassert(0);
3579               }
3580            } else {
3581               switch (size) {
3582                  case 0:
3583                     op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
3584                     op2 = Iop_Sub8x8;
3585                     break;
3586                  case 1:
3587                     op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
3588                     op2 = Iop_Sub16x4;
3589                     break;
3590                  case 2:
3591                     op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
3592                     op2 = Iop_Sub32x2;
3593                     break;
3594                  case 3:
3595                     op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
3596                     op2 = Iop_Sub64;
3597                     break;
3598                  default:
3599                     vassert(0);
3600               }
3601            }
3602            if (Q)
3603               tmp = newTemp(Ity_V128);
3604            else
3605               tmp = newTemp(Ity_I64);
3606            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3607            assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3608            setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3609            DIP("vqsub.%c%u %c%u, %c%u, %c%u\n",
3610                U ? 'u' : 's', 8 << size,
3611                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3612                mreg);
3613         }
3614         break;
3615      case 3: {
3616            IROp op;
3617            if (Q) {
3618               switch (size) {
3619                  case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
3620                  case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
3621                  case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
3622                  case 3: return False;
3623                  default: vassert(0);
3624               }
3625            } else {
3626               switch (size) {
3627                  case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
3628                  case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
3629                  case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
3630                  case 3: return False;
3631                  default: vassert(0);
3632               }
3633            }
3634            if (B == 0) {
3635               /* VCGT  */
3636               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3637               DIP("vcgt.%c%u %c%u, %c%u, %c%u\n",
3638                   U ? 'u' : 's', 8 << size,
3639                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3640                   mreg);
3641            } else {
3642               /* VCGE  */
3643               /* VCGE res, argn, argm
3644                    is equal to
3645                  VCGT tmp, argm, argn
3646                  VNOT res, tmp */
3647               assign(res,
3648                      unop(Q ? Iop_NotV128 : Iop_Not64,
3649                           binop(op, mkexpr(arg_m), mkexpr(arg_n))));
3650               DIP("vcge.%c%u %c%u, %c%u, %c%u\n",
3651                   U ? 'u' : 's', 8 << size,
3652                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3653                   mreg);
3654            }
3655         }
3656         break;
3657      case 4:
3658         if (B == 0) {
3659            /* VSHL */
3660            IROp op, sub_op;
3661            IRTemp tmp;
3662            if (U) {
3663               switch (size) {
3664                  case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
3665                  case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
3666                  case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
3667                  case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
3668                  default: vassert(0);
3669               }
3670            } else {
3671               tmp = newTemp(Q ? Ity_V128 : Ity_I64);
3672               switch (size) {
3673                  case 0:
3674                     op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3675                     sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3676                     break;
3677                  case 1:
3678                     op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3679                     sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3680                     break;
3681                  case 2:
3682                     op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3683                     sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3684                     break;
3685                  case 3:
3686                     op = Q ? Iop_Sar64x2 : Iop_Sar64;
3687                     sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
3688                     break;
3689                  default:
3690                     vassert(0);
3691               }
3692            }
3693            if (U) {
3694               if (!Q && (size == 3))
3695                  assign(res, binop(op, mkexpr(arg_m),
3696                                        unop(Iop_64to8, mkexpr(arg_n))));
3697               else
3698                  assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3699            } else {
3700               if (Q)
3701                  assign(tmp, binop(sub_op,
3702                                    binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
3703                                    mkexpr(arg_n)));
3704               else
3705                  assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
3706               if (!Q && (size == 3))
3707                  assign(res, binop(op, mkexpr(arg_m),
3708                                        unop(Iop_64to8, mkexpr(tmp))));
3709               else
3710                  assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
3711            }
3712            DIP("vshl.%c%u %c%u, %c%u, %c%u\n",
3713                U ? 'u' : 's', 8 << size,
3714                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3715                nreg);
3716         } else {
3717            /* VQSHL */
3718            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
3719            IRTemp tmp, shval, mask, old_shval;
3720            UInt i;
3721            ULong esize;
3722            cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3723            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3724            if (U) {
3725               switch (size) {
3726                  case 0:
3727                     op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
3728                     op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
3729                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3730                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3731                     break;
3732                  case 1:
3733                     op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
3734                     op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
3735                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3736                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3737                     break;
3738                  case 2:
3739                     op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
3740                     op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
3741                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3742                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3743                     break;
3744                  case 3:
3745                     op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
3746                     op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
3747                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3748                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3749                     break;
3750                  default:
3751                     vassert(0);
3752               }
3753            } else {
3754               switch (size) {
3755                  case 0:
3756                     op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
3757                     op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3758                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3759                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3760                     break;
3761                  case 1:
3762                     op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
3763                     op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3764                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3765                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3766                     break;
3767                  case 2:
3768                     op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
3769                     op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3770                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3771                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3772                     break;
3773                  case 3:
3774                     op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
3775                     op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
3776                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3777                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3778                     break;
3779                  default:
3780                     vassert(0);
3781               }
3782            }
3783            if (Q) {
3784               tmp = newTemp(Ity_V128);
3785               shval = newTemp(Ity_V128);
3786               mask = newTemp(Ity_V128);
3787            } else {
3788               tmp = newTemp(Ity_I64);
3789               shval = newTemp(Ity_I64);
3790               mask = newTemp(Ity_I64);
3791            }
3792            assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3793            /* Only least significant byte from second argument is used.
3794               Copy this byte to the whole vector element. */
3795            assign(shval, binop(op_shrn,
3796                                binop(op_shln,
3797                                       mkexpr(arg_n),
3798                                       mkU8((8 << size) - 8)),
3799                                mkU8((8 << size) - 8)));
3800            for(i = 0; i < size; i++) {
3801               old_shval = shval;
3802               shval = newTemp(Q ? Ity_V128 : Ity_I64);
3803               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3804                                   mkexpr(old_shval),
3805                                   binop(op_shln,
3806                                         mkexpr(old_shval),
3807                                         mkU8(8 << i))));
3808            }
3809            /* If shift is greater or equal to the element size and
3810               element is non-zero, then QC flag should be set. */
3811            esize = (8 << size) - 1;
3812            esize = (esize <<  8) | esize;
3813            esize = (esize << 16) | esize;
3814            esize = (esize << 32) | esize;
3815            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3816                             binop(cmp_gt, mkexpr(shval),
3817                                           Q ? mkU128(esize) : mkU64(esize)),
3818                             unop(cmp_neq, mkexpr(arg_m))),
3819                       Q ? mkU128(0) : mkU64(0),
3820                       Q, condT);
3821            /* Otherwise QC flag should be set if shift value is positive and
3822               result being right-shifted by the same value is not equal to left
3823               argument. */
3824            assign(mask, binop(cmp_gt, mkexpr(shval),
3825                                       Q ? mkU128(0) : mkU64(0)));
3826            if (!Q && size == 3)
3827               assign(tmp, binop(op_rev, mkexpr(res),
3828                                         unop(Iop_64to8, mkexpr(arg_n))));
3829            else
3830               assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
3831            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3832                             mkexpr(tmp), mkexpr(mask)),
3833                       binop(Q ? Iop_AndV128 : Iop_And64,
3834                             mkexpr(arg_m), mkexpr(mask)),
3835                       Q, condT);
3836            DIP("vqshl.%c%u %c%u, %c%u, %c%u\n",
3837                U ? 'u' : 's', 8 << size,
3838                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3839                nreg);
3840         }
3841         break;
3842      case 5:
3843         if (B == 0) {
3844            /* VRSHL */
3845            IROp op, op_shrn, op_shln, cmp_gt, op_add;
3846            IRTemp shval, old_shval, imm_val, round;
3847            UInt i;
3848            ULong imm;
3849            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3850            imm = 1L;
3851            switch (size) {
3852               case 0: imm = (imm <<  8) | imm; /* fall through */
3853               case 1: imm = (imm << 16) | imm; /* fall through */
3854               case 2: imm = (imm << 32) | imm; /* fall through */
3855               case 3: break;
3856               default: vassert(0);
3857            }
3858            imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3859            round = newTemp(Q ? Ity_V128 : Ity_I64);
3860            assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3861            if (U) {
3862               switch (size) {
3863                  case 0:
3864                     op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
3865                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3866                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3867                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3868                     break;
3869                  case 1:
3870                     op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
3871                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3872                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3873                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3874                     break;
3875                  case 2:
3876                     op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
3877                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3878                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3879                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3880                     break;
3881                  case 3:
3882                     op = Q ? Iop_Shl64x2 : Iop_Shl64;
3883                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
3884                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3885                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3886                     break;
3887                  default:
3888                     vassert(0);
3889               }
3890            } else {
3891               switch (size) {
3892                  case 0:
3893                     op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
3894                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3895                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3896                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3897                     break;
3898                  case 1:
3899                     op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
3900                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3901                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3902                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3903                     break;
3904                  case 2:
3905                     op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
3906                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3907                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3908                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3909                     break;
3910                  case 3:
3911                     op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
3912                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
3913                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3914                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3915                     break;
3916                  default:
3917                     vassert(0);
3918               }
3919            }
3920            if (Q) {
3921               shval = newTemp(Ity_V128);
3922            } else {
3923               shval = newTemp(Ity_I64);
3924            }
3925            /* Only least significant byte from second argument is used.
3926               Copy this byte to the whole vector element. */
3927            assign(shval, binop(op_shrn,
3928                                binop(op_shln,
3929                                       mkexpr(arg_n),
3930                                       mkU8((8 << size) - 8)),
3931                                mkU8((8 << size) - 8)));
3932            for (i = 0; i < size; i++) {
3933               old_shval = shval;
3934               shval = newTemp(Q ? Ity_V128 : Ity_I64);
3935               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3936                                   mkexpr(old_shval),
3937                                   binop(op_shln,
3938                                         mkexpr(old_shval),
3939                                         mkU8(8 << i))));
3940            }
3941            /* Compute the result */
3942            if (!Q && size == 3 && U) {
3943               assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3944                                   binop(op,
3945                                         mkexpr(arg_m),
3946                                         unop(Iop_64to8,
3947                                              binop(op_add,
3948                                                    mkexpr(arg_n),
3949                                                    mkexpr(imm_val)))),
3950                                   binop(Q ? Iop_AndV128 : Iop_And64,
3951                                         mkexpr(imm_val),
3952                                         binop(cmp_gt,
3953                                               Q ? mkU128(0) : mkU64(0),
3954                                               mkexpr(arg_n)))));
3955               assign(res, binop(op_add,
3956                                 binop(op,
3957                                       mkexpr(arg_m),
3958                                       unop(Iop_64to8, mkexpr(arg_n))),
3959                                 mkexpr(round)));
3960            } else {
3961               assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3962                                   binop(op,
3963                                         mkexpr(arg_m),
3964                                         binop(op_add,
3965                                               mkexpr(arg_n),
3966                                               mkexpr(imm_val))),
3967                                   binop(Q ? Iop_AndV128 : Iop_And64,
3968                                         mkexpr(imm_val),
3969                                         binop(cmp_gt,
3970                                               Q ? mkU128(0) : mkU64(0),
3971                                               mkexpr(arg_n)))));
3972               assign(res, binop(op_add,
3973                                 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
3974                                 mkexpr(round)));
3975            }
3976            DIP("vrshl.%c%u %c%u, %c%u, %c%u\n",
3977                U ? 'u' : 's', 8 << size,
3978                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3979                nreg);
3980         } else {
3981            /* VQRSHL */
3982            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
3983            IRTemp tmp, shval, mask, old_shval, imm_val, round;
3984            UInt i;
3985            ULong esize, imm;
3986            cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3987            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3988            imm = 1L;
3989            switch (size) {
3990               case 0: imm = (imm <<  8) | imm; /* fall through */
3991               case 1: imm = (imm << 16) | imm; /* fall through */
3992               case 2: imm = (imm << 32) | imm; /* fall through */
3993               case 3: break;
3994               default: vassert(0);
3995            }
3996            imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3997            round = newTemp(Q ? Ity_V128 : Ity_I64);
3998            assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3999            if (U) {
4000               switch (size) {
4001                  case 0:
4002                     op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
4003                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4004                     op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
4005                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4006                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4007                     break;
4008                  case 1:
4009                     op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
4010                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4011                     op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
4012                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4013                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4014                     break;
4015                  case 2:
4016                     op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
4017                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4018                     op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
4019                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4020                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4021                     break;
4022                  case 3:
4023                     op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
4024                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
4025                     op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
4026                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4027                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4028                     break;
4029                  default:
4030                     vassert(0);
4031               }
4032            } else {
4033               switch (size) {
4034                  case 0:
4035                     op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
4036                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4037                     op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
4038                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4039                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4040                     break;
4041                  case 1:
4042                     op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
4043                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4044                     op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
4045                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4046                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4047                     break;
4048                  case 2:
4049                     op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
4050                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4051                     op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
4052                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4053                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4054                     break;
4055                  case 3:
4056                     op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
4057                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
4058                     op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
4059                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4060                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4061                     break;
4062                  default:
4063                     vassert(0);
4064               }
4065            }
4066            if (Q) {
4067               tmp = newTemp(Ity_V128);
4068               shval = newTemp(Ity_V128);
4069               mask = newTemp(Ity_V128);
4070            } else {
4071               tmp = newTemp(Ity_I64);
4072               shval = newTemp(Ity_I64);
4073               mask = newTemp(Ity_I64);
4074            }
4075            /* Only least significant byte from second argument is used.
4076               Copy this byte to the whole vector element. */
4077            assign(shval, binop(op_shrn,
4078                                binop(op_shln,
4079                                       mkexpr(arg_n),
4080                                       mkU8((8 << size) - 8)),
4081                                mkU8((8 << size) - 8)));
4082            for (i = 0; i < size; i++) {
4083               old_shval = shval;
4084               shval = newTemp(Q ? Ity_V128 : Ity_I64);
4085               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
4086                                   mkexpr(old_shval),
4087                                   binop(op_shln,
4088                                         mkexpr(old_shval),
4089                                         mkU8(8 << i))));
4090            }
4091            /* Compute the result */
4092            assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
4093                                binop(op,
4094                                      mkexpr(arg_m),
4095                                      binop(op_add,
4096                                            mkexpr(arg_n),
4097                                            mkexpr(imm_val))),
4098                                binop(Q ? Iop_AndV128 : Iop_And64,
4099                                      mkexpr(imm_val),
4100                                      binop(cmp_gt,
4101                                            Q ? mkU128(0) : mkU64(0),
4102                                            mkexpr(arg_n)))));
4103            assign(res, binop(op_add,
4104                              binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4105                              mkexpr(round)));
4106            /* If shift is greater or equal to the element size and element is
4107               non-zero, then QC flag should be set. */
4108            esize = (8 << size) - 1;
4109            esize = (esize <<  8) | esize;
4110            esize = (esize << 16) | esize;
4111            esize = (esize << 32) | esize;
4112            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4113                             binop(cmp_gt, mkexpr(shval),
4114                                           Q ? mkU128(esize) : mkU64(esize)),
4115                             unop(cmp_neq, mkexpr(arg_m))),
4116                       Q ? mkU128(0) : mkU64(0),
4117                       Q, condT);
4118            /* Otherwise QC flag should be set if shift value is positive and
4119               result being right-shifted by the same value is not equal to
4120               left argument. */
4121            assign(mask, binop(cmp_gt, mkexpr(shval),
4122                               Q ? mkU128(0) : mkU64(0)));
4123            if (!Q && size == 3)
4124               assign(tmp, binop(op_rev, mkexpr(res),
4125                                         unop(Iop_64to8, mkexpr(arg_n))));
4126            else
4127               assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
4128            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4129                             mkexpr(tmp), mkexpr(mask)),
4130                       binop(Q ? Iop_AndV128 : Iop_And64,
4131                             mkexpr(arg_m), mkexpr(mask)),
4132                       Q, condT);
4133            DIP("vqrshl.%c%u %c%u, %c%u, %c%u\n",
4134                U ? 'u' : 's', 8 << size,
4135                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
4136                nreg);
4137         }
4138         break;
4139      case 6:
4140         /* VMAX, VMIN  */
4141         if (B == 0) {
4142            /* VMAX */
4143            IROp op;
4144            if (U == 0) {
4145               switch (size) {
4146                  case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
4147                  case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
4148                  case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
4149                  case 3: return False;
4150                  default: vassert(0);
4151               }
4152            } else {
4153               switch (size) {
4154                  case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
4155                  case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
4156                  case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
4157                  case 3: return False;
4158                  default: vassert(0);
4159               }
4160            }
4161            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4162            DIP("vmax.%c%u %c%u, %c%u, %c%u\n",
4163                U ? 'u' : 's', 8 << size,
4164                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4165                mreg);
4166         } else {
4167            /* VMIN */
4168            IROp op;
4169            if (U == 0) {
4170               switch (size) {
4171                  case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
4172                  case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
4173                  case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
4174                  case 3: return False;
4175                  default: vassert(0);
4176               }
4177            } else {
4178               switch (size) {
4179                  case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
4180                  case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
4181                  case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
4182                  case 3: return False;
4183                  default: vassert(0);
4184               }
4185            }
4186            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4187            DIP("vmin.%c%u %c%u, %c%u, %c%u\n",
4188                U ? 'u' : 's', 8 << size,
4189                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4190                mreg);
4191         }
4192         break;
4193      case 7:
4194         if (B == 0) {
4195            /* VABD */
4196            IROp op_cmp, op_sub;
4197            IRTemp cond;
4198            if ((theInstr >> 23) & 1) {
4199               vpanic("VABDL should not be in dis_neon_data_3same\n");
4200            }
4201            if (Q) {
4202               switch (size) {
4203                  case 0:
4204                     op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4205                     op_sub = Iop_Sub8x16;
4206                     break;
4207                  case 1:
4208                     op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4209                     op_sub = Iop_Sub16x8;
4210                     break;
4211                  case 2:
4212                     op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4213                     op_sub = Iop_Sub32x4;
4214                     break;
4215                  case 3:
4216                     return False;
4217                  default:
4218                     vassert(0);
4219               }
4220            } else {
4221               switch (size) {
4222                  case 0:
4223                     op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4224                     op_sub = Iop_Sub8x8;
4225                     break;
4226                  case 1:
4227                     op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4228                     op_sub = Iop_Sub16x4;
4229                     break;
4230                  case 2:
4231                     op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4232                     op_sub = Iop_Sub32x2;
4233                     break;
4234                  case 3:
4235                     return False;
4236                  default:
4237                     vassert(0);
4238               }
4239            }
4240            if (Q) {
4241               cond = newTemp(Ity_V128);
4242            } else {
4243               cond = newTemp(Ity_I64);
4244            }
4245            assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4246            assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
4247                              binop(Q ? Iop_AndV128 : Iop_And64,
4248                                    binop(op_sub, mkexpr(arg_n),
4249                                                  mkexpr(arg_m)),
4250                                    mkexpr(cond)),
4251                              binop(Q ? Iop_AndV128 : Iop_And64,
4252                                    binop(op_sub, mkexpr(arg_m),
4253                                                  mkexpr(arg_n)),
4254                                    unop(Q ? Iop_NotV128 : Iop_Not64,
4255                                         mkexpr(cond)))));
4256            DIP("vabd.%c%u %c%u, %c%u, %c%u\n",
4257                U ? 'u' : 's', 8 << size,
4258                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4259                mreg);
4260         } else {
4261            /* VABA */
4262            IROp op_cmp, op_sub, op_add;
4263            IRTemp cond, acc, tmp;
4264            if ((theInstr >> 23) & 1) {
4265               vpanic("VABAL should not be in dis_neon_data_3same");
4266            }
4267            if (Q) {
4268               switch (size) {
4269                  case 0:
4270                     op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4271                     op_sub = Iop_Sub8x16;
4272                     op_add = Iop_Add8x16;
4273                     break;
4274                  case 1:
4275                     op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4276                     op_sub = Iop_Sub16x8;
4277                     op_add = Iop_Add16x8;
4278                     break;
4279                  case 2:
4280                     op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4281                     op_sub = Iop_Sub32x4;
4282                     op_add = Iop_Add32x4;
4283                     break;
4284                  case 3:
4285                     return False;
4286                  default:
4287                     vassert(0);
4288               }
4289            } else {
4290               switch (size) {
4291                  case 0:
4292                     op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4293                     op_sub = Iop_Sub8x8;
4294                     op_add = Iop_Add8x8;
4295                     break;
4296                  case 1:
4297                     op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4298                     op_sub = Iop_Sub16x4;
4299                     op_add = Iop_Add16x4;
4300                     break;
4301                  case 2:
4302                     op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4303                     op_sub = Iop_Sub32x2;
4304                     op_add = Iop_Add32x2;
4305                     break;
4306                  case 3:
4307                     return False;
4308                  default:
4309                     vassert(0);
4310               }
4311            }
4312            if (Q) {
4313               cond = newTemp(Ity_V128);
4314               acc = newTemp(Ity_V128);
4315               tmp = newTemp(Ity_V128);
4316               assign(acc, getQReg(dreg));
4317            } else {
4318               cond = newTemp(Ity_I64);
4319               acc = newTemp(Ity_I64);
4320               tmp = newTemp(Ity_I64);
4321               assign(acc, getDRegI64(dreg));
4322            }
4323            assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4324            assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
4325                              binop(Q ? Iop_AndV128 : Iop_And64,
4326                                    binop(op_sub, mkexpr(arg_n),
4327                                                  mkexpr(arg_m)),
4328                                    mkexpr(cond)),
4329                              binop(Q ? Iop_AndV128 : Iop_And64,
4330                                    binop(op_sub, mkexpr(arg_m),
4331                                                  mkexpr(arg_n)),
4332                                    unop(Q ? Iop_NotV128 : Iop_Not64,
4333                                         mkexpr(cond)))));
4334            assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
4335            DIP("vaba.%c%u %c%u, %c%u, %c%u\n",
4336                U ? 'u' : 's', 8 << size,
4337                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4338                mreg);
4339         }
4340         break;
4341      case 8:
4342         if (B == 0) {
4343            IROp op;
4344            if (U == 0) {
4345               /* VADD  */
4346               switch (size) {
4347                  case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
4348                  case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
4349                  case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
4350                  case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
4351                  default: vassert(0);
4352               }
4353               DIP("vadd.i%u %c%u, %c%u, %c%u\n",
4354                   8 << size, Q ? 'q' : 'd',
4355                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4356            } else {
4357               /* VSUB  */
4358               switch (size) {
4359                  case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
4360                  case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
4361                  case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
4362                  case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
4363                  default: vassert(0);
4364               }
4365               DIP("vsub.i%u %c%u, %c%u, %c%u\n",
4366                   8 << size, Q ? 'q' : 'd',
4367                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4368            }
4369            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4370         } else {
4371            IROp op;
4372            switch (size) {
4373               case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
4374               case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
4375               case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
4376               case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
4377               default: vassert(0);
4378            }
4379            if (U == 0) {
4380               /* VTST  */
4381               assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
4382                                          mkexpr(arg_n),
4383                                          mkexpr(arg_m))));
4384               DIP("vtst.%u %c%u, %c%u, %c%u\n",
4385                   8 << size, Q ? 'q' : 'd',
4386                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4387            } else {
4388               /* VCEQ  */
4389               assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
4390                                unop(op,
4391                                     binop(Q ? Iop_XorV128 : Iop_Xor64,
4392                                           mkexpr(arg_n),
4393                                           mkexpr(arg_m)))));
4394               DIP("vceq.i%u %c%u, %c%u, %c%u\n",
4395                   8 << size, Q ? 'q' : 'd',
4396                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4397            }
4398         }
4399         break;
4400      case 9:
4401         if (B == 0) {
4402            /* VMLA, VMLS (integer) */
4403            IROp op, op2;
4404            UInt P = (theInstr >> 24) & 1;
4405            if (P) {
4406               switch (size) {
4407                  case 0:
4408                     op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4409                     op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
4410                     break;
4411                  case 1:
4412                     op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4413                     op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
4414                     break;
4415                  case 2:
4416                     op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4417                     op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
4418                     break;
4419                  case 3:
4420                     return False;
4421                  default:
4422                     vassert(0);
4423               }
4424            } else {
4425               switch (size) {
4426                  case 0:
4427                     op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4428                     op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
4429                     break;
4430                  case 1:
4431                     op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4432                     op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
4433                     break;
4434                  case 2:
4435                     op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4436                     op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
4437                     break;
4438                  case 3:
4439                     return False;
4440                  default:
4441                     vassert(0);
4442               }
4443            }
4444            assign(res, binop(op2,
4445                              Q ? getQReg(dreg) : getDRegI64(dreg),
4446                              binop(op, mkexpr(arg_n), mkexpr(arg_m))));
4447            DIP("vml%c.i%u %c%u, %c%u, %c%u\n",
4448                P ? 's' : 'a', 8 << size,
4449                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4450                mreg);
4451         } else {
4452            /* VMUL */
4453            IROp op;
4454            UInt P = (theInstr >> 24) & 1;
4455            if (P) {
4456               switch (size) {
4457                  case 0:
4458                     op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
4459                     break;
4460                  case 1: case 2: case 3: return False;
4461                  default: vassert(0);
4462               }
4463            } else {
4464               switch (size) {
4465                  case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
4466                  case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
4467                  case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
4468                  case 3: return False;
4469                  default: vassert(0);
4470               }
4471            }
4472            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4473            DIP("vmul.%c%u %c%u, %c%u, %c%u\n",
4474                P ? 'p' : 'i', 8 << size,
4475                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4476                mreg);
4477         }
4478         break;
4479      case 10: {
4480         /* VPMAX, VPMIN  */
4481         UInt P = (theInstr >> 4) & 1;
4482         IROp op;
4483         if (Q)
4484            return False;
4485         if (P) {
4486            switch (size) {
4487               case 0: op = U ? Iop_PwMin8Ux8  : Iop_PwMin8Sx8; break;
4488               case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
4489               case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
4490               case 3: return False;
4491               default: vassert(0);
4492            }
4493         } else {
4494            switch (size) {
4495               case 0: op = U ? Iop_PwMax8Ux8  : Iop_PwMax8Sx8; break;
4496               case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
4497               case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
4498               case 3: return False;
4499               default: vassert(0);
4500            }
4501         }
4502         assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4503         DIP("vp%s.%c%u %c%u, %c%u, %c%u\n",
4504             P ? "min" : "max", U ? 'u' : 's',
4505             8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4506             Q ? 'q' : 'd', mreg);
4507         break;
4508      }
4509      case 11:
4510         if (B == 0) {
4511            if (U == 0) {
4512               /* VQDMULH  */
4513               IROp op ,op2;
4514               ULong imm;
4515               switch (size) {
4516                  case 0: case 3:
4517                     return False;
4518                  case 1:
4519                     op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
4520                     op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4521                     imm = 1LL << 15;
4522                     imm = (imm << 16) | imm;
4523                     imm = (imm << 32) | imm;
4524                     break;
4525                  case 2:
4526                     op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
4527                     op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4528                     imm = 1LL << 31;
4529                     imm = (imm << 32) | imm;
4530                     break;
4531                  default:
4532                     vassert(0);
4533               }
4534               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4535               setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4536                                binop(op2, mkexpr(arg_n),
4537                                           Q ? mkU128(imm) : mkU64(imm)),
4538                                binop(op2, mkexpr(arg_m),
4539                                           Q ? mkU128(imm) : mkU64(imm))),
4540                          Q ? mkU128(0) : mkU64(0),
4541                          Q, condT);
4542               DIP("vqdmulh.s%u %c%u, %c%u, %c%u\n",
4543                   8 << size, Q ? 'q' : 'd',
4544                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4545            } else {
4546               /* VQRDMULH */
4547               IROp op ,op2;
4548               ULong imm;
4549               switch(size) {
4550                  case 0: case 3:
4551                     return False;
4552                  case 1:
4553                     imm = 1LL << 15;
4554                     imm = (imm << 16) | imm;
4555                     imm = (imm << 32) | imm;
4556                     op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
4557                     op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4558                     break;
4559                  case 2:
4560                     imm = 1LL << 31;
4561                     imm = (imm << 32) | imm;
4562                     op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
4563                     op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4564                     break;
4565                  default:
4566                     vassert(0);
4567               }
4568               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4569               setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4570                                binop(op2, mkexpr(arg_n),
4571                                           Q ? mkU128(imm) : mkU64(imm)),
4572                                binop(op2, mkexpr(arg_m),
4573                                           Q ? mkU128(imm) : mkU64(imm))),
4574                          Q ? mkU128(0) : mkU64(0),
4575                          Q, condT);
4576               DIP("vqrdmulh.s%u %c%u, %c%u, %c%u\n",
4577                   8 << size, Q ? 'q' : 'd',
4578                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4579            }
4580         } else {
4581            if (U == 0) {
4582               /* VPADD */
4583               IROp op;
4584               if (Q)
4585                  return False;
4586               switch (size) {
4587                  case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8;  break;
4588                  case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
4589                  case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
4590                  case 3: return False;
4591                  default: vassert(0);
4592               }
4593               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4594               DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
4595                   8 << size, Q ? 'q' : 'd',
4596                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4597            }
4598         }
4599         break;
4600      /* Starting from here these are FP SIMD cases */
4601      case 13:
4602         if (B == 0) {
4603            IROp op;
4604            if (U == 0) {
4605               if ((C >> 1) == 0) {
4606                  /* VADD  */
4607                  op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2 ;
4608                  DIP("vadd.f32 %c%u, %c%u, %c%u\n",
4609                      Q ? 'q' : 'd', dreg,
4610                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4611               } else {
4612                  /* VSUB  */
4613                  op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2 ;
4614                  DIP("vsub.f32 %c%u, %c%u, %c%u\n",
4615                      Q ? 'q' : 'd', dreg,
4616                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4617               }
4618            } else {
4619               if ((C >> 1) == 0) {
4620                  /* VPADD */
4621                  if (Q)
4622                     return False;
4623                  op = Iop_PwAdd32Fx2;
4624                  DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4625               } else {
4626                  /* VABD  */
4627                  if (Q) {
4628                     assign(res, unop(Iop_Abs32Fx4,
4629                                      triop(Iop_Sub32Fx4,
4630                                            get_FAKE_roundingmode(),
4631                                            mkexpr(arg_n),
4632                                            mkexpr(arg_m))));
4633                  } else {
4634                     assign(res, unop(Iop_Abs32Fx2,
4635                                      binop(Iop_Sub32Fx2,
4636                                            mkexpr(arg_n),
4637                                            mkexpr(arg_m))));
4638                  }
4639                  DIP("vabd.f32 %c%u, %c%u, %c%u\n",
4640                      Q ? 'q' : 'd', dreg,
4641                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4642                  break;
4643               }
4644            }
4645            assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4646         } else {
4647            if (U == 0) {
4648               /* VMLA, VMLS  */
4649               IROp op, op2;
4650               UInt P = (theInstr >> 21) & 1;
4651               if (P) {
4652                  switch (size & 1) {
4653                     case 0:
4654                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4655                        op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
4656                        break;
4657                     case 1: return False;
4658                     default: vassert(0);
4659                  }
4660               } else {
4661                  switch (size & 1) {
4662                     case 0:
4663                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4664                        op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
4665                        break;
4666                     case 1: return False;
4667                     default: vassert(0);
4668                  }
4669               }
4670               assign(res, binop_w_fake_RM(
4671                              op2,
4672                              Q ? getQReg(dreg) : getDRegI64(dreg),
4673                              binop_w_fake_RM(op, mkexpr(arg_n),
4674                                                  mkexpr(arg_m))));
4675
4676               DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
4677                   P ? 's' : 'a', Q ? 'q' : 'd',
4678                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4679            } else {
4680               /* VMUL  */
4681               IROp op;
4682               if ((C >> 1) != 0)
4683                  return False;
4684               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ;
4685               assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4686               DIP("vmul.f32 %c%u, %c%u, %c%u\n",
4687                   Q ? 'q' : 'd', dreg,
4688                   Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4689            }
4690         }
4691         break;
4692      case 14:
4693         if (B == 0) {
4694            if (U == 0) {
4695               if ((C >> 1) == 0) {
4696                  /* VCEQ  */
4697                  IROp op;
4698                  if ((theInstr >> 20) & 1)
4699                     return False;
4700                  op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2;
4701                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4702                  DIP("vceq.f32 %c%u, %c%u, %c%u\n",
4703                      Q ? 'q' : 'd', dreg,
4704                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4705               } else {
4706                  return False;
4707               }
4708            } else {
4709               if ((C >> 1) == 0) {
4710                  /* VCGE  */
4711                  IROp op;
4712                  if ((theInstr >> 20) & 1)
4713                     return False;
4714                  op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4715                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4716                  DIP("vcge.f32 %c%u, %c%u, %c%u\n",
4717                      Q ? 'q' : 'd', dreg,
4718                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4719               } else {
4720                  /* VCGT  */
4721                  IROp op;
4722                  if ((theInstr >> 20) & 1)
4723                     return False;
4724                  op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4725                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4726                  DIP("vcgt.f32 %c%u, %c%u, %c%u\n",
4727                      Q ? 'q' : 'd', dreg,
4728                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4729               }
4730            }
4731         } else {
4732            if (U == 1) {
4733               /* VACGE, VACGT */
4734               UInt op_bit = (theInstr >> 21) & 1;
4735               IROp op, op2;
4736               op2 = Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2;
4737               if (op_bit) {
4738                  op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4739                  assign(res, binop(op,
4740                                    unop(op2, mkexpr(arg_n)),
4741                                    unop(op2, mkexpr(arg_m))));
4742               } else {
4743                  op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4744                  assign(res, binop(op,
4745                                    unop(op2, mkexpr(arg_n)),
4746                                    unop(op2, mkexpr(arg_m))));
4747               }
4748               DIP("vacg%c.f32 %c%u, %c%u, %c%u\n", op_bit ? 't' : 'e',
4749                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4750                   Q ? 'q' : 'd', mreg);
4751            }
4752         }
4753         break;
4754      case 15:
4755         if (B == 0) {
4756            if (U == 0) {
4757               /* VMAX, VMIN  */
4758               IROp op;
4759               if ((theInstr >> 20) & 1)
4760                  return False;
4761               if ((theInstr >> 21) & 1) {
4762                  op = Q ? Iop_Min32Fx4 : Iop_Min32Fx2;
4763                  DIP("vmin.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4764                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4765               } else {
4766                  op = Q ? Iop_Max32Fx4 : Iop_Max32Fx2;
4767                  DIP("vmax.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4768                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4769               }
4770               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4771            } else {
4772               /* VPMAX, VPMIN   */
4773               IROp op;
4774               if (Q)
4775                  return False;
4776               if ((theInstr >> 20) & 1)
4777                  return False;
4778               if ((theInstr >> 21) & 1) {
4779                  op = Iop_PwMin32Fx2;
4780                  DIP("vpmin.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4781               } else {
4782                  op = Iop_PwMax32Fx2;
4783                  DIP("vpmax.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4784               }
4785               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4786            }
4787         } else {
4788            if (U == 0) {
4789               if ((C >> 1) == 0) {
4790                  /* VRECPS */
4791                  if ((theInstr >> 20) & 1)
4792                     return False;
4793                  assign(res, binop(Q ? Iop_Recps32Fx4 : Iop_Recps32Fx2,
4794                                    mkexpr(arg_n),
4795                                    mkexpr(arg_m)));
4796                  DIP("vrecps.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4797                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4798               } else {
4799                  /* VRSQRTS  */
4800                  if ((theInstr >> 20) & 1)
4801                     return False;
4802                  assign(res, binop(Q ? Iop_Rsqrts32Fx4 : Iop_Rsqrts32Fx2,
4803                                    mkexpr(arg_n),
4804                                    mkexpr(arg_m)));
4805                  DIP("vrsqrts.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4806                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4807               }
4808            }
4809         }
4810         break;
4811   }
4812
4813   if (Q) {
4814      putQReg(dreg, mkexpr(res), condT);
4815   } else {
4816      putDRegI64(dreg, mkexpr(res), condT);
4817   }
4818
4819   return True;
4820}
4821
4822/* A7.4.2 Three registers of different length */
4823static
4824Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
4825{
4826   UInt A = (theInstr >> 8) & 0xf;
4827   UInt B = (theInstr >> 20) & 3;
4828   UInt U = (theInstr >> 24) & 1;
4829   UInt P = (theInstr >> 9) & 1;
4830   UInt mreg = get_neon_m_regno(theInstr);
4831   UInt nreg = get_neon_n_regno(theInstr);
4832   UInt dreg = get_neon_d_regno(theInstr);
4833   UInt size = B;
4834   ULong imm;
4835   IRTemp res, arg_m, arg_n, cond, tmp;
4836   IROp cvt, cvt2, cmp, op, op2, sh, add;
4837   switch (A) {
4838      case 0: case 1: case 2: case 3:
4839         /* VADDL, VADDW, VSUBL, VSUBW */
4840         if (dreg & 1)
4841            return False;
4842         dreg >>= 1;
4843         size = B;
4844         switch (size) {
4845            case 0:
4846               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
4847               op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
4848               break;
4849            case 1:
4850               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
4851               op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
4852               break;
4853            case 2:
4854               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
4855               op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
4856               break;
4857            case 3:
4858               return False;
4859            default:
4860               vassert(0);
4861         }
4862         arg_n = newTemp(Ity_V128);
4863         arg_m = newTemp(Ity_V128);
4864         if (A & 1) {
4865            if (nreg & 1)
4866               return False;
4867            nreg >>= 1;
4868            assign(arg_n, getQReg(nreg));
4869         } else {
4870            assign(arg_n, unop(cvt, getDRegI64(nreg)));
4871         }
4872         assign(arg_m, unop(cvt, getDRegI64(mreg)));
4873         putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
4874                       condT);
4875         DIP("v%s%c.%c%u q%u, %c%u, d%u\n", (A & 2) ? "sub" : "add",
4876             (A & 1) ? 'w' : 'l', U ? 'u' : 's', 8 << size, dreg,
4877             (A & 1) ? 'q' : 'd', nreg, mreg);
4878         return True;
4879      case 4:
4880         /* VADDHN, VRADDHN */
4881         if (mreg & 1)
4882            return False;
4883         mreg >>= 1;
4884         if (nreg & 1)
4885            return False;
4886         nreg >>= 1;
4887         size = B;
4888         switch (size) {
4889            case 0:
4890               op = Iop_Add16x8;
4891               cvt = Iop_NarrowUn16to8x8;
4892               sh = Iop_ShrN16x8;
4893               imm = 1U << 7;
4894               imm = (imm << 16) | imm;
4895               imm = (imm << 32) | imm;
4896               break;
4897            case 1:
4898               op = Iop_Add32x4;
4899               cvt = Iop_NarrowUn32to16x4;
4900               sh = Iop_ShrN32x4;
4901               imm = 1U << 15;
4902               imm = (imm << 32) | imm;
4903               break;
4904            case 2:
4905               op = Iop_Add64x2;
4906               cvt = Iop_NarrowUn64to32x2;
4907               sh = Iop_ShrN64x2;
4908               imm = 1U << 31;
4909               break;
4910            case 3:
4911               return False;
4912            default:
4913               vassert(0);
4914         }
4915         tmp = newTemp(Ity_V128);
4916         res = newTemp(Ity_V128);
4917         assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
4918         if (U) {
4919            /* VRADDHN */
4920            assign(res, binop(op, mkexpr(tmp),
4921                     binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
4922         } else {
4923            assign(res, mkexpr(tmp));
4924         }
4925         putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
4926                    condT);
4927         DIP("v%saddhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
4928             nreg, mreg);
4929         return True;
4930      case 5:
4931         /* VABAL */
4932         if (!((theInstr >> 23) & 1)) {
4933            vpanic("VABA should not be in dis_neon_data_3diff\n");
4934         }
4935         if (dreg & 1)
4936            return False;
4937         dreg >>= 1;
4938         switch (size) {
4939            case 0:
4940               cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4941               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
4942               cvt2 = Iop_Widen8Sto16x8;
4943               op = Iop_Sub16x8;
4944               op2 = Iop_Add16x8;
4945               break;
4946            case 1:
4947               cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4948               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
4949               cvt2 = Iop_Widen16Sto32x4;
4950               op = Iop_Sub32x4;
4951               op2 = Iop_Add32x4;
4952               break;
4953            case 2:
4954               cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4955               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
4956               cvt2 = Iop_Widen32Sto64x2;
4957               op = Iop_Sub64x2;
4958               op2 = Iop_Add64x2;
4959               break;
4960            case 3:
4961               return False;
4962            default:
4963               vassert(0);
4964         }
4965         arg_n = newTemp(Ity_V128);
4966         arg_m = newTemp(Ity_V128);
4967         cond = newTemp(Ity_V128);
4968         res = newTemp(Ity_V128);
4969         assign(arg_n, unop(cvt, getDRegI64(nreg)));
4970         assign(arg_m, unop(cvt, getDRegI64(mreg)));
4971         assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
4972                                            getDRegI64(mreg))));
4973         assign(res, binop(op2,
4974                           binop(Iop_OrV128,
4975                                 binop(Iop_AndV128,
4976                                       binop(op, mkexpr(arg_n), mkexpr(arg_m)),
4977                                       mkexpr(cond)),
4978                                 binop(Iop_AndV128,
4979                                       binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4980                                       unop(Iop_NotV128, mkexpr(cond)))),
4981                           getQReg(dreg)));
4982         putQReg(dreg, mkexpr(res), condT);
4983         DIP("vabal.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
4984             nreg, mreg);
4985         return True;
4986      case 6:
4987         /* VSUBHN, VRSUBHN */
4988         if (mreg & 1)
4989            return False;
4990         mreg >>= 1;
4991         if (nreg & 1)
4992            return False;
4993         nreg >>= 1;
4994         size = B;
4995         switch (size) {
4996            case 0:
4997               op = Iop_Sub16x8;
4998               op2 = Iop_Add16x8;
4999               cvt = Iop_NarrowUn16to8x8;
5000               sh = Iop_ShrN16x8;
5001               imm = 1U << 7;
5002               imm = (imm << 16) | imm;
5003               imm = (imm << 32) | imm;
5004               break;
5005            case 1:
5006               op = Iop_Sub32x4;
5007               op2 = Iop_Add32x4;
5008               cvt = Iop_NarrowUn32to16x4;
5009               sh = Iop_ShrN32x4;
5010               imm = 1U << 15;
5011               imm = (imm << 32) | imm;
5012               break;
5013            case 2:
5014               op = Iop_Sub64x2;
5015               op2 = Iop_Add64x2;
5016               cvt = Iop_NarrowUn64to32x2;
5017               sh = Iop_ShrN64x2;
5018               imm = 1U << 31;
5019               break;
5020            case 3:
5021               return False;
5022            default:
5023               vassert(0);
5024         }
5025         tmp = newTemp(Ity_V128);
5026         res = newTemp(Ity_V128);
5027         assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
5028         if (U) {
5029            /* VRSUBHN */
5030            assign(res, binop(op2, mkexpr(tmp),
5031                     binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
5032         } else {
5033            assign(res, mkexpr(tmp));
5034         }
5035         putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
5036                    condT);
5037         DIP("v%ssubhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
5038             nreg, mreg);
5039         return True;
5040      case 7:
5041         /* VABDL */
5042         if (!((theInstr >> 23) & 1)) {
5043            vpanic("VABL should not be in dis_neon_data_3diff\n");
5044         }
5045         if (dreg & 1)
5046            return False;
5047         dreg >>= 1;
5048         switch (size) {
5049            case 0:
5050               cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
5051               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
5052               cvt2 = Iop_Widen8Sto16x8;
5053               op = Iop_Sub16x8;
5054               break;
5055            case 1:
5056               cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
5057               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
5058               cvt2 = Iop_Widen16Sto32x4;
5059               op = Iop_Sub32x4;
5060               break;
5061            case 2:
5062               cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
5063               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
5064               cvt2 = Iop_Widen32Sto64x2;
5065               op = Iop_Sub64x2;
5066               break;
5067            case 3:
5068               return False;
5069            default:
5070               vassert(0);
5071         }
5072         arg_n = newTemp(Ity_V128);
5073         arg_m = newTemp(Ity_V128);
5074         cond = newTemp(Ity_V128);
5075         res = newTemp(Ity_V128);
5076         assign(arg_n, unop(cvt, getDRegI64(nreg)));
5077         assign(arg_m, unop(cvt, getDRegI64(mreg)));
5078         assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
5079                                            getDRegI64(mreg))));
5080         assign(res, binop(Iop_OrV128,
5081                           binop(Iop_AndV128,
5082                                 binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5083                                 mkexpr(cond)),
5084                           binop(Iop_AndV128,
5085                                 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
5086                                 unop(Iop_NotV128, mkexpr(cond)))));
5087         putQReg(dreg, mkexpr(res), condT);
5088         DIP("vabdl.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
5089             nreg, mreg);
5090         return True;
5091      case 8:
5092      case 10:
5093         /* VMLAL, VMLSL (integer) */
5094         if (dreg & 1)
5095            return False;
5096         dreg >>= 1;
5097         size = B;
5098         switch (size) {
5099            case 0:
5100               op = U ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5101               op2 = P ? Iop_Sub16x8 : Iop_Add16x8;
5102               break;
5103            case 1:
5104               op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5105               op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5106               break;
5107            case 2:
5108               op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5109               op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5110               break;
5111            case 3:
5112               return False;
5113            default:
5114               vassert(0);
5115         }
5116         res = newTemp(Ity_V128);
5117         assign(res, binop(op, getDRegI64(nreg),getDRegI64(mreg)));
5118         putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5119         DIP("vml%cl.%c%u q%u, d%u, d%u\n", P ? 's' : 'a', U ? 'u' : 's',
5120             8 << size, dreg, nreg, mreg);
5121         return True;
5122      case 9:
5123      case 11:
5124         /* VQDMLAL, VQDMLSL */
5125         if (U)
5126            return False;
5127         if (dreg & 1)
5128            return False;
5129         dreg >>= 1;
5130         size = B;
5131         switch (size) {
5132            case 0: case 3:
5133               return False;
5134            case 1:
5135               op = Iop_QDMulLong16Sx4;
5136               cmp = Iop_CmpEQ16x4;
5137               add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5138               op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5139               imm = 1LL << 15;
5140               imm = (imm << 16) | imm;
5141               imm = (imm << 32) | imm;
5142               break;
5143            case 2:
5144               op = Iop_QDMulLong32Sx2;
5145               cmp = Iop_CmpEQ32x2;
5146               add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5147               op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5148               imm = 1LL << 31;
5149               imm = (imm << 32) | imm;
5150               break;
5151            default:
5152               vassert(0);
5153         }
5154         res = newTemp(Ity_V128);
5155         tmp = newTemp(Ity_V128);
5156         assign(res, binop(op, getDRegI64(nreg), getDRegI64(mreg)));
5157         assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5158         setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5159                    True, condT);
5160         setFlag_QC(binop(Iop_And64,
5161                          binop(cmp, getDRegI64(nreg), mkU64(imm)),
5162                          binop(cmp, getDRegI64(mreg), mkU64(imm))),
5163                    mkU64(0),
5164                    False, condT);
5165         putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5166         DIP("vqdml%cl.s%u q%u, d%u, d%u\n", P ? 's' : 'a', 8 << size, dreg,
5167             nreg, mreg);
5168         return True;
5169      case 12:
5170      case 14:
5171         /* VMULL (integer or polynomial) */
5172         if (dreg & 1)
5173            return False;
5174         dreg >>= 1;
5175         size = B;
5176         switch (size) {
5177            case 0:
5178               op = (U) ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5179               if (P)
5180                  op = Iop_PolynomialMull8x8;
5181               break;
5182            case 1:
5183               op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5184               break;
5185            case 2:
5186               op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5187               break;
5188            default:
5189               vassert(0);
5190         }
5191         putQReg(dreg, binop(op, getDRegI64(nreg),
5192                                 getDRegI64(mreg)), condT);
5193         DIP("vmull.%c%u q%u, d%u, d%u\n", P ? 'p' : (U ? 'u' : 's'),
5194               8 << size, dreg, nreg, mreg);
5195         return True;
5196      case 13:
5197         /* VQDMULL */
5198         if (U)
5199            return False;
5200         if (dreg & 1)
5201            return False;
5202         dreg >>= 1;
5203         size = B;
5204         switch (size) {
5205            case 0:
5206            case 3:
5207               return False;
5208            case 1:
5209               op = Iop_QDMulLong16Sx4;
5210               op2 = Iop_CmpEQ16x4;
5211               imm = 1LL << 15;
5212               imm = (imm << 16) | imm;
5213               imm = (imm << 32) | imm;
5214               break;
5215            case 2:
5216               op = Iop_QDMulLong32Sx2;
5217               op2 = Iop_CmpEQ32x2;
5218               imm = 1LL << 31;
5219               imm = (imm << 32) | imm;
5220               break;
5221            default:
5222               vassert(0);
5223         }
5224         putQReg(dreg, binop(op, getDRegI64(nreg), getDRegI64(mreg)),
5225               condT);
5226         setFlag_QC(binop(Iop_And64,
5227                          binop(op2, getDRegI64(nreg), mkU64(imm)),
5228                          binop(op2, getDRegI64(mreg), mkU64(imm))),
5229                    mkU64(0),
5230                    False, condT);
5231         DIP("vqdmull.s%u q%u, d%u, d%u\n", 8 << size, dreg, nreg, mreg);
5232         return True;
5233      default:
5234         return False;
5235   }
5236   return False;
5237}
5238
5239/* A7.4.3 Two registers and a scalar */
5240static
5241Bool dis_neon_data_2reg_and_scalar ( UInt theInstr, IRTemp condT )
5242{
5243#  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
5244   UInt U = INSN(24,24);
5245   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
5246   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
5247   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
5248   UInt size = INSN(21,20);
5249   UInt index;
5250   UInt Q = INSN(24,24);
5251
5252   if (INSN(27,25) != 1 || INSN(23,23) != 1
5253       || INSN(6,6) != 1 || INSN(4,4) != 0)
5254      return False;
5255
5256   /* VMLA, VMLS (scalar)  */
5257   if ((INSN(11,8) & BITS4(1,0,1,0)) == BITS4(0,0,0,0)) {
5258      IRTemp res, arg_m, arg_n;
5259      IROp dup, get, op, op2, add, sub;
5260      if (Q) {
5261         if ((dreg & 1) || (nreg & 1))
5262            return False;
5263         dreg >>= 1;
5264         nreg >>= 1;
5265         res = newTemp(Ity_V128);
5266         arg_m = newTemp(Ity_V128);
5267         arg_n = newTemp(Ity_V128);
5268         assign(arg_n, getQReg(nreg));
5269         switch(size) {
5270            case 1:
5271               dup = Iop_Dup16x8;
5272               get = Iop_GetElem16x4;
5273               index = mreg >> 3;
5274               mreg &= 7;
5275               break;
5276            case 2:
5277               dup = Iop_Dup32x4;
5278               get = Iop_GetElem32x2;
5279               index = mreg >> 4;
5280               mreg &= 0xf;
5281               break;
5282            case 0:
5283            case 3:
5284               return False;
5285            default:
5286               vassert(0);
5287         }
5288         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5289      } else {
5290         res = newTemp(Ity_I64);
5291         arg_m = newTemp(Ity_I64);
5292         arg_n = newTemp(Ity_I64);
5293         assign(arg_n, getDRegI64(nreg));
5294         switch(size) {
5295            case 1:
5296               dup = Iop_Dup16x4;
5297               get = Iop_GetElem16x4;
5298               index = mreg >> 3;
5299               mreg &= 7;
5300               break;
5301            case 2:
5302               dup = Iop_Dup32x2;
5303               get = Iop_GetElem32x2;
5304               index = mreg >> 4;
5305               mreg &= 0xf;
5306               break;
5307            case 0:
5308            case 3:
5309               return False;
5310            default:
5311               vassert(0);
5312         }
5313         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5314      }
5315      if (INSN(8,8)) {
5316         switch (size) {
5317            case 2:
5318               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5319               add = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
5320               sub = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
5321               break;
5322            case 0:
5323            case 1:
5324            case 3:
5325               return False;
5326            default:
5327               vassert(0);
5328         }
5329      } else {
5330         switch (size) {
5331            case 1:
5332               op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5333               add = Q ? Iop_Add16x8 : Iop_Add16x4;
5334               sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
5335               break;
5336            case 2:
5337               op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5338               add = Q ? Iop_Add32x4 : Iop_Add32x2;
5339               sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
5340               break;
5341            case 0:
5342            case 3:
5343               return False;
5344            default:
5345               vassert(0);
5346         }
5347      }
5348      op2 = INSN(10,10) ? sub : add;
5349      assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5350      if (Q)
5351         putQReg(dreg, binop_w_fake_RM(op2, getQReg(dreg), mkexpr(res)),
5352                 condT);
5353      else
5354         putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)),
5355                    condT);
5356      DIP("vml%c.%c%u %c%u, %c%u, d%u[%u]\n", INSN(10,10) ? 's' : 'a',
5357            INSN(8,8) ? 'f' : 'i', 8 << size,
5358            Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, mreg, index);
5359      return True;
5360   }
5361
5362   /* VMLAL, VMLSL (scalar)   */
5363   if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,0)) {
5364      IRTemp res, arg_m, arg_n;
5365      IROp dup, get, op, op2, add, sub;
5366      if (dreg & 1)
5367         return False;
5368      dreg >>= 1;
5369      res = newTemp(Ity_V128);
5370      arg_m = newTemp(Ity_I64);
5371      arg_n = newTemp(Ity_I64);
5372      assign(arg_n, getDRegI64(nreg));
5373      switch(size) {
5374         case 1:
5375            dup = Iop_Dup16x4;
5376            get = Iop_GetElem16x4;
5377            index = mreg >> 3;
5378            mreg &= 7;
5379            break;
5380         case 2:
5381            dup = Iop_Dup32x2;
5382            get = Iop_GetElem32x2;
5383            index = mreg >> 4;
5384            mreg &= 0xf;
5385            break;
5386         case 0:
5387         case 3:
5388            return False;
5389         default:
5390            vassert(0);
5391      }
5392      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5393      switch (size) {
5394         case 1:
5395            op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5396            add = Iop_Add32x4;
5397            sub = Iop_Sub32x4;
5398            break;
5399         case 2:
5400            op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5401            add = Iop_Add64x2;
5402            sub = Iop_Sub64x2;
5403            break;
5404         case 0:
5405         case 3:
5406            return False;
5407         default:
5408            vassert(0);
5409      }
5410      op2 = INSN(10,10) ? sub : add;
5411      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5412      putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5413      DIP("vml%cl.%c%u q%u, d%u, d%u[%u]\n",
5414          INSN(10,10) ? 's' : 'a', U ? 'u' : 's',
5415          8 << size, dreg, nreg, mreg, index);
5416      return True;
5417   }
5418
5419   /* VQDMLAL, VQDMLSL (scalar)  */
5420   if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,1) && !U) {
5421      IRTemp res, arg_m, arg_n, tmp;
5422      IROp dup, get, op, op2, add, cmp;
5423      UInt P = INSN(10,10);
5424      ULong imm;
5425      if (dreg & 1)
5426         return False;
5427      dreg >>= 1;
5428      res = newTemp(Ity_V128);
5429      arg_m = newTemp(Ity_I64);
5430      arg_n = newTemp(Ity_I64);
5431      assign(arg_n, getDRegI64(nreg));
5432      switch(size) {
5433         case 1:
5434            dup = Iop_Dup16x4;
5435            get = Iop_GetElem16x4;
5436            index = mreg >> 3;
5437            mreg &= 7;
5438            break;
5439         case 2:
5440            dup = Iop_Dup32x2;
5441            get = Iop_GetElem32x2;
5442            index = mreg >> 4;
5443            mreg &= 0xf;
5444            break;
5445         case 0:
5446         case 3:
5447            return False;
5448         default:
5449            vassert(0);
5450      }
5451      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5452      switch (size) {
5453         case 0:
5454         case 3:
5455            return False;
5456         case 1:
5457            op = Iop_QDMulLong16Sx4;
5458            cmp = Iop_CmpEQ16x4;
5459            add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5460            op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5461            imm = 1LL << 15;
5462            imm = (imm << 16) | imm;
5463            imm = (imm << 32) | imm;
5464            break;
5465         case 2:
5466            op = Iop_QDMulLong32Sx2;
5467            cmp = Iop_CmpEQ32x2;
5468            add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5469            op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5470            imm = 1LL << 31;
5471            imm = (imm << 32) | imm;
5472            break;
5473         default:
5474            vassert(0);
5475      }
5476      res = newTemp(Ity_V128);
5477      tmp = newTemp(Ity_V128);
5478      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5479      assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5480      setFlag_QC(binop(Iop_And64,
5481                       binop(cmp, mkexpr(arg_n), mkU64(imm)),
5482                       binop(cmp, mkexpr(arg_m), mkU64(imm))),
5483                 mkU64(0),
5484                 False, condT);
5485      setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5486                 True, condT);
5487      putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5488      DIP("vqdml%cl.s%u q%u, d%u, d%u[%u]\n", P ? 's' : 'a', 8 << size,
5489          dreg, nreg, mreg, index);
5490      return True;
5491   }
5492
5493   /* VMUL (by scalar)  */
5494   if ((INSN(11,8) & BITS4(1,1,1,0)) == BITS4(1,0,0,0)) {
5495      IRTemp res, arg_m, arg_n;
5496      IROp dup, get, op;
5497      if (Q) {
5498         if ((dreg & 1) || (nreg & 1))
5499            return False;
5500         dreg >>= 1;
5501         nreg >>= 1;
5502         res = newTemp(Ity_V128);
5503         arg_m = newTemp(Ity_V128);
5504         arg_n = newTemp(Ity_V128);
5505         assign(arg_n, getQReg(nreg));
5506         switch(size) {
5507            case 1:
5508               dup = Iop_Dup16x8;
5509               get = Iop_GetElem16x4;
5510               index = mreg >> 3;
5511               mreg &= 7;
5512               break;
5513            case 2:
5514               dup = Iop_Dup32x4;
5515               get = Iop_GetElem32x2;
5516               index = mreg >> 4;
5517               mreg &= 0xf;
5518               break;
5519            case 0:
5520            case 3:
5521               return False;
5522            default:
5523               vassert(0);
5524         }
5525         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5526      } else {
5527         res = newTemp(Ity_I64);
5528         arg_m = newTemp(Ity_I64);
5529         arg_n = newTemp(Ity_I64);
5530         assign(arg_n, getDRegI64(nreg));
5531         switch(size) {
5532            case 1:
5533               dup = Iop_Dup16x4;
5534               get = Iop_GetElem16x4;
5535               index = mreg >> 3;
5536               mreg &= 7;
5537               break;
5538            case 2:
5539               dup = Iop_Dup32x2;
5540               get = Iop_GetElem32x2;
5541               index = mreg >> 4;
5542               mreg &= 0xf;
5543               break;
5544            case 0:
5545            case 3:
5546               return False;
5547            default:
5548               vassert(0);
5549         }
5550         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5551      }
5552      if (INSN(8,8)) {
5553         switch (size) {
5554            case 2:
5555               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5556               break;
5557            case 0:
5558            case 1:
5559            case 3:
5560               return False;
5561            default:
5562               vassert(0);
5563         }
5564      } else {
5565         switch (size) {
5566            case 1:
5567               op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5568               break;
5569            case 2:
5570               op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5571               break;
5572            case 0:
5573            case 3:
5574               return False;
5575            default:
5576               vassert(0);
5577         }
5578      }
5579      assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5580      if (Q)
5581         putQReg(dreg, mkexpr(res), condT);
5582      else
5583         putDRegI64(dreg, mkexpr(res), condT);
5584      DIP("vmul.%c%u %c%u, %c%u, d%u[%u]\n", INSN(8,8) ? 'f' : 'i',
5585          8 << size, Q ? 'q' : 'd', dreg,
5586          Q ? 'q' : 'd', nreg, mreg, index);
5587      return True;
5588   }
5589
5590   /* VMULL (scalar) */
5591   if (INSN(11,8) == BITS4(1,0,1,0)) {
5592      IRTemp res, arg_m, arg_n;
5593      IROp dup, get, op;
5594      if (dreg & 1)
5595         return False;
5596      dreg >>= 1;
5597      res = newTemp(Ity_V128);
5598      arg_m = newTemp(Ity_I64);
5599      arg_n = newTemp(Ity_I64);
5600      assign(arg_n, getDRegI64(nreg));
5601      switch(size) {
5602         case 1:
5603            dup = Iop_Dup16x4;
5604            get = Iop_GetElem16x4;
5605            index = mreg >> 3;
5606            mreg &= 7;
5607            break;
5608         case 2:
5609            dup = Iop_Dup32x2;
5610            get = Iop_GetElem32x2;
5611            index = mreg >> 4;
5612            mreg &= 0xf;
5613            break;
5614         case 0:
5615         case 3:
5616            return False;
5617         default:
5618            vassert(0);
5619      }
5620      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5621      switch (size) {
5622         case 1: op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4; break;
5623         case 2: op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2; break;
5624         case 0: case 3: return False;
5625         default: vassert(0);
5626      }
5627      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5628      putQReg(dreg, mkexpr(res), condT);
5629      DIP("vmull.%c%u q%u, d%u, d%u[%u]\n", U ? 'u' : 's', 8 << size, dreg,
5630          nreg, mreg, index);
5631      return True;
5632   }
5633
5634   /* VQDMULL */
5635   if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
5636      IROp op ,op2, dup, get;
5637      ULong imm;
5638      IRTemp arg_m, arg_n;
5639      if (dreg & 1)
5640         return False;
5641      dreg >>= 1;
5642      arg_m = newTemp(Ity_I64);
5643      arg_n = newTemp(Ity_I64);
5644      assign(arg_n, getDRegI64(nreg));
5645      switch(size) {
5646         case 1:
5647            dup = Iop_Dup16x4;
5648            get = Iop_GetElem16x4;
5649            index = mreg >> 3;
5650            mreg &= 7;
5651            break;
5652         case 2:
5653            dup = Iop_Dup32x2;
5654            get = Iop_GetElem32x2;
5655            index = mreg >> 4;
5656            mreg &= 0xf;
5657            break;
5658         case 0:
5659         case 3:
5660            return False;
5661         default:
5662            vassert(0);
5663      }
5664      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5665      switch (size) {
5666         case 0:
5667         case 3:
5668            return False;
5669         case 1:
5670            op = Iop_QDMulLong16Sx4;
5671            op2 = Iop_CmpEQ16x4;
5672            imm = 1LL << 15;
5673            imm = (imm << 16) | imm;
5674            imm = (imm << 32) | imm;
5675            break;
5676         case 2:
5677            op = Iop_QDMulLong32Sx2;
5678            op2 = Iop_CmpEQ32x2;
5679            imm = 1LL << 31;
5680            imm = (imm << 32) | imm;
5681            break;
5682         default:
5683            vassert(0);
5684      }
5685      putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5686            condT);
5687      setFlag_QC(binop(Iop_And64,
5688                       binop(op2, mkexpr(arg_n), mkU64(imm)),
5689                       binop(op2, mkexpr(arg_m), mkU64(imm))),
5690                 mkU64(0),
5691                 False, condT);
5692      DIP("vqdmull.s%u q%u, d%u, d%u[%u]\n", 8 << size, dreg, nreg, mreg,
5693          index);
5694      return True;
5695   }
5696
5697   /* VQDMULH */
5698   if (INSN(11,8) == BITS4(1,1,0,0)) {
5699      IROp op ,op2, dup, get;
5700      ULong imm;
5701      IRTemp res, arg_m, arg_n;
5702      if (Q) {
5703         if ((dreg & 1) || (nreg & 1))
5704            return False;
5705         dreg >>= 1;
5706         nreg >>= 1;
5707         res = newTemp(Ity_V128);
5708         arg_m = newTemp(Ity_V128);
5709         arg_n = newTemp(Ity_V128);
5710         assign(arg_n, getQReg(nreg));
5711         switch(size) {
5712            case 1:
5713               dup = Iop_Dup16x8;
5714               get = Iop_GetElem16x4;
5715               index = mreg >> 3;
5716               mreg &= 7;
5717               break;
5718            case 2:
5719               dup = Iop_Dup32x4;
5720               get = Iop_GetElem32x2;
5721               index = mreg >> 4;
5722               mreg &= 0xf;
5723               break;
5724            case 0:
5725            case 3:
5726               return False;
5727            default:
5728               vassert(0);
5729         }
5730         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5731      } else {
5732         res = newTemp(Ity_I64);
5733         arg_m = newTemp(Ity_I64);
5734         arg_n = newTemp(Ity_I64);
5735         assign(arg_n, getDRegI64(nreg));
5736         switch(size) {
5737            case 1:
5738               dup = Iop_Dup16x4;
5739               get = Iop_GetElem16x4;
5740               index = mreg >> 3;
5741               mreg &= 7;
5742               break;
5743            case 2:
5744               dup = Iop_Dup32x2;
5745               get = Iop_GetElem32x2;
5746               index = mreg >> 4;
5747               mreg &= 0xf;
5748               break;
5749            case 0:
5750            case 3:
5751               return False;
5752            default:
5753               vassert(0);
5754         }
5755         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5756      }
5757      switch (size) {
5758         case 0:
5759         case 3:
5760            return False;
5761         case 1:
5762            op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
5763            op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5764            imm = 1LL << 15;
5765            imm = (imm << 16) | imm;
5766            imm = (imm << 32) | imm;
5767            break;
5768         case 2:
5769            op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
5770            op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5771            imm = 1LL << 31;
5772            imm = (imm << 32) | imm;
5773            break;
5774         default:
5775            vassert(0);
5776      }
5777      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5778      setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5779                       binop(op2, mkexpr(arg_n),
5780                                  Q ? mkU128(imm) : mkU64(imm)),
5781                       binop(op2, mkexpr(arg_m),
5782                             Q ? mkU128(imm) : mkU64(imm))),
5783                 Q ? mkU128(0) : mkU64(0),
5784                 Q, condT);
5785      if (Q)
5786         putQReg(dreg, mkexpr(res), condT);
5787      else
5788         putDRegI64(dreg, mkexpr(res), condT);
5789      DIP("vqdmulh.s%u %c%u, %c%u, d%u[%u]\n",
5790          8 << size, Q ? 'q' : 'd', dreg,
5791          Q ? 'q' : 'd', nreg, mreg, index);
5792      return True;
5793   }
5794
5795   /* VQRDMULH (scalar) */
5796   if (INSN(11,8) == BITS4(1,1,0,1)) {
5797      IROp op ,op2, dup, get;
5798      ULong imm;
5799      IRTemp res, arg_m, arg_n;
5800      if (Q) {
5801         if ((dreg & 1) || (nreg & 1))
5802            return False;
5803         dreg >>= 1;
5804         nreg >>= 1;
5805         res = newTemp(Ity_V128);
5806         arg_m = newTemp(Ity_V128);
5807         arg_n = newTemp(Ity_V128);
5808         assign(arg_n, getQReg(nreg));
5809         switch(size) {
5810            case 1:
5811               dup = Iop_Dup16x8;
5812               get = Iop_GetElem16x4;
5813               index = mreg >> 3;
5814               mreg &= 7;
5815               break;
5816            case 2:
5817               dup = Iop_Dup32x4;
5818               get = Iop_GetElem32x2;
5819               index = mreg >> 4;
5820               mreg &= 0xf;
5821               break;
5822            case 0:
5823            case 3:
5824               return False;
5825            default:
5826               vassert(0);
5827         }
5828         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5829      } else {
5830         res = newTemp(Ity_I64);
5831         arg_m = newTemp(Ity_I64);
5832         arg_n = newTemp(Ity_I64);
5833         assign(arg_n, getDRegI64(nreg));
5834         switch(size) {
5835            case 1:
5836               dup = Iop_Dup16x4;
5837               get = Iop_GetElem16x4;
5838               index = mreg >> 3;
5839               mreg &= 7;
5840               break;
5841            case 2:
5842               dup = Iop_Dup32x2;
5843               get = Iop_GetElem32x2;
5844               index = mreg >> 4;
5845               mreg &= 0xf;
5846               break;
5847            case 0:
5848            case 3:
5849               return False;
5850            default:
5851               vassert(0);
5852         }
5853         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5854      }
5855      switch (size) {
5856         case 0:
5857         case 3:
5858            return False;
5859         case 1:
5860            op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
5861            op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5862            imm = 1LL << 15;
5863            imm = (imm << 16) | imm;
5864            imm = (imm << 32) | imm;
5865            break;
5866         case 2:
5867            op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
5868            op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5869            imm = 1LL << 31;
5870            imm = (imm << 32) | imm;
5871            break;
5872         default:
5873            vassert(0);
5874      }
5875      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5876      setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5877                       binop(op2, mkexpr(arg_n),
5878                                  Q ? mkU128(imm) : mkU64(imm)),
5879                       binop(op2, mkexpr(arg_m),
5880                                  Q ? mkU128(imm) : mkU64(imm))),
5881                 Q ? mkU128(0) : mkU64(0),
5882                 Q, condT);
5883      if (Q)
5884         putQReg(dreg, mkexpr(res), condT);
5885      else
5886         putDRegI64(dreg, mkexpr(res), condT);
5887      DIP("vqrdmulh.s%u %c%u, %c%u, d%u[%u]\n",
5888          8 << size, Q ? 'q' : 'd', dreg,
5889          Q ? 'q' : 'd', nreg, mreg, index);
5890      return True;
5891   }
5892
5893   return False;
5894#  undef INSN
5895}
5896
5897/* A7.4.4 Two registers and a shift amount */
5898static
5899Bool dis_neon_data_2reg_and_shift ( UInt theInstr, IRTemp condT )
5900{
5901   UInt A = (theInstr >> 8) & 0xf;
5902   UInt B = (theInstr >> 6) & 1;
5903   UInt L = (theInstr >> 7) & 1;
5904   UInt U = (theInstr >> 24) & 1;
5905   UInt Q = B;
5906   UInt imm6 = (theInstr >> 16) & 0x3f;
5907   UInt shift_imm;
5908   UInt size = 4;
5909   UInt tmp;
5910   UInt mreg = get_neon_m_regno(theInstr);
5911   UInt dreg = get_neon_d_regno(theInstr);
5912   ULong imm = 0;
5913   IROp op, cvt, add = Iop_INVALID, cvt2, op_rev;
5914   IRTemp reg_m, res, mask;
5915
5916   if (L == 0 && ((theInstr >> 19) & 7) == 0)
5917      /* It is one reg and immediate */
5918      return False;
5919
5920   tmp = (L << 6) | imm6;
5921   if (tmp & 0x40) {
5922      size = 3;
5923      shift_imm = 64 - imm6;
5924   } else if (tmp & 0x20) {
5925      size = 2;
5926      shift_imm = 64 - imm6;
5927   } else if (tmp & 0x10) {
5928      size = 1;
5929      shift_imm = 32 - imm6;
5930   } else if (tmp & 0x8) {
5931      size = 0;
5932      shift_imm = 16 - imm6;
5933   } else {
5934      return False;
5935   }
5936
5937   switch (A) {
5938      case 3:
5939      case 2:
5940         /* VRSHR, VRSRA */
5941         if (shift_imm > 0) {
5942            IRExpr *imm_val;
5943            imm = 1L;
5944            switch (size) {
5945               case 0:
5946                  imm = (imm << 8) | imm;
5947                  /* fall through */
5948               case 1:
5949                  imm = (imm << 16) | imm;
5950                  /* fall through */
5951               case 2:
5952                  imm = (imm << 32) | imm;
5953                  /* fall through */
5954               case 3:
5955                  break;
5956               default:
5957                  vassert(0);
5958            }
5959            if (Q) {
5960               reg_m = newTemp(Ity_V128);
5961               res = newTemp(Ity_V128);
5962               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
5963               assign(reg_m, getQReg(mreg));
5964               switch (size) {
5965                  case 0:
5966                     add = Iop_Add8x16;
5967                     op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
5968                     break;
5969                  case 1:
5970                     add = Iop_Add16x8;
5971                     op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
5972                     break;
5973                  case 2:
5974                     add = Iop_Add32x4;
5975                     op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
5976                     break;
5977                  case 3:
5978                     add = Iop_Add64x2;
5979                     op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
5980                     break;
5981                  default:
5982                     vassert(0);
5983               }
5984            } else {
5985               reg_m = newTemp(Ity_I64);
5986               res = newTemp(Ity_I64);
5987               imm_val = mkU64(imm);
5988               assign(reg_m, getDRegI64(mreg));
5989               switch (size) {
5990                  case 0:
5991                     add = Iop_Add8x8;
5992                     op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
5993                     break;
5994                  case 1:
5995                     add = Iop_Add16x4;
5996                     op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
5997                     break;
5998                  case 2:
5999                     add = Iop_Add32x2;
6000                     op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
6001                     break;
6002                  case 3:
6003                     add = Iop_Add64;
6004                     op = U ? Iop_Shr64 : Iop_Sar64;
6005                     break;
6006                  default:
6007                     vassert(0);
6008               }
6009            }
6010            assign(res,
6011                   binop(add,
6012                         binop(op,
6013                               mkexpr(reg_m),
6014                               mkU8(shift_imm)),
6015                         binop(Q ? Iop_AndV128 : Iop_And64,
6016                               binop(op,
6017                                     mkexpr(reg_m),
6018                                     mkU8(shift_imm - 1)),
6019                               imm_val)));
6020         } else {
6021            if (Q) {
6022               res = newTemp(Ity_V128);
6023               assign(res, getQReg(mreg));
6024            } else {
6025               res = newTemp(Ity_I64);
6026               assign(res, getDRegI64(mreg));
6027            }
6028         }
6029         if (A == 3) {
6030            if (Q) {
6031               putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6032                             condT);
6033            } else {
6034               putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6035                                condT);
6036            }
6037            DIP("vrsra.%c%u %c%u, %c%u, #%u\n",
6038                U ? 'u' : 's', 8 << size,
6039                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6040         } else {
6041            if (Q) {
6042               putQReg(dreg, mkexpr(res), condT);
6043            } else {
6044               putDRegI64(dreg, mkexpr(res), condT);
6045            }
6046            DIP("vrshr.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6047                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6048         }
6049         return True;
6050      case 1:
6051      case 0:
6052         /* VSHR, VSRA */
6053         if (Q) {
6054            reg_m = newTemp(Ity_V128);
6055            assign(reg_m, getQReg(mreg));
6056            res = newTemp(Ity_V128);
6057         } else {
6058            reg_m = newTemp(Ity_I64);
6059            assign(reg_m, getDRegI64(mreg));
6060            res = newTemp(Ity_I64);
6061         }
6062         if (Q) {
6063            switch (size) {
6064               case 0:
6065                  op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
6066                  add = Iop_Add8x16;
6067                  break;
6068               case 1:
6069                  op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6070                  add = Iop_Add16x8;
6071                  break;
6072               case 2:
6073                  op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6074                  add = Iop_Add32x4;
6075                  break;
6076               case 3:
6077                  op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6078                  add = Iop_Add64x2;
6079                  break;
6080               default:
6081                  vassert(0);
6082            }
6083         } else {
6084            switch (size) {
6085               case 0:
6086                  op =  U ? Iop_ShrN8x8 : Iop_SarN8x8;
6087                  add = Iop_Add8x8;
6088                  break;
6089               case 1:
6090                  op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
6091                  add = Iop_Add16x4;
6092                  break;
6093               case 2:
6094                  op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
6095                  add = Iop_Add32x2;
6096                  break;
6097               case 3:
6098                  op = U ? Iop_Shr64 : Iop_Sar64;
6099                  add = Iop_Add64;
6100                  break;
6101               default:
6102                  vassert(0);
6103            }
6104         }
6105         assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6106         if (A == 1) {
6107            if (Q) {
6108               putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6109                             condT);
6110            } else {
6111               putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6112                                condT);
6113            }
6114            DIP("vsra.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6115                  Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6116         } else {
6117            if (Q) {
6118               putQReg(dreg, mkexpr(res), condT);
6119            } else {
6120               putDRegI64(dreg, mkexpr(res), condT);
6121            }
6122            DIP("vshr.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6123                  Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6124         }
6125         return True;
6126      case 4:
6127         /* VSRI */
6128         if (!U)
6129            return False;
6130         if (Q) {
6131            res = newTemp(Ity_V128);
6132            mask = newTemp(Ity_V128);
6133         } else {
6134            res = newTemp(Ity_I64);
6135            mask = newTemp(Ity_I64);
6136         }
6137         switch (size) {
6138            case 0: op = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; break;
6139            case 1: op = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; break;
6140            case 2: op = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; break;
6141            case 3: op = Q ? Iop_ShrN64x2 : Iop_Shr64; break;
6142            default: vassert(0);
6143         }
6144         if (Q) {
6145            assign(mask, binop(op, binop(Iop_64HLtoV128,
6146                                         mkU64(0xFFFFFFFFFFFFFFFFLL),
6147                                         mkU64(0xFFFFFFFFFFFFFFFFLL)),
6148                               mkU8(shift_imm)));
6149            assign(res, binop(Iop_OrV128,
6150                              binop(Iop_AndV128,
6151                                    getQReg(dreg),
6152                                    unop(Iop_NotV128,
6153                                         mkexpr(mask))),
6154                              binop(op,
6155                                    getQReg(mreg),
6156                                    mkU8(shift_imm))));
6157            putQReg(dreg, mkexpr(res), condT);
6158         } else {
6159            assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6160                               mkU8(shift_imm)));
6161            assign(res, binop(Iop_Or64,
6162                              binop(Iop_And64,
6163                                    getDRegI64(dreg),
6164                                    unop(Iop_Not64,
6165                                         mkexpr(mask))),
6166                              binop(op,
6167                                    getDRegI64(mreg),
6168                                    mkU8(shift_imm))));
6169            putDRegI64(dreg, mkexpr(res), condT);
6170         }
6171         DIP("vsri.%u %c%u, %c%u, #%u\n",
6172             8 << size, Q ? 'q' : 'd', dreg,
6173             Q ? 'q' : 'd', mreg, shift_imm);
6174         return True;
6175      case 5:
6176         if (U) {
6177            /* VSLI */
6178            shift_imm = 8 * (1 << size) - shift_imm;
6179            if (Q) {
6180               res = newTemp(Ity_V128);
6181               mask = newTemp(Ity_V128);
6182            } else {
6183               res = newTemp(Ity_I64);
6184               mask = newTemp(Ity_I64);
6185            }
6186            switch (size) {
6187               case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6188               case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6189               case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6190               case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6191               default: vassert(0);
6192            }
6193            if (Q) {
6194               assign(mask, binop(op, binop(Iop_64HLtoV128,
6195                                            mkU64(0xFFFFFFFFFFFFFFFFLL),
6196                                            mkU64(0xFFFFFFFFFFFFFFFFLL)),
6197                                  mkU8(shift_imm)));
6198               assign(res, binop(Iop_OrV128,
6199                                 binop(Iop_AndV128,
6200                                       getQReg(dreg),
6201                                       unop(Iop_NotV128,
6202                                            mkexpr(mask))),
6203                                 binop(op,
6204                                       getQReg(mreg),
6205                                       mkU8(shift_imm))));
6206               putQReg(dreg, mkexpr(res), condT);
6207            } else {
6208               assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6209                                  mkU8(shift_imm)));
6210               assign(res, binop(Iop_Or64,
6211                                 binop(Iop_And64,
6212                                       getDRegI64(dreg),
6213                                       unop(Iop_Not64,
6214                                            mkexpr(mask))),
6215                                 binop(op,
6216                                       getDRegI64(mreg),
6217                                       mkU8(shift_imm))));
6218               putDRegI64(dreg, mkexpr(res), condT);
6219            }
6220            DIP("vsli.%u %c%u, %c%u, #%u\n",
6221                8 << size, Q ? 'q' : 'd', dreg,
6222                Q ? 'q' : 'd', mreg, shift_imm);
6223            return True;
6224         } else {
6225            /* VSHL #imm */
6226            shift_imm = 8 * (1 << size) - shift_imm;
6227            if (Q) {
6228               res = newTemp(Ity_V128);
6229            } else {
6230               res = newTemp(Ity_I64);
6231            }
6232            switch (size) {
6233               case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6234               case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6235               case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6236               case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6237               default: vassert(0);
6238            }
6239            assign(res, binop(op, Q ? getQReg(mreg) : getDRegI64(mreg),
6240                     mkU8(shift_imm)));
6241            if (Q) {
6242               putQReg(dreg, mkexpr(res), condT);
6243            } else {
6244               putDRegI64(dreg, mkexpr(res), condT);
6245            }
6246            DIP("vshl.i%u %c%u, %c%u, #%u\n",
6247                8 << size, Q ? 'q' : 'd', dreg,
6248                Q ? 'q' : 'd', mreg, shift_imm);
6249            return True;
6250         }
6251         break;
6252      case 6:
6253      case 7:
6254         /* VQSHL, VQSHLU */
6255         shift_imm = 8 * (1 << size) - shift_imm;
6256         if (U) {
6257            if (A & 1) {
6258               switch (size) {
6259                  case 0:
6260                     op = Q ? Iop_QShlN8x16 : Iop_QShlN8x8;
6261                     op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6262                     break;
6263                  case 1:
6264                     op = Q ? Iop_QShlN16x8 : Iop_QShlN16x4;
6265                     op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6266                     break;
6267                  case 2:
6268                     op = Q ? Iop_QShlN32x4 : Iop_QShlN32x2;
6269                     op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6270                     break;
6271                  case 3:
6272                     op = Q ? Iop_QShlN64x2 : Iop_QShlN64x1;
6273                     op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6274                     break;
6275                  default:
6276                     vassert(0);
6277               }
6278               DIP("vqshl.u%u %c%u, %c%u, #%u\n",
6279                   8 << size,
6280                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6281            } else {
6282               switch (size) {
6283                  case 0:
6284                     op = Q ? Iop_QShlN8Sx16 : Iop_QShlN8Sx8;
6285                     op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6286                     break;
6287                  case 1:
6288                     op = Q ? Iop_QShlN16Sx8 : Iop_QShlN16Sx4;
6289                     op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6290                     break;
6291                  case 2:
6292                     op = Q ? Iop_QShlN32Sx4 : Iop_QShlN32Sx2;
6293                     op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6294                     break;
6295                  case 3:
6296                     op = Q ? Iop_QShlN64Sx2 : Iop_QShlN64Sx1;
6297                     op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6298                     break;
6299                  default:
6300                     vassert(0);
6301               }
6302               DIP("vqshlu.s%u %c%u, %c%u, #%u\n",
6303                   8 << size,
6304                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6305            }
6306         } else {
6307            if (!(A & 1))
6308               return False;
6309            switch (size) {
6310               case 0:
6311                  op = Q ? Iop_QSalN8x16 : Iop_QSalN8x8;
6312                  op_rev = Q ? Iop_SarN8x16 : Iop_SarN8x8;
6313                  break;
6314               case 1:
6315                  op = Q ? Iop_QSalN16x8 : Iop_QSalN16x4;
6316                  op_rev = Q ? Iop_SarN16x8 : Iop_SarN16x4;
6317                  break;
6318               case 2:
6319                  op = Q ? Iop_QSalN32x4 : Iop_QSalN32x2;
6320                  op_rev = Q ? Iop_SarN32x4 : Iop_SarN32x2;
6321                  break;
6322               case 3:
6323                  op = Q ? Iop_QSalN64x2 : Iop_QSalN64x1;
6324                  op_rev = Q ? Iop_SarN64x2 : Iop_Sar64;
6325                  break;
6326               default:
6327                  vassert(0);
6328            }
6329            DIP("vqshl.s%u %c%u, %c%u, #%u\n",
6330                8 << size,
6331                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6332         }
6333         if (Q) {
6334            tmp = newTemp(Ity_V128);
6335            res = newTemp(Ity_V128);
6336            reg_m = newTemp(Ity_V128);
6337            assign(reg_m, getQReg(mreg));
6338         } else {
6339            tmp = newTemp(Ity_I64);
6340            res = newTemp(Ity_I64);
6341            reg_m = newTemp(Ity_I64);
6342            assign(reg_m, getDRegI64(mreg));
6343         }
6344         assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6345         assign(tmp, binop(op_rev, mkexpr(res), mkU8(shift_imm)));
6346         setFlag_QC(mkexpr(tmp), mkexpr(reg_m), Q, condT);
6347         if (Q)
6348            putQReg(dreg, mkexpr(res), condT);
6349         else
6350            putDRegI64(dreg, mkexpr(res), condT);
6351         return True;
6352      case 8:
6353         if (!U) {
6354            if (L == 1)
6355               return False;
6356            size++;
6357            dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6358            mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
6359            if (mreg & 1)
6360               return False;
6361            mreg >>= 1;
6362            if (!B) {
6363               /* VSHRN*/
6364               IROp narOp;
6365               reg_m = newTemp(Ity_V128);
6366               assign(reg_m, getQReg(mreg));
6367               res = newTemp(Ity_I64);
6368               switch (size) {
6369                  case 1:
6370                     op = Iop_ShrN16x8;
6371                     narOp = Iop_NarrowUn16to8x8;
6372                     break;
6373                  case 2:
6374                     op = Iop_ShrN32x4;
6375                     narOp = Iop_NarrowUn32to16x4;
6376                     break;
6377                  case 3:
6378                     op = Iop_ShrN64x2;
6379                     narOp = Iop_NarrowUn64to32x2;
6380                     break;
6381                  default:
6382                     vassert(0);
6383               }
6384               assign(res, unop(narOp,
6385                                binop(op,
6386                                      mkexpr(reg_m),
6387                                      mkU8(shift_imm))));
6388               putDRegI64(dreg, mkexpr(res), condT);
6389               DIP("vshrn.i%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6390                   shift_imm);
6391               return True;
6392            } else {
6393               /* VRSHRN   */
6394               IROp addOp, shOp, narOp;
6395               IRExpr *imm_val;
6396               reg_m = newTemp(Ity_V128);
6397               assign(reg_m, getQReg(mreg));
6398               res = newTemp(Ity_I64);
6399               imm = 1L;
6400               switch (size) {
6401                  case 0: imm = (imm <<  8) | imm; /* fall through */
6402                  case 1: imm = (imm << 16) | imm; /* fall through */
6403                  case 2: imm = (imm << 32) | imm; /* fall through */
6404                  case 3: break;
6405                  default: vassert(0);
6406               }
6407               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
6408               switch (size) {
6409                  case 1:
6410                     addOp = Iop_Add16x8;
6411                     shOp = Iop_ShrN16x8;
6412                     narOp = Iop_NarrowUn16to8x8;
6413                     break;
6414                  case 2:
6415                     addOp = Iop_Add32x4;
6416                     shOp = Iop_ShrN32x4;
6417                     narOp = Iop_NarrowUn32to16x4;
6418                     break;
6419                  case 3:
6420                     addOp = Iop_Add64x2;
6421                     shOp = Iop_ShrN64x2;
6422                     narOp = Iop_NarrowUn64to32x2;
6423                     break;
6424                  default:
6425                     vassert(0);
6426               }
6427               assign(res, unop(narOp,
6428                                binop(addOp,
6429                                      binop(shOp,
6430                                            mkexpr(reg_m),
6431                                            mkU8(shift_imm)),
6432                                      binop(Iop_AndV128,
6433                                            binop(shOp,
6434                                                  mkexpr(reg_m),
6435                                                  mkU8(shift_imm - 1)),
6436                                            imm_val))));
6437               putDRegI64(dreg, mkexpr(res), condT);
6438               if (shift_imm == 0) {
6439                  DIP("vmov%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6440                      shift_imm);
6441               } else {
6442                  DIP("vrshrn.i%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6443                      shift_imm);
6444               }
6445               return True;
6446            }
6447         } else {
6448            /* fall through */
6449         }
6450      case 9:
6451         dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6452         mreg = ((theInstr >>  1) & 0x10) | (theInstr & 0xF);
6453         if (mreg & 1)
6454            return False;
6455         mreg >>= 1;
6456         size++;
6457         if ((theInstr >> 8) & 1) {
6458            switch (size) {
6459               case 1:
6460                  op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6461                  cvt = U ? Iop_QNarrowUn16Uto8Ux8 : Iop_QNarrowUn16Sto8Sx8;
6462                  cvt2 = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6463                  break;
6464               case 2:
6465                  op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6466                  cvt = U ? Iop_QNarrowUn32Uto16Ux4 : Iop_QNarrowUn32Sto16Sx4;
6467                  cvt2 = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6468                  break;
6469               case 3:
6470                  op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6471                  cvt = U ? Iop_QNarrowUn64Uto32Ux2 : Iop_QNarrowUn64Sto32Sx2;
6472                  cvt2 = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6473                  break;
6474               default:
6475                  vassert(0);
6476            }
6477            DIP("vq%sshrn.%c%u d%u, q%u, #%u\n", B ? "r" : "",
6478                U ? 'u' : 's', 8 << size, dreg, mreg, shift_imm);
6479         } else {
6480            vassert(U);
6481            switch (size) {
6482               case 1:
6483                  op = Iop_SarN16x8;
6484                  cvt = Iop_QNarrowUn16Sto8Ux8;
6485                  cvt2 = Iop_Widen8Uto16x8;
6486                  break;
6487               case 2:
6488                  op = Iop_SarN32x4;
6489                  cvt = Iop_QNarrowUn32Sto16Ux4;
6490                  cvt2 = Iop_Widen16Uto32x4;
6491                  break;
6492               case 3:
6493                  op = Iop_SarN64x2;
6494                  cvt = Iop_QNarrowUn64Sto32Ux2;
6495                  cvt2 = Iop_Widen32Uto64x2;
6496                  break;
6497               default:
6498                  vassert(0);
6499            }
6500            DIP("vq%sshrun.s%u d%u, q%u, #%u\n", B ? "r" : "",
6501                8 << size, dreg, mreg, shift_imm);
6502         }
6503         if (B) {
6504            if (shift_imm > 0) {
6505               imm = 1;
6506               switch (size) {
6507                  case 1: imm = (imm << 16) | imm; /* fall through */
6508                  case 2: imm = (imm << 32) | imm; /* fall through */
6509                  case 3: break;
6510                  case 0: default: vassert(0);
6511               }
6512               switch (size) {
6513                  case 1: add = Iop_Add16x8; break;
6514                  case 2: add = Iop_Add32x4; break;
6515                  case 3: add = Iop_Add64x2; break;
6516                  case 0: default: vassert(0);
6517               }
6518            }
6519         }
6520         reg_m = newTemp(Ity_V128);
6521         res = newTemp(Ity_V128);
6522         assign(reg_m, getQReg(mreg));
6523         if (B) {
6524            /* VQRSHRN, VQRSHRUN */
6525            assign(res, binop(add,
6526                              binop(op, mkexpr(reg_m), mkU8(shift_imm)),
6527                              binop(Iop_AndV128,
6528                                    binop(op,
6529                                          mkexpr(reg_m),
6530                                          mkU8(shift_imm - 1)),
6531                                    mkU128(imm))));
6532         } else {
6533            /* VQSHRN, VQSHRUN */
6534            assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6535         }
6536         setFlag_QC(unop(cvt2, unop(cvt, mkexpr(res))), mkexpr(res),
6537                    True, condT);
6538         putDRegI64(dreg, unop(cvt, mkexpr(res)), condT);
6539         return True;
6540      case 10:
6541         /* VSHLL
6542            VMOVL ::= VSHLL #0 */
6543         if (B)
6544            return False;
6545         if (dreg & 1)
6546            return False;
6547         dreg >>= 1;
6548         shift_imm = (8 << size) - shift_imm;
6549         res = newTemp(Ity_V128);
6550         switch (size) {
6551            case 0:
6552               op = Iop_ShlN16x8;
6553               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6554               break;
6555            case 1:
6556               op = Iop_ShlN32x4;
6557               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6558               break;
6559            case 2:
6560               op = Iop_ShlN64x2;
6561               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6562               break;
6563            case 3:
6564               return False;
6565            default:
6566               vassert(0);
6567         }
6568         assign(res, binop(op, unop(cvt, getDRegI64(mreg)), mkU8(shift_imm)));
6569         putQReg(dreg, mkexpr(res), condT);
6570         if (shift_imm == 0) {
6571            DIP("vmovl.%c%u q%u, d%u\n", U ? 'u' : 's', 8 << size,
6572                dreg, mreg);
6573         } else {
6574            DIP("vshll.%c%u q%u, d%u, #%u\n", U ? 'u' : 's', 8 << size,
6575                dreg, mreg, shift_imm);
6576         }
6577         return True;
6578      case 14:
6579      case 15:
6580         /* VCVT floating-point <-> fixed-point */
6581         if ((theInstr >> 8) & 1) {
6582            if (U) {
6583               op = Q ? Iop_F32ToFixed32Ux4_RZ : Iop_F32ToFixed32Ux2_RZ;
6584            } else {
6585               op = Q ? Iop_F32ToFixed32Sx4_RZ : Iop_F32ToFixed32Sx2_RZ;
6586            }
6587            DIP("vcvt.%c32.f32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6588                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6589                64 - ((theInstr >> 16) & 0x3f));
6590         } else {
6591            if (U) {
6592               op = Q ? Iop_Fixed32UToF32x4_RN : Iop_Fixed32UToF32x2_RN;
6593            } else {
6594               op = Q ? Iop_Fixed32SToF32x4_RN : Iop_Fixed32SToF32x2_RN;
6595            }
6596            DIP("vcvt.f32.%c32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6597                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6598                64 - ((theInstr >> 16) & 0x3f));
6599         }
6600         if (((theInstr >> 21) & 1) == 0)
6601            return False;
6602         if (Q) {
6603            putQReg(dreg, binop(op, getQReg(mreg),
6604                     mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6605         } else {
6606            putDRegI64(dreg, binop(op, getDRegI64(mreg),
6607                       mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6608         }
6609         return True;
6610      default:
6611         return False;
6612
6613   }
6614   return False;
6615}
6616
6617/* A7.4.5 Two registers, miscellaneous */
6618static
6619Bool dis_neon_data_2reg_misc ( UInt theInstr, IRTemp condT )
6620{
6621   UInt A = (theInstr >> 16) & 3;
6622   UInt B = (theInstr >> 6) & 0x1f;
6623   UInt Q = (theInstr >> 6) & 1;
6624   UInt U = (theInstr >> 24) & 1;
6625   UInt size = (theInstr >> 18) & 3;
6626   UInt dreg = get_neon_d_regno(theInstr);
6627   UInt mreg = get_neon_m_regno(theInstr);
6628   UInt F = (theInstr >> 10) & 1;
6629   IRTemp arg_d = IRTemp_INVALID;
6630   IRTemp arg_m = IRTemp_INVALID;
6631   IRTemp res = IRTemp_INVALID;
6632   switch (A) {
6633      case 0:
6634         if (Q) {
6635            arg_m = newTemp(Ity_V128);
6636            res = newTemp(Ity_V128);
6637            assign(arg_m, getQReg(mreg));
6638         } else {
6639            arg_m = newTemp(Ity_I64);
6640            res = newTemp(Ity_I64);
6641            assign(arg_m, getDRegI64(mreg));
6642         }
6643         switch (B >> 1) {
6644            case 0: {
6645               /* VREV64 */
6646               IROp op;
6647               switch (size) {
6648                  case 0:
6649                     op = Q ? Iop_Reverse64_8x16 : Iop_Reverse64_8x8;
6650                     break;
6651                  case 1:
6652                     op = Q ? Iop_Reverse64_16x8 : Iop_Reverse64_16x4;
6653                     break;
6654                  case 2:
6655                     op = Q ? Iop_Reverse64_32x4 : Iop_Reverse64_32x2;
6656                     break;
6657                  case 3:
6658                     return False;
6659                  default:
6660                     vassert(0);
6661               }
6662               assign(res, unop(op, mkexpr(arg_m)));
6663               DIP("vrev64.%u %c%u, %c%u\n", 8 << size,
6664                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6665               break;
6666            }
6667            case 1: {
6668               /* VREV32 */
6669               IROp op;
6670               switch (size) {
6671                  case 0:
6672                     op = Q ? Iop_Reverse32_8x16 : Iop_Reverse32_8x8;
6673                     break;
6674                  case 1:
6675                     op = Q ? Iop_Reverse32_16x8 : Iop_Reverse32_16x4;
6676                     break;
6677                  case 2:
6678                  case 3:
6679                     return False;
6680                  default:
6681                     vassert(0);
6682               }
6683               assign(res, unop(op, mkexpr(arg_m)));
6684               DIP("vrev32.%u %c%u, %c%u\n", 8 << size,
6685                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6686               break;
6687            }
6688            case 2: {
6689               /* VREV16 */
6690               IROp op;
6691               switch (size) {
6692                  case 0:
6693                     op = Q ? Iop_Reverse16_8x16 : Iop_Reverse16_8x8;
6694                     break;
6695                  case 1:
6696                  case 2:
6697                  case 3:
6698                     return False;
6699                  default:
6700                     vassert(0);
6701               }
6702               assign(res, unop(op, mkexpr(arg_m)));
6703               DIP("vrev16.%u %c%u, %c%u\n", 8 << size,
6704                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6705               break;
6706            }
6707            case 3:
6708               return False;
6709            case 4:
6710            case 5: {
6711               /* VPADDL */
6712               IROp op;
6713               U = (theInstr >> 7) & 1;
6714               if (Q) {
6715                  switch (size) {
6716                     case 0: op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16; break;
6717                     case 1: op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8; break;
6718                     case 2: op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4; break;
6719                     case 3: return False;
6720                     default: vassert(0);
6721                  }
6722               } else {
6723                  switch (size) {
6724                     case 0: op = U ? Iop_PwAddL8Ux8  : Iop_PwAddL8Sx8;  break;
6725                     case 1: op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4; break;
6726                     case 2: op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2; break;
6727                     case 3: return False;
6728                     default: vassert(0);
6729                  }
6730               }
6731               assign(res, unop(op, mkexpr(arg_m)));
6732               DIP("vpaddl.%c%u %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6733                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6734               break;
6735            }
6736            case 6:
6737            case 7:
6738               return False;
6739            case 8: {
6740               /* VCLS */
6741               IROp op;
6742               switch (size) {
6743                  case 0: op = Q ? Iop_Cls8Sx16 : Iop_Cls8Sx8; break;
6744                  case 1: op = Q ? Iop_Cls16Sx8 : Iop_Cls16Sx4; break;
6745                  case 2: op = Q ? Iop_Cls32Sx4 : Iop_Cls32Sx2; break;
6746                  case 3: return False;
6747                  default: vassert(0);
6748               }
6749               assign(res, unop(op, mkexpr(arg_m)));
6750               DIP("vcls.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6751                   Q ? 'q' : 'd', mreg);
6752               break;
6753            }
6754            case 9: {
6755               /* VCLZ */
6756               IROp op;
6757               switch (size) {
6758                  case 0: op = Q ? Iop_Clz8Sx16 : Iop_Clz8Sx8; break;
6759                  case 1: op = Q ? Iop_Clz16Sx8 : Iop_Clz16Sx4; break;
6760                  case 2: op = Q ? Iop_Clz32Sx4 : Iop_Clz32Sx2; break;
6761                  case 3: return False;
6762                  default: vassert(0);
6763               }
6764               assign(res, unop(op, mkexpr(arg_m)));
6765               DIP("vclz.i%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6766                   Q ? 'q' : 'd', mreg);
6767               break;
6768            }
6769            case 10:
6770               /* VCNT */
6771               assign(res, unop(Q ? Iop_Cnt8x16 : Iop_Cnt8x8, mkexpr(arg_m)));
6772               DIP("vcnt.8 %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6773                   mreg);
6774               break;
6775            case 11:
6776               /* VMVN */
6777               if (Q)
6778                  assign(res, unop(Iop_NotV128, mkexpr(arg_m)));
6779               else
6780                  assign(res, unop(Iop_Not64, mkexpr(arg_m)));
6781               DIP("vmvn %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6782                   mreg);
6783               break;
6784            case 12:
6785            case 13: {
6786               /* VPADAL */
6787               IROp op, add_op;
6788               U = (theInstr >> 7) & 1;
6789               if (Q) {
6790                  switch (size) {
6791                     case 0:
6792                        op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16;
6793                        add_op = Iop_Add16x8;
6794                        break;
6795                     case 1:
6796                        op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8;
6797                        add_op = Iop_Add32x4;
6798                        break;
6799                     case 2:
6800                        op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4;
6801                        add_op = Iop_Add64x2;
6802                        break;
6803                     case 3:
6804                        return False;
6805                     default:
6806                        vassert(0);
6807                  }
6808               } else {
6809                  switch (size) {
6810                     case 0:
6811                        op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8;
6812                        add_op = Iop_Add16x4;
6813                        break;
6814                     case 1:
6815                        op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4;
6816                        add_op = Iop_Add32x2;
6817                        break;
6818                     case 2:
6819                        op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2;
6820                        add_op = Iop_Add64;
6821                        break;
6822                     case 3:
6823                        return False;
6824                     default:
6825                        vassert(0);
6826                  }
6827               }
6828               if (Q) {
6829                  arg_d = newTemp(Ity_V128);
6830                  assign(arg_d, getQReg(dreg));
6831               } else {
6832                  arg_d = newTemp(Ity_I64);
6833                  assign(arg_d, getDRegI64(dreg));
6834               }
6835               assign(res, binop(add_op, unop(op, mkexpr(arg_m)),
6836                                         mkexpr(arg_d)));
6837               DIP("vpadal.%c%u %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6838                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6839               break;
6840            }
6841            case 14: {
6842               /* VQABS */
6843               IROp op_sub, op_qsub, op_cmp;
6844               IRTemp mask, tmp;
6845               IRExpr *zero1, *zero2;
6846               IRExpr *neg, *neg2;
6847               if (Q) {
6848                  zero1 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6849                  zero2 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6850                  mask = newTemp(Ity_V128);
6851                  tmp = newTemp(Ity_V128);
6852               } else {
6853                  zero1 = mkU64(0);
6854                  zero2 = mkU64(0);
6855                  mask = newTemp(Ity_I64);
6856                  tmp = newTemp(Ity_I64);
6857               }
6858               switch (size) {
6859                  case 0:
6860                     op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6861                     op_qsub = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6862                     op_cmp = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
6863                     break;
6864                  case 1:
6865                     op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6866                     op_qsub = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6867                     op_cmp = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4;
6868                     break;
6869                  case 2:
6870                     op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6871                     op_qsub = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6872                     op_cmp = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2;
6873                     break;
6874                  case 3:
6875                     return False;
6876                  default:
6877                     vassert(0);
6878               }
6879               assign(mask, binop(op_cmp, mkexpr(arg_m), zero1));
6880               neg = binop(op_qsub, zero2, mkexpr(arg_m));
6881               neg2 = binop(op_sub, zero2, mkexpr(arg_m));
6882               assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
6883                                 binop(Q ? Iop_AndV128 : Iop_And64,
6884                                       mkexpr(mask),
6885                                       mkexpr(arg_m)),
6886                                 binop(Q ? Iop_AndV128 : Iop_And64,
6887                                       unop(Q ? Iop_NotV128 : Iop_Not64,
6888                                            mkexpr(mask)),
6889                                       neg)));
6890               assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
6891                                 binop(Q ? Iop_AndV128 : Iop_And64,
6892                                       mkexpr(mask),
6893                                       mkexpr(arg_m)),
6894                                 binop(Q ? Iop_AndV128 : Iop_And64,
6895                                       unop(Q ? Iop_NotV128 : Iop_Not64,
6896                                            mkexpr(mask)),
6897                                       neg2)));
6898               setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
6899               DIP("vqabs.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6900                   Q ? 'q' : 'd', mreg);
6901               break;
6902            }
6903            case 15: {
6904               /* VQNEG */
6905               IROp op, op2;
6906               IRExpr *zero;
6907               if (Q) {
6908                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6909               } else {
6910                  zero = mkU64(0);
6911               }
6912               switch (size) {
6913                  case 0:
6914                     op = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6915                     op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6916                     break;
6917                  case 1:
6918                     op = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6919                     op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6920                     break;
6921                  case 2:
6922                     op = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6923                     op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6924                     break;
6925                  case 3:
6926                     return False;
6927                  default:
6928                     vassert(0);
6929               }
6930               assign(res, binop(op, zero, mkexpr(arg_m)));
6931               setFlag_QC(mkexpr(res), binop(op2, zero, mkexpr(arg_m)),
6932                          Q, condT);
6933               DIP("vqneg.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6934                   Q ? 'q' : 'd', mreg);
6935               break;
6936            }
6937            default:
6938               vassert(0);
6939         }
6940         if (Q) {
6941            putQReg(dreg, mkexpr(res), condT);
6942         } else {
6943            putDRegI64(dreg, mkexpr(res), condT);
6944         }
6945         return True;
6946      case 1:
6947         if (Q) {
6948            arg_m = newTemp(Ity_V128);
6949            res = newTemp(Ity_V128);
6950            assign(arg_m, getQReg(mreg));
6951         } else {
6952            arg_m = newTemp(Ity_I64);
6953            res = newTemp(Ity_I64);
6954            assign(arg_m, getDRegI64(mreg));
6955         }
6956         switch ((B >> 1) & 0x7) {
6957            case 0: {
6958               /* VCGT #0 */
6959               IRExpr *zero;
6960               IROp op;
6961               if (Q) {
6962                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6963               } else {
6964                  zero = mkU64(0);
6965               }
6966               if (F) {
6967                  switch (size) {
6968                     case 0: case 1: case 3: return False;
6969                     case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
6970                     default: vassert(0);
6971                  }
6972               } else {
6973                  switch (size) {
6974                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
6975                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
6976                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
6977                     case 3: return False;
6978                     default: vassert(0);
6979                  }
6980               }
6981               assign(res, binop(op, mkexpr(arg_m), zero));
6982               DIP("vcgt.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
6983                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6984               break;
6985            }
6986            case 1: {
6987               /* VCGE #0 */
6988               IROp op;
6989               IRExpr *zero;
6990               if (Q) {
6991                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6992               } else {
6993                  zero = mkU64(0);
6994               }
6995               if (F) {
6996                  switch (size) {
6997                     case 0: case 1: case 3: return False;
6998                     case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
6999                     default: vassert(0);
7000                  }
7001                  assign(res, binop(op, mkexpr(arg_m), zero));
7002               } else {
7003                  switch (size) {
7004                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7005                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7006                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7007                     case 3: return False;
7008                     default: vassert(0);
7009                  }
7010                  assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7011                                   binop(op, zero, mkexpr(arg_m))));
7012               }
7013               DIP("vcge.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7014                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7015               break;
7016            }
7017            case 2: {
7018               /* VCEQ #0 */
7019               IROp op;
7020               IRExpr *zero;
7021               if (F) {
7022                  if (Q) {
7023                     zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7024                  } else {
7025                     zero = mkU64(0);
7026                  }
7027                  switch (size) {
7028                     case 0: case 1: case 3: return False;
7029                     case 2: op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2; break;
7030                     default: vassert(0);
7031                  }
7032                  assign(res, binop(op, zero, mkexpr(arg_m)));
7033               } else {
7034                  switch (size) {
7035                     case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
7036                     case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
7037                     case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
7038                     case 3: return False;
7039                     default: vassert(0);
7040                  }
7041                  assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7042                                   unop(op, mkexpr(arg_m))));
7043               }
7044               DIP("vceq.%c%u %c%u, %c%u, #0\n", F ? 'f' : 'i', 8 << size,
7045                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7046               break;
7047            }
7048            case 3: {
7049               /* VCLE #0 */
7050               IRExpr *zero;
7051               IROp op;
7052               if (Q) {
7053                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7054               } else {
7055                  zero = mkU64(0);
7056               }
7057               if (F) {
7058                  switch (size) {
7059                     case 0: case 1: case 3: return False;
7060                     case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
7061                     default: vassert(0);
7062                  }
7063                  assign(res, binop(op, zero, mkexpr(arg_m)));
7064               } else {
7065                  switch (size) {
7066                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7067                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7068                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7069                     case 3: return False;
7070                     default: vassert(0);
7071                  }
7072                  assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7073                                   binop(op, mkexpr(arg_m), zero)));
7074               }
7075               DIP("vcle.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7076                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7077               break;
7078            }
7079            case 4: {
7080               /* VCLT #0 */
7081               IROp op;
7082               IRExpr *zero;
7083               if (Q) {
7084                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7085               } else {
7086                  zero = mkU64(0);
7087               }
7088               if (F) {
7089                  switch (size) {
7090                     case 0: case 1: case 3: return False;
7091                     case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
7092                     default: vassert(0);
7093                  }
7094                  assign(res, binop(op, zero, mkexpr(arg_m)));
7095               } else {
7096                  switch (size) {
7097                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7098                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7099                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7100                     case 3: return False;
7101                     default: vassert(0);
7102                  }
7103                  assign(res, binop(op, zero, mkexpr(arg_m)));
7104               }
7105               DIP("vclt.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7106                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7107               break;
7108            }
7109            case 5:
7110               return False;
7111            case 6: {
7112               /* VABS */
7113               if (!F) {
7114                  IROp op;
7115                  switch(size) {
7116                     case 0: op = Q ? Iop_Abs8x16 : Iop_Abs8x8; break;
7117                     case 1: op = Q ? Iop_Abs16x8 : Iop_Abs16x4; break;
7118                     case 2: op = Q ? Iop_Abs32x4 : Iop_Abs32x2; break;
7119                     case 3: return False;
7120                     default: vassert(0);
7121                  }
7122                  assign(res, unop(op, mkexpr(arg_m)));
7123               } else {
7124                  assign(res, unop(Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2,
7125                                   mkexpr(arg_m)));
7126               }
7127               DIP("vabs.%c%u %c%u, %c%u\n",
7128                   F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7129                   Q ? 'q' : 'd', mreg);
7130               break;
7131            }
7132            case 7: {
7133               /* VNEG */
7134               IROp op;
7135               IRExpr *zero;
7136               if (F) {
7137                  switch (size) {
7138                     case 0: case 1: case 3: return False;
7139                     case 2: op = Q ? Iop_Neg32Fx4 : Iop_Neg32Fx2; break;
7140                     default: vassert(0);
7141                  }
7142                  assign(res, unop(op, mkexpr(arg_m)));
7143               } else {
7144                  if (Q) {
7145                     zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7146                  } else {
7147                     zero = mkU64(0);
7148                  }
7149                  switch (size) {
7150                     case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
7151                     case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
7152                     case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
7153                     case 3: return False;
7154                     default: vassert(0);
7155                  }
7156                  assign(res, binop(op, zero, mkexpr(arg_m)));
7157               }
7158               DIP("vneg.%c%u %c%u, %c%u\n",
7159                   F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7160                   Q ? 'q' : 'd', mreg);
7161               break;
7162            }
7163            default:
7164               vassert(0);
7165         }
7166         if (Q) {
7167            putQReg(dreg, mkexpr(res), condT);
7168         } else {
7169            putDRegI64(dreg, mkexpr(res), condT);
7170         }
7171         return True;
7172      case 2:
7173         if ((B >> 1) == 0) {
7174            /* VSWP */
7175            if (Q) {
7176               arg_m = newTemp(Ity_V128);
7177               assign(arg_m, getQReg(mreg));
7178               putQReg(mreg, getQReg(dreg), condT);
7179               putQReg(dreg, mkexpr(arg_m), condT);
7180            } else {
7181               arg_m = newTemp(Ity_I64);
7182               assign(arg_m, getDRegI64(mreg));
7183               putDRegI64(mreg, getDRegI64(dreg), condT);
7184               putDRegI64(dreg, mkexpr(arg_m), condT);
7185            }
7186            DIP("vswp %c%u, %c%u\n",
7187                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7188            return True;
7189         } else if ((B >> 1) == 1) {
7190            /* VTRN */
7191            IROp op_odd = Iop_INVALID, op_even = Iop_INVALID;
7192            IRTemp old_m, old_d, new_d, new_m;
7193            if (Q) {
7194               old_m = newTemp(Ity_V128);
7195               old_d = newTemp(Ity_V128);
7196               new_m = newTemp(Ity_V128);
7197               new_d = newTemp(Ity_V128);
7198               assign(old_m, getQReg(mreg));
7199               assign(old_d, getQReg(dreg));
7200            } else {
7201               old_m = newTemp(Ity_I64);
7202               old_d = newTemp(Ity_I64);
7203               new_m = newTemp(Ity_I64);
7204               new_d = newTemp(Ity_I64);
7205               assign(old_m, getDRegI64(mreg));
7206               assign(old_d, getDRegI64(dreg));
7207            }
7208            if (Q) {
7209               switch (size) {
7210                  case 0:
7211                     op_odd  = Iop_InterleaveOddLanes8x16;
7212                     op_even = Iop_InterleaveEvenLanes8x16;
7213                     break;
7214                  case 1:
7215                     op_odd  = Iop_InterleaveOddLanes16x8;
7216                     op_even = Iop_InterleaveEvenLanes16x8;
7217                     break;
7218                  case 2:
7219                     op_odd  = Iop_InterleaveOddLanes32x4;
7220                     op_even = Iop_InterleaveEvenLanes32x4;
7221                     break;
7222                  case 3:
7223                     return False;
7224                  default:
7225                     vassert(0);
7226               }
7227            } else {
7228               switch (size) {
7229                  case 0:
7230                     op_odd  = Iop_InterleaveOddLanes8x8;
7231                     op_even = Iop_InterleaveEvenLanes8x8;
7232                     break;
7233                  case 1:
7234                     op_odd  = Iop_InterleaveOddLanes16x4;
7235                     op_even = Iop_InterleaveEvenLanes16x4;
7236                     break;
7237                  case 2:
7238                     op_odd  = Iop_InterleaveHI32x2;
7239                     op_even = Iop_InterleaveLO32x2;
7240                     break;
7241                  case 3:
7242                     return False;
7243                  default:
7244                     vassert(0);
7245               }
7246            }
7247            assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7248            assign(new_m, binop(op_odd, mkexpr(old_m), mkexpr(old_d)));
7249            if (Q) {
7250               putQReg(dreg, mkexpr(new_d), condT);
7251               putQReg(mreg, mkexpr(new_m), condT);
7252            } else {
7253               putDRegI64(dreg, mkexpr(new_d), condT);
7254               putDRegI64(mreg, mkexpr(new_m), condT);
7255            }
7256            DIP("vtrn.%u %c%u, %c%u\n",
7257                8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7258            return True;
7259         } else if ((B >> 1) == 2) {
7260            /* VUZP */
7261            IROp op_even, op_odd;
7262            IRTemp old_m, old_d, new_m, new_d;
7263            if (!Q && size == 2)
7264               return False;
7265            if (Q) {
7266               old_m = newTemp(Ity_V128);
7267               old_d = newTemp(Ity_V128);
7268               new_m = newTemp(Ity_V128);
7269               new_d = newTemp(Ity_V128);
7270               assign(old_m, getQReg(mreg));
7271               assign(old_d, getQReg(dreg));
7272            } else {
7273               old_m = newTemp(Ity_I64);
7274               old_d = newTemp(Ity_I64);
7275               new_m = newTemp(Ity_I64);
7276               new_d = newTemp(Ity_I64);
7277               assign(old_m, getDRegI64(mreg));
7278               assign(old_d, getDRegI64(dreg));
7279            }
7280            switch (size) {
7281               case 0:
7282                  op_odd  = Q ? Iop_CatOddLanes8x16 : Iop_CatOddLanes8x8;
7283                  op_even = Q ? Iop_CatEvenLanes8x16 : Iop_CatEvenLanes8x8;
7284                  break;
7285               case 1:
7286                  op_odd  = Q ? Iop_CatOddLanes16x8 : Iop_CatOddLanes16x4;
7287                  op_even = Q ? Iop_CatEvenLanes16x8 : Iop_CatEvenLanes16x4;
7288                  break;
7289               case 2:
7290                  op_odd  = Iop_CatOddLanes32x4;
7291                  op_even = Iop_CatEvenLanes32x4;
7292                  break;
7293               case 3:
7294                  return False;
7295               default:
7296                  vassert(0);
7297            }
7298            assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7299            assign(new_m, binop(op_odd,  mkexpr(old_m), mkexpr(old_d)));
7300            if (Q) {
7301               putQReg(dreg, mkexpr(new_d), condT);
7302               putQReg(mreg, mkexpr(new_m), condT);
7303            } else {
7304               putDRegI64(dreg, mkexpr(new_d), condT);
7305               putDRegI64(mreg, mkexpr(new_m), condT);
7306            }
7307            DIP("vuzp.%u %c%u, %c%u\n",
7308                8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7309            return True;
7310         } else if ((B >> 1) == 3) {
7311            /* VZIP */
7312            IROp op_lo, op_hi;
7313            IRTemp old_m, old_d, new_m, new_d;
7314            if (!Q && size == 2)
7315               return False;
7316            if (Q) {
7317               old_m = newTemp(Ity_V128);
7318               old_d = newTemp(Ity_V128);
7319               new_m = newTemp(Ity_V128);
7320               new_d = newTemp(Ity_V128);
7321               assign(old_m, getQReg(mreg));
7322               assign(old_d, getQReg(dreg));
7323            } else {
7324               old_m = newTemp(Ity_I64);
7325               old_d = newTemp(Ity_I64);
7326               new_m = newTemp(Ity_I64);
7327               new_d = newTemp(Ity_I64);
7328               assign(old_m, getDRegI64(mreg));
7329               assign(old_d, getDRegI64(dreg));
7330            }
7331            switch (size) {
7332               case 0:
7333                  op_hi = Q ? Iop_InterleaveHI8x16 : Iop_InterleaveHI8x8;
7334                  op_lo = Q ? Iop_InterleaveLO8x16 : Iop_InterleaveLO8x8;
7335                  break;
7336               case 1:
7337                  op_hi = Q ? Iop_InterleaveHI16x8 : Iop_InterleaveHI16x4;
7338                  op_lo = Q ? Iop_InterleaveLO16x8 : Iop_InterleaveLO16x4;
7339                  break;
7340               case 2:
7341                  op_hi = Iop_InterleaveHI32x4;
7342                  op_lo = Iop_InterleaveLO32x4;
7343                  break;
7344               case 3:
7345                  return False;
7346               default:
7347                  vassert(0);
7348            }
7349            assign(new_d, binop(op_lo, mkexpr(old_m), mkexpr(old_d)));
7350            assign(new_m, binop(op_hi, mkexpr(old_m), mkexpr(old_d)));
7351            if (Q) {
7352               putQReg(dreg, mkexpr(new_d), condT);
7353               putQReg(mreg, mkexpr(new_m), condT);
7354            } else {
7355               putDRegI64(dreg, mkexpr(new_d), condT);
7356               putDRegI64(mreg, mkexpr(new_m), condT);
7357            }
7358            DIP("vzip.%u %c%u, %c%u\n",
7359                8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7360            return True;
7361         } else if (B == 8) {
7362            /* VMOVN */
7363            IROp op;
7364            mreg >>= 1;
7365            switch (size) {
7366               case 0: op = Iop_NarrowUn16to8x8;  break;
7367               case 1: op = Iop_NarrowUn32to16x4; break;
7368               case 2: op = Iop_NarrowUn64to32x2; break;
7369               case 3: return False;
7370               default: vassert(0);
7371            }
7372            putDRegI64(dreg, unop(op, getQReg(mreg)), condT);
7373            DIP("vmovn.i%u d%u, q%u\n", 16 << size, dreg, mreg);
7374            return True;
7375         } else if (B == 9 || (B >> 1) == 5) {
7376            /* VQMOVN, VQMOVUN */
7377            IROp op, op2;
7378            IRTemp tmp;
7379            dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
7380            mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
7381            if (mreg & 1)
7382               return False;
7383            mreg >>= 1;
7384            switch (size) {
7385               case 0: op2 = Iop_NarrowUn16to8x8;  break;
7386               case 1: op2 = Iop_NarrowUn32to16x4; break;
7387               case 2: op2 = Iop_NarrowUn64to32x2; break;
7388               case 3: return False;
7389               default: vassert(0);
7390            }
7391            switch (B & 3) {
7392               case 0:
7393                  vassert(0);
7394               case 1:
7395                  switch (size) {
7396                     case 0: op = Iop_QNarrowUn16Sto8Ux8;  break;
7397                     case 1: op = Iop_QNarrowUn32Sto16Ux4; break;
7398                     case 2: op = Iop_QNarrowUn64Sto32Ux2; break;
7399                     case 3: return False;
7400                     default: vassert(0);
7401                  }
7402                  DIP("vqmovun.s%u d%u, q%u\n", 16 << size, dreg, mreg);
7403                  break;
7404               case 2:
7405                  switch (size) {
7406                     case 0: op = Iop_QNarrowUn16Sto8Sx8;  break;
7407                     case 1: op = Iop_QNarrowUn32Sto16Sx4; break;
7408                     case 2: op = Iop_QNarrowUn64Sto32Sx2; break;
7409                     case 3: return False;
7410                     default: vassert(0);
7411                  }
7412                  DIP("vqmovn.s%u d%u, q%u\n", 16 << size, dreg, mreg);
7413                  break;
7414               case 3:
7415                  switch (size) {
7416                     case 0: op = Iop_QNarrowUn16Uto8Ux8;  break;
7417                     case 1: op = Iop_QNarrowUn32Uto16Ux4; break;
7418                     case 2: op = Iop_QNarrowUn64Uto32Ux2; break;
7419                     case 3: return False;
7420                     default: vassert(0);
7421                  }
7422                  DIP("vqmovn.u%u d%u, q%u\n", 16 << size, dreg, mreg);
7423                  break;
7424               default:
7425                  vassert(0);
7426            }
7427            res = newTemp(Ity_I64);
7428            tmp = newTemp(Ity_I64);
7429            assign(res, unop(op, getQReg(mreg)));
7430            assign(tmp, unop(op2, getQReg(mreg)));
7431            setFlag_QC(mkexpr(res), mkexpr(tmp), False, condT);
7432            putDRegI64(dreg, mkexpr(res), condT);
7433            return True;
7434         } else if (B == 12) {
7435            /* VSHLL (maximum shift) */
7436            IROp op, cvt;
7437            UInt shift_imm;
7438            if (Q)
7439               return False;
7440            if (dreg & 1)
7441               return False;
7442            dreg >>= 1;
7443            shift_imm = 8 << size;
7444            res = newTemp(Ity_V128);
7445            switch (size) {
7446               case 0: op = Iop_ShlN16x8; cvt = Iop_Widen8Uto16x8;  break;
7447               case 1: op = Iop_ShlN32x4; cvt = Iop_Widen16Uto32x4; break;
7448               case 2: op = Iop_ShlN64x2; cvt = Iop_Widen32Uto64x2; break;
7449               case 3: return False;
7450               default: vassert(0);
7451            }
7452            assign(res, binop(op, unop(cvt, getDRegI64(mreg)),
7453                                  mkU8(shift_imm)));
7454            putQReg(dreg, mkexpr(res), condT);
7455            DIP("vshll.i%u q%u, d%u, #%u\n", 8 << size, dreg, mreg, 8 << size);
7456            return True;
7457         } else if ((B >> 3) == 3 && (B & 3) == 0) {
7458            /* VCVT (half<->single) */
7459            /* Half-precision extensions are needed to run this */
7460            vassert(0); // ATC
7461            if (((theInstr >> 18) & 3) != 1)
7462               return False;
7463            if ((theInstr >> 8) & 1) {
7464               if (dreg & 1)
7465                  return False;
7466               dreg >>= 1;
7467               putQReg(dreg, unop(Iop_F16toF32x4, getDRegI64(mreg)),
7468                     condT);
7469               DIP("vcvt.f32.f16 q%u, d%u\n", dreg, mreg);
7470            } else {
7471               if (mreg & 1)
7472                  return False;
7473               mreg >>= 1;
7474               putDRegI64(dreg, unop(Iop_F32toF16x4, getQReg(mreg)),
7475                                condT);
7476               DIP("vcvt.f16.f32 d%u, q%u\n", dreg, mreg);
7477            }
7478            return True;
7479         } else {
7480            return False;
7481         }
7482         vassert(0);
7483         return True;
7484      case 3:
7485         if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,0)) {
7486            /* VRECPE */
7487            IROp op;
7488            F = (theInstr >> 8) & 1;
7489            if (size != 2)
7490               return False;
7491            if (Q) {
7492               op = F ? Iop_Recip32Fx4 : Iop_Recip32x4;
7493               putQReg(dreg, unop(op, getQReg(mreg)), condT);
7494               DIP("vrecpe.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7495            } else {
7496               op = F ? Iop_Recip32Fx2 : Iop_Recip32x2;
7497               putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7498               DIP("vrecpe.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7499            }
7500            return True;
7501         } else if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,1)) {
7502            /* VRSQRTE */
7503            IROp op;
7504            F = (B >> 2) & 1;
7505            if (size != 2)
7506               return False;
7507            if (F) {
7508               /* fp */
7509               op = Q ? Iop_Rsqrte32Fx4 : Iop_Rsqrte32Fx2;
7510            } else {
7511               /* unsigned int */
7512               op = Q ? Iop_Rsqrte32x4 : Iop_Rsqrte32x2;
7513            }
7514            if (Q) {
7515               putQReg(dreg, unop(op, getQReg(mreg)), condT);
7516               DIP("vrsqrte.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7517            } else {
7518               putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7519               DIP("vrsqrte.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7520            }
7521            return True;
7522         } else if ((B >> 3) == 3) {
7523            /* VCVT (fp<->integer) */
7524            IROp op;
7525            if (size != 2)
7526               return False;
7527            switch ((B >> 1) & 3) {
7528               case 0:
7529                  op = Q ? Iop_I32StoFx4 : Iop_I32StoFx2;
7530                  DIP("vcvt.f32.s32 %c%u, %c%u\n",
7531                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7532                  break;
7533               case 1:
7534                  op = Q ? Iop_I32UtoFx4 : Iop_I32UtoFx2;
7535                  DIP("vcvt.f32.u32 %c%u, %c%u\n",
7536                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7537                  break;
7538               case 2:
7539                  op = Q ? Iop_FtoI32Sx4_RZ : Iop_FtoI32Sx2_RZ;
7540                  DIP("vcvt.s32.f32 %c%u, %c%u\n",
7541                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7542                  break;
7543               case 3:
7544                  op = Q ? Iop_FtoI32Ux4_RZ : Iop_FtoI32Ux2_RZ;
7545                  DIP("vcvt.u32.f32 %c%u, %c%u\n",
7546                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7547                  break;
7548               default:
7549                  vassert(0);
7550            }
7551            if (Q) {
7552               putQReg(dreg, unop(op, getQReg(mreg)), condT);
7553            } else {
7554               putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7555            }
7556            return True;
7557         } else {
7558            return False;
7559         }
7560         vassert(0);
7561         return True;
7562      default:
7563         vassert(0);
7564   }
7565   return False;
7566}
7567
7568/* A7.4.6 One register and a modified immediate value */
7569static
7570void ppNeonImm(UInt imm, UInt cmode, UInt op)
7571{
7572   int i;
7573   switch (cmode) {
7574      case 0: case 1: case 8: case 9:
7575         vex_printf("0x%x", imm);
7576         break;
7577      case 2: case 3: case 10: case 11:
7578         vex_printf("0x%x00", imm);
7579         break;
7580      case 4: case 5:
7581         vex_printf("0x%x0000", imm);
7582         break;
7583      case 6: case 7:
7584         vex_printf("0x%x000000", imm);
7585         break;
7586      case 12:
7587         vex_printf("0x%xff", imm);
7588         break;
7589      case 13:
7590         vex_printf("0x%xffff", imm);
7591         break;
7592      case 14:
7593         if (op) {
7594            vex_printf("0x");
7595            for (i = 7; i >= 0; i--)
7596               vex_printf("%s", (imm & (1 << i)) ? "ff" : "00");
7597         } else {
7598            vex_printf("0x%x", imm);
7599         }
7600         break;
7601      case 15:
7602         vex_printf("0x%x", imm);
7603         break;
7604   }
7605}
7606
7607static
7608const char *ppNeonImmType(UInt cmode, UInt op)
7609{
7610   switch (cmode) {
7611      case 0 ... 7:
7612      case 12: case 13:
7613         return "i32";
7614      case 8 ... 11:
7615         return "i16";
7616      case 14:
7617         if (op)
7618            return "i64";
7619         else
7620            return "i8";
7621      case 15:
7622         if (op)
7623            vassert(0);
7624         else
7625            return "f32";
7626      default:
7627         vassert(0);
7628   }
7629}
7630
7631static
7632void DIPimm(UInt imm, UInt cmode, UInt op,
7633            const char *instr, UInt Q, UInt dreg)
7634{
7635   if (vex_traceflags & VEX_TRACE_FE) {
7636      vex_printf("%s.%s %c%u, #", instr,
7637                 ppNeonImmType(cmode, op), Q ? 'q' : 'd', dreg);
7638      ppNeonImm(imm, cmode, op);
7639      vex_printf("\n");
7640   }
7641}
7642
7643static
7644Bool dis_neon_data_1reg_and_imm ( UInt theInstr, IRTemp condT )
7645{
7646   UInt dreg = get_neon_d_regno(theInstr);
7647   ULong imm_raw = ((theInstr >> 17) & 0x80) | ((theInstr >> 12) & 0x70) |
7648                  (theInstr & 0xf);
7649   ULong imm_raw_pp = imm_raw;
7650   UInt cmode = (theInstr >> 8) & 0xf;
7651   UInt op_bit = (theInstr >> 5) & 1;
7652   ULong imm = 0;
7653   UInt Q = (theInstr >> 6) & 1;
7654   int i, j;
7655   UInt tmp;
7656   IRExpr *imm_val;
7657   IRExpr *expr;
7658   IRTemp tmp_var;
7659   switch(cmode) {
7660      case 7: case 6:
7661         imm_raw = imm_raw << 8;
7662         /* fallthrough */
7663      case 5: case 4:
7664         imm_raw = imm_raw << 8;
7665         /* fallthrough */
7666      case 3: case 2:
7667         imm_raw = imm_raw << 8;
7668         /* fallthrough */
7669      case 0: case 1:
7670         imm = (imm_raw << 32) | imm_raw;
7671         break;
7672      case 11: case 10:
7673         imm_raw = imm_raw << 8;
7674         /* fallthrough */
7675      case 9: case 8:
7676         imm_raw = (imm_raw << 16) | imm_raw;
7677         imm = (imm_raw << 32) | imm_raw;
7678         break;
7679      case 13:
7680         imm_raw = (imm_raw << 8) | 0xff;
7681         /* fallthrough */
7682      case 12:
7683         imm_raw = (imm_raw << 8) | 0xff;
7684         imm = (imm_raw << 32) | imm_raw;
7685         break;
7686      case 14:
7687         if (! op_bit) {
7688            for(i = 0; i < 8; i++) {
7689               imm = (imm << 8) | imm_raw;
7690            }
7691         } else {
7692            for(i = 7; i >= 0; i--) {
7693               tmp = 0;
7694               for(j = 0; j < 8; j++) {
7695                  tmp = (tmp << 1) | ((imm_raw >> i) & 1);
7696               }
7697               imm = (imm << 8) | tmp;
7698            }
7699         }
7700         break;
7701      case 15:
7702         imm = (imm_raw & 0x80) << 5;
7703         imm |= ((~imm_raw & 0x40) << 5);
7704         for(i = 1; i <= 4; i++)
7705            imm |= (imm_raw & 0x40) << i;
7706         imm |= (imm_raw & 0x7f);
7707         imm = imm << 19;
7708         imm = (imm << 32) | imm;
7709         break;
7710      default:
7711         return False;
7712   }
7713   if (Q) {
7714      imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
7715   } else {
7716      imm_val = mkU64(imm);
7717   }
7718   if (((op_bit == 0) &&
7719      (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 12) == 12))) ||
7720      ((op_bit == 1) && (cmode == 14))) {
7721      /* VMOV (immediate) */
7722      if (Q) {
7723         putQReg(dreg, imm_val, condT);
7724      } else {
7725         putDRegI64(dreg, imm_val, condT);
7726      }
7727      DIPimm(imm_raw_pp, cmode, op_bit, "vmov", Q, dreg);
7728      return True;
7729   }
7730   if ((op_bit == 1) &&
7731      (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 14) == 12))) {
7732      /* VMVN (immediate) */
7733      if (Q) {
7734         putQReg(dreg, unop(Iop_NotV128, imm_val), condT);
7735      } else {
7736         putDRegI64(dreg, unop(Iop_Not64, imm_val), condT);
7737      }
7738      DIPimm(imm_raw_pp, cmode, op_bit, "vmvn", Q, dreg);
7739      return True;
7740   }
7741   if (Q) {
7742      tmp_var = newTemp(Ity_V128);
7743      assign(tmp_var, getQReg(dreg));
7744   } else {
7745      tmp_var = newTemp(Ity_I64);
7746      assign(tmp_var, getDRegI64(dreg));
7747   }
7748   if ((op_bit == 0) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7749      /* VORR (immediate) */
7750      if (Q)
7751         expr = binop(Iop_OrV128, mkexpr(tmp_var), imm_val);
7752      else
7753         expr = binop(Iop_Or64, mkexpr(tmp_var), imm_val);
7754      DIPimm(imm_raw_pp, cmode, op_bit, "vorr", Q, dreg);
7755   } else if ((op_bit == 1) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7756      /* VBIC (immediate) */
7757      if (Q)
7758         expr = binop(Iop_AndV128, mkexpr(tmp_var),
7759                                   unop(Iop_NotV128, imm_val));
7760      else
7761         expr = binop(Iop_And64, mkexpr(tmp_var), unop(Iop_Not64, imm_val));
7762      DIPimm(imm_raw_pp, cmode, op_bit, "vbic", Q, dreg);
7763   } else {
7764      return False;
7765   }
7766   if (Q)
7767      putQReg(dreg, expr, condT);
7768   else
7769      putDRegI64(dreg, expr, condT);
7770   return True;
7771}
7772
7773/* A7.4 Advanced SIMD data-processing instructions */
7774static
7775Bool dis_neon_data_processing ( UInt theInstr, IRTemp condT )
7776{
7777   UInt A = (theInstr >> 19) & 0x1F;
7778   UInt B = (theInstr >>  8) & 0xF;
7779   UInt C = (theInstr >>  4) & 0xF;
7780   UInt U = (theInstr >> 24) & 0x1;
7781
7782   if (! (A & 0x10)) {
7783      return dis_neon_data_3same(theInstr, condT);
7784   }
7785   if (((A & 0x17) == 0x10) && ((C & 0x9) == 0x1)) {
7786      return dis_neon_data_1reg_and_imm(theInstr, condT);
7787   }
7788   if ((C & 1) == 1) {
7789      return dis_neon_data_2reg_and_shift(theInstr, condT);
7790   }
7791   if (((C & 5) == 0) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7792      return dis_neon_data_3diff(theInstr, condT);
7793   }
7794   if (((C & 5) == 4) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7795      return dis_neon_data_2reg_and_scalar(theInstr, condT);
7796   }
7797   if ((A & 0x16) == 0x16) {
7798      if ((U == 0) && ((C & 1) == 0)) {
7799         return dis_neon_vext(theInstr, condT);
7800      }
7801      if ((U != 1) || ((C & 1) == 1))
7802         return False;
7803      if ((B & 8) == 0) {
7804         return dis_neon_data_2reg_misc(theInstr, condT);
7805      }
7806      if ((B & 12) == 8) {
7807         return dis_neon_vtb(theInstr, condT);
7808      }
7809      if ((B == 12) && ((C & 9) == 0)) {
7810         return dis_neon_vdup(theInstr, condT);
7811      }
7812   }
7813   return False;
7814}
7815
7816
7817/*------------------------------------------------------------*/
7818/*--- NEON loads and stores                                ---*/
7819/*------------------------------------------------------------*/
7820
7821/* For NEON memory operations, we use the standard scheme to handle
7822   conditionalisation: generate a jump around the instruction if the
7823   condition is false.  That's only necessary in Thumb mode, however,
7824   since in ARM mode NEON instructions are unconditional. */
7825
7826/* A helper function for what follows.  It assumes we already went
7827   uncond as per comments at the top of this section. */
7828static
7829void mk_neon_elem_load_to_one_lane( UInt rD, UInt inc, UInt index,
7830                                    UInt N, UInt size, IRTemp addr )
7831{
7832   UInt i;
7833   switch (size) {
7834      case 0:
7835         putDRegI64(rD, triop(Iop_SetElem8x8, getDRegI64(rD), mkU8(index),
7836                    loadLE(Ity_I8, mkexpr(addr))), IRTemp_INVALID);
7837         break;
7838      case 1:
7839         putDRegI64(rD, triop(Iop_SetElem16x4, getDRegI64(rD), mkU8(index),
7840                    loadLE(Ity_I16, mkexpr(addr))), IRTemp_INVALID);
7841         break;
7842      case 2:
7843         putDRegI64(rD, triop(Iop_SetElem32x2, getDRegI64(rD), mkU8(index),
7844                    loadLE(Ity_I32, mkexpr(addr))), IRTemp_INVALID);
7845         break;
7846      default:
7847         vassert(0);
7848   }
7849   for (i = 1; i <= N; i++) {
7850      switch (size) {
7851         case 0:
7852            putDRegI64(rD + i * inc,
7853                       triop(Iop_SetElem8x8,
7854                             getDRegI64(rD + i * inc),
7855                             mkU8(index),
7856                             loadLE(Ity_I8, binop(Iop_Add32,
7857                                                  mkexpr(addr),
7858                                                  mkU32(i * 1)))),
7859                       IRTemp_INVALID);
7860            break;
7861         case 1:
7862            putDRegI64(rD + i * inc,
7863                       triop(Iop_SetElem16x4,
7864                             getDRegI64(rD + i * inc),
7865                             mkU8(index),
7866                             loadLE(Ity_I16, binop(Iop_Add32,
7867                                                   mkexpr(addr),
7868                                                   mkU32(i * 2)))),
7869                       IRTemp_INVALID);
7870            break;
7871         case 2:
7872            putDRegI64(rD + i * inc,
7873                       triop(Iop_SetElem32x2,
7874                             getDRegI64(rD + i * inc),
7875                             mkU8(index),
7876                             loadLE(Ity_I32, binop(Iop_Add32,
7877                                                   mkexpr(addr),
7878                                                   mkU32(i * 4)))),
7879                       IRTemp_INVALID);
7880            break;
7881         default:
7882            vassert(0);
7883      }
7884   }
7885}
7886
7887/* A(nother) helper function for what follows.  It assumes we already
7888   went uncond as per comments at the top of this section. */
7889static
7890void mk_neon_elem_store_from_one_lane( UInt rD, UInt inc, UInt index,
7891                                       UInt N, UInt size, IRTemp addr )
7892{
7893   UInt i;
7894   switch (size) {
7895      case 0:
7896         storeLE(mkexpr(addr),
7897                 binop(Iop_GetElem8x8, getDRegI64(rD), mkU8(index)));
7898         break;
7899      case 1:
7900         storeLE(mkexpr(addr),
7901                 binop(Iop_GetElem16x4, getDRegI64(rD), mkU8(index)));
7902         break;
7903      case 2:
7904         storeLE(mkexpr(addr),
7905                 binop(Iop_GetElem32x2, getDRegI64(rD), mkU8(index)));
7906         break;
7907      default:
7908         vassert(0);
7909   }
7910   for (i = 1; i <= N; i++) {
7911      switch (size) {
7912         case 0:
7913            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 1)),
7914                    binop(Iop_GetElem8x8, getDRegI64(rD + i * inc),
7915                                          mkU8(index)));
7916            break;
7917         case 1:
7918            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 2)),
7919                    binop(Iop_GetElem16x4, getDRegI64(rD + i * inc),
7920                                           mkU8(index)));
7921            break;
7922         case 2:
7923            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 4)),
7924                    binop(Iop_GetElem32x2, getDRegI64(rD + i * inc),
7925                                           mkU8(index)));
7926            break;
7927         default:
7928            vassert(0);
7929      }
7930   }
7931}
7932
7933/* Generate 2x64 -> 2x64 deinterleave code, for VLD2.  Caller must
7934   make *u0 and *u1 be valid IRTemps before the call. */
7935static void math_DEINTERLEAVE_2 (/*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
7936                                 IRTemp i0, IRTemp i1, Int laneszB)
7937{
7938   /* The following assumes that the guest is little endian, and hence
7939      that the memory-side (interleaved) data is stored
7940      little-endianly. */
7941   vassert(u0 && u1);
7942   /* This is pretty easy, since we have primitives directly to
7943      hand. */
7944   if (laneszB == 4) {
7945      // memLE(128 bits) == A0 B0 A1 B1
7946      // i0 == B0 A0, i1 == B1 A1
7947      // u0 == A1 A0, u1 == B1 B0
7948      assign(*u0, binop(Iop_InterleaveLO32x2, mkexpr(i1), mkexpr(i0)));
7949      assign(*u1, binop(Iop_InterleaveHI32x2, mkexpr(i1), mkexpr(i0)));
7950   } else if (laneszB == 2) {
7951      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
7952      // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
7953      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
7954      assign(*u0, binop(Iop_CatEvenLanes16x4, mkexpr(i1), mkexpr(i0)));
7955      assign(*u1, binop(Iop_CatOddLanes16x4,  mkexpr(i1), mkexpr(i0)));
7956   } else if (laneszB == 1) {
7957      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
7958      // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
7959      // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
7960      assign(*u0, binop(Iop_CatEvenLanes8x8, mkexpr(i1), mkexpr(i0)));
7961      assign(*u1, binop(Iop_CatOddLanes8x8,  mkexpr(i1), mkexpr(i0)));
7962   } else {
7963      // Can never happen, since VLD2 only has valid lane widths of 32,
7964      // 16 or 8 bits.
7965      vpanic("math_DEINTERLEAVE_2");
7966   }
7967}
7968
7969/* Generate 2x64 -> 2x64 interleave code, for VST2.  Caller must make
7970   *u0 and *u1 be valid IRTemps before the call. */
7971static void math_INTERLEAVE_2 (/*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
7972                               IRTemp u0, IRTemp u1, Int laneszB)
7973{
7974   /* The following assumes that the guest is little endian, and hence
7975      that the memory-side (interleaved) data is stored
7976      little-endianly. */
7977   vassert(i0 && *i1);
7978   /* This is pretty easy, since we have primitives directly to
7979      hand. */
7980   if (laneszB == 4) {
7981      // memLE(128 bits) == A0 B0 A1 B1
7982      // i0 == B0 A0, i1 == B1 A1
7983      // u0 == A1 A0, u1 == B1 B0
7984      assign(*i0, binop(Iop_InterleaveLO32x2, mkexpr(u1), mkexpr(u0)));
7985      assign(*i1, binop(Iop_InterleaveHI32x2, mkexpr(u1), mkexpr(u0)));
7986   } else if (laneszB == 2) {
7987      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
7988      // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
7989      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
7990      assign(*i0, binop(Iop_InterleaveLO16x4, mkexpr(u1), mkexpr(u0)));
7991      assign(*i1, binop(Iop_InterleaveHI16x4, mkexpr(u1), mkexpr(u0)));
7992   } else if (laneszB == 1) {
7993      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
7994      // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
7995      // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
7996      assign(*i0, binop(Iop_InterleaveLO8x8, mkexpr(u1), mkexpr(u0)));
7997      assign(*i1, binop(Iop_InterleaveHI8x8, mkexpr(u1), mkexpr(u0)));
7998   } else {
7999      // Can never happen, since VST2 only has valid lane widths of 32,
8000      // 16 or 8 bits.
8001      vpanic("math_INTERLEAVE_2");
8002   }
8003}
8004
8005// Helper function for generating arbitrary slicing 'n' dicing of
8006// 3 8x8 vectors, as needed for VLD3.8 and VST3.8.
8007static IRExpr* math_PERM_8x8x3(const UChar* desc,
8008                               IRTemp s0, IRTemp s1, IRTemp s2)
8009{
8010   // desc is an array of 8 pairs, encoded as 16 bytes,
8011   // that describe how to assemble the result lanes, starting with
8012   // lane 7.  Each pair is: first component (0..2) says which of
8013   // s0/s1/s2 to use.  Second component (0..7) is the lane number
8014   // in the source to use.
8015   UInt si;
8016   for (si = 0; si < 7; si++) {
8017      vassert(desc[2 * si + 0] <= 2);
8018      vassert(desc[2 * si + 1] <= 7);
8019   }
8020   IRTemp h3 = newTemp(Ity_I64);
8021   IRTemp h2 = newTemp(Ity_I64);
8022   IRTemp h1 = newTemp(Ity_I64);
8023   IRTemp h0 = newTemp(Ity_I64);
8024   IRTemp srcs[3] = {s0, s1, s2};
8025#  define SRC_VEC(_lane)   mkexpr(srcs[desc[2 * (7-(_lane)) + 0]])
8026#  define SRC_SHIFT(_lane) mkU8(56-8*(desc[2 * (7-(_lane)) + 1]))
8027   assign(h3, binop(Iop_InterleaveHI8x8,
8028                    binop(Iop_Shl64, SRC_VEC(7), SRC_SHIFT(7)),
8029                    binop(Iop_Shl64, SRC_VEC(6), SRC_SHIFT(6))));
8030   assign(h2, binop(Iop_InterleaveHI8x8,
8031                    binop(Iop_Shl64, SRC_VEC(5), SRC_SHIFT(5)),
8032                    binop(Iop_Shl64, SRC_VEC(4), SRC_SHIFT(4))));
8033   assign(h1, binop(Iop_InterleaveHI8x8,
8034                    binop(Iop_Shl64, SRC_VEC(3), SRC_SHIFT(3)),
8035                    binop(Iop_Shl64, SRC_VEC(2), SRC_SHIFT(2))));
8036   assign(h0, binop(Iop_InterleaveHI8x8,
8037                    binop(Iop_Shl64, SRC_VEC(1), SRC_SHIFT(1)),
8038                    binop(Iop_Shl64, SRC_VEC(0), SRC_SHIFT(0))));
8039#  undef SRC_VEC
8040#  undef SRC_SHIFT
8041   // Now h3..h0 are 64 bit vectors with useful information only
8042   // in the top 16 bits.  We now concatentate those four 16-bit
8043   // groups so as to produce the final result.
8044   IRTemp w1 = newTemp(Ity_I64);
8045   IRTemp w0 = newTemp(Ity_I64);
8046   assign(w1, binop(Iop_InterleaveHI16x4, mkexpr(h3), mkexpr(h2)));
8047   assign(w0, binop(Iop_InterleaveHI16x4, mkexpr(h1), mkexpr(h0)));
8048   return binop(Iop_InterleaveHI32x2, mkexpr(w1), mkexpr(w0));
8049}
8050
8051/* Generate 3x64 -> 3x64 deinterleave code, for VLD3.  Caller must
8052   make *u0, *u1 and *u2 be valid IRTemps before the call. */
8053static void math_DEINTERLEAVE_3 (
8054               /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1, /*OUT*/IRTemp* u2,
8055               IRTemp i0, IRTemp i1, IRTemp i2, Int laneszB
8056            )
8057{
8058#  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
8059#  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
8060#  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8061   /* The following assumes that the guest is little endian, and hence
8062      that the memory-side (interleaved) data is stored
8063      little-endianly. */
8064   vassert(u0 && u1 && u2);
8065   if (laneszB == 4) {
8066      // memLE(192 bits) == A0 B0 C0 A1 B1 C1
8067      // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
8068      // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
8069      assign(*u0, IHI32x2(SHL64(i1,  0), SHL64(i0, 32)));
8070      assign(*u1, IHI32x2(SHL64(i2, 32), SHL64(i0,  0)));
8071      assign(*u2, IHI32x2(SHL64(i2,  0), SHL64(i1, 32)));
8072   } else if (laneszB == 2) {
8073      // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
8074      // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
8075      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
8076#     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
8077                IHI32x2(                                      \
8078                   IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
8079                           SHL64((_tmp2),48-16*(_la2))),      \
8080                   IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
8081                           SHL64((_tmp0),48-16*(_la0))))
8082      assign(*u0, XXX(i2,1, i1,2, i0,3, i0,0));
8083      assign(*u1, XXX(i2,2, i1,3, i1,0, i0,1));
8084      assign(*u2, XXX(i2,3, i2,0, i1,1, i0,2));
8085#     undef XXX
8086   } else if (laneszB == 1) {
8087      // These describe how the result vectors [7..0] are
8088      // assembled from the source vectors.  Each pair is
8089      // (source vector number, lane number).
8090      static const UChar de0[16] = {2,5, 2,2, 1,7, 1,4, 1,1, 0,6, 0,3, 0,0};
8091      static const UChar de1[16] = {2,6, 2,3, 2,0, 1,5, 1,2, 0,7, 0,4, 0,1};
8092      static const UChar de2[16] = {2,7, 2,4, 2,1, 1,6, 1,3, 1,0, 0,5, 0,2};
8093      assign(*u0, math_PERM_8x8x3(de0, i0, i1, i2));
8094      assign(*u1, math_PERM_8x8x3(de1, i0, i1, i2));
8095      assign(*u2, math_PERM_8x8x3(de2, i0, i1, i2));
8096   } else {
8097      // Can never happen, since VLD3 only has valid lane widths of 32,
8098      // 16 or 8 bits.
8099      vpanic("math_DEINTERLEAVE_3");
8100   }
8101#  undef SHL64
8102#  undef IHI16x4
8103#  undef IHI32x2
8104}
8105
8106/* Generate 3x64 -> 3x64 interleave code, for VST3.  Caller must
8107   make *i0, *i1 and *i2 be valid IRTemps before the call. */
8108static void math_INTERLEAVE_3 (
8109               /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1, /*OUT*/IRTemp* i2,
8110               IRTemp u0, IRTemp u1, IRTemp u2, Int laneszB
8111            )
8112{
8113#  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
8114#  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
8115#  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8116   /* The following assumes that the guest is little endian, and hence
8117      that the memory-side (interleaved) data is stored
8118      little-endianly. */
8119   vassert(i0 && i1 && i2);
8120   if (laneszB == 4) {
8121      // memLE(192 bits) == A0 B0 C0 A1 B1 C1
8122      // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
8123      // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
8124      assign(*i0, IHI32x2(SHL64(u1, 32), SHL64(u0, 32)));
8125      assign(*i1, IHI32x2(SHL64(u0,  0), SHL64(u2, 32)));
8126      assign(*i2, IHI32x2(SHL64(u2,  0), SHL64(u1,  0)));
8127   } else if (laneszB == 2) {
8128      // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
8129      // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
8130      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
8131#     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
8132                IHI32x2(                                      \
8133                   IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
8134                           SHL64((_tmp2),48-16*(_la2))),      \
8135                   IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
8136                           SHL64((_tmp0),48-16*(_la0))))
8137      assign(*i0, XXX(u0,1, u2,0, u1,0, u0,0));
8138      assign(*i1, XXX(u1,2, u0,2, u2,1, u1,1));
8139      assign(*i2, XXX(u2,3, u1,3, u0,3, u2,2));
8140#     undef XXX
8141   } else if (laneszB == 1) {
8142      // These describe how the result vectors [7..0] are
8143      // assembled from the source vectors.  Each pair is
8144      // (source vector number, lane number).
8145      static const UChar in0[16] = {1,2, 0,2, 2,1, 1,1, 0,1, 2,0, 1,0, 0,0};
8146      static const UChar in1[16] = {0,5, 2,4, 1,4, 0,4, 2,3, 1,3, 0,3, 2,2};
8147      static const UChar in2[16] = {2,7, 1,7, 0,7, 2,6, 1,6, 0,6, 2,5, 1,5};
8148      assign(*i0, math_PERM_8x8x3(in0, u0, u1, u2));
8149      assign(*i1, math_PERM_8x8x3(in1, u0, u1, u2));
8150      assign(*i2, math_PERM_8x8x3(in2, u0, u1, u2));
8151   } else {
8152      // Can never happen, since VST3 only has valid lane widths of 32,
8153      // 16 or 8 bits.
8154      vpanic("math_INTERLEAVE_3");
8155   }
8156#  undef SHL64
8157#  undef IHI16x4
8158#  undef IHI32x2
8159}
8160
8161/* Generate 4x64 -> 4x64 deinterleave code, for VLD4.  Caller must
8162   make *u0, *u1, *u2 and *u3 be valid IRTemps before the call. */
8163static void math_DEINTERLEAVE_4 (
8164               /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
8165               /*OUT*/IRTemp* u2, /*OUT*/IRTemp* u3,
8166               IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3, Int laneszB
8167            )
8168{
8169#  define IHI32x2(_t1, _t2) \
8170             binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
8171#  define ILO32x2(_t1, _t2) \
8172             binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
8173#  define IHI16x4(_t1, _t2) \
8174             binop(Iop_InterleaveHI16x4, mkexpr(_t1), mkexpr(_t2))
8175#  define ILO16x4(_t1, _t2) \
8176             binop(Iop_InterleaveLO16x4, mkexpr(_t1), mkexpr(_t2))
8177#  define IHI8x8(_t1, _e2) \
8178             binop(Iop_InterleaveHI8x8, mkexpr(_t1), _e2)
8179#  define SHL64(_tmp, _amt) \
8180             binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8181   /* The following assumes that the guest is little endian, and hence
8182      that the memory-side (interleaved) data is stored
8183      little-endianly. */
8184   vassert(u0 && u1 && u2 && u3);
8185   if (laneszB == 4) {
8186      assign(*u0, ILO32x2(i2, i0));
8187      assign(*u1, IHI32x2(i2, i0));
8188      assign(*u2, ILO32x2(i3, i1));
8189      assign(*u3, IHI32x2(i3, i1));
8190   } else if (laneszB == 2) {
8191      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8192      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8193      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8194      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8195      assign(b1b0a1a0, ILO16x4(i1, i0));
8196      assign(b3b2a3a2, ILO16x4(i3, i2));
8197      assign(d1d0c1c0, IHI16x4(i1, i0));
8198      assign(d3d2c3c2, IHI16x4(i3, i2));
8199      // And now do what we did for the 32-bit case.
8200      assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
8201      assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
8202      assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
8203      assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
8204   } else if (laneszB == 1) {
8205      // Deinterleave into 16-bit chunks, then do as the 16-bit case.
8206      IRTemp i0x = newTemp(Ity_I64);
8207      IRTemp i1x = newTemp(Ity_I64);
8208      IRTemp i2x = newTemp(Ity_I64);
8209      IRTemp i3x = newTemp(Ity_I64);
8210      assign(i0x, IHI8x8(i0, SHL64(i0, 32)));
8211      assign(i1x, IHI8x8(i1, SHL64(i1, 32)));
8212      assign(i2x, IHI8x8(i2, SHL64(i2, 32)));
8213      assign(i3x, IHI8x8(i3, SHL64(i3, 32)));
8214      // From here on is like the 16 bit case.
8215      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8216      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8217      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8218      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8219      assign(b1b0a1a0, ILO16x4(i1x, i0x));
8220      assign(b3b2a3a2, ILO16x4(i3x, i2x));
8221      assign(d1d0c1c0, IHI16x4(i1x, i0x));
8222      assign(d3d2c3c2, IHI16x4(i3x, i2x));
8223      // And now do what we did for the 32-bit case.
8224      assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
8225      assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
8226      assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
8227      assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
8228   } else {
8229      // Can never happen, since VLD4 only has valid lane widths of 32,
8230      // 16 or 8 bits.
8231      vpanic("math_DEINTERLEAVE_4");
8232   }
8233#  undef SHL64
8234#  undef IHI8x8
8235#  undef ILO16x4
8236#  undef IHI16x4
8237#  undef ILO32x2
8238#  undef IHI32x2
8239}
8240
8241/* Generate 4x64 -> 4x64 interleave code, for VST4.  Caller must
8242   make *i0, *i1, *i2 and *i3 be valid IRTemps before the call. */
8243static void math_INTERLEAVE_4 (
8244               /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
8245               /*OUT*/IRTemp* i2, /*OUT*/IRTemp* i3,
8246               IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3, Int laneszB
8247            )
8248{
8249#  define IHI32x2(_t1, _t2) \
8250             binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
8251#  define ILO32x2(_t1, _t2) \
8252             binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
8253#  define CEV16x4(_t1, _t2) \
8254             binop(Iop_CatEvenLanes16x4, mkexpr(_t1), mkexpr(_t2))
8255#  define COD16x4(_t1, _t2) \
8256             binop(Iop_CatOddLanes16x4, mkexpr(_t1), mkexpr(_t2))
8257#  define COD8x8(_t1, _e2) \
8258             binop(Iop_CatOddLanes8x8, mkexpr(_t1), _e2)
8259#  define SHL64(_tmp, _amt) \
8260             binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8261   /* The following assumes that the guest is little endian, and hence
8262      that the memory-side (interleaved) data is stored
8263      little-endianly. */
8264   vassert(u0 && u1 && u2 && u3);
8265   if (laneszB == 4) {
8266      assign(*i0, ILO32x2(u1, u0));
8267      assign(*i1, ILO32x2(u3, u2));
8268      assign(*i2, IHI32x2(u1, u0));
8269      assign(*i3, IHI32x2(u3, u2));
8270   } else if (laneszB == 2) {
8271      // First, interleave at the 32-bit lane size.
8272      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8273      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8274      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8275      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8276      assign(b1b0a1a0, ILO32x2(u1, u0));
8277      assign(b3b2a3a2, IHI32x2(u1, u0));
8278      assign(d1d0c1c0, ILO32x2(u3, u2));
8279      assign(d3d2c3c2, IHI32x2(u3, u2));
8280      // And interleave (cat) at the 16 bit size.
8281      assign(*i0, CEV16x4(d1d0c1c0, b1b0a1a0));
8282      assign(*i1, COD16x4(d1d0c1c0, b1b0a1a0));
8283      assign(*i2, CEV16x4(d3d2c3c2, b3b2a3a2));
8284      assign(*i3, COD16x4(d3d2c3c2, b3b2a3a2));
8285   } else if (laneszB == 1) {
8286      // First, interleave at the 32-bit lane size.
8287      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8288      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8289      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8290      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8291      assign(b1b0a1a0, ILO32x2(u1, u0));
8292      assign(b3b2a3a2, IHI32x2(u1, u0));
8293      assign(d1d0c1c0, ILO32x2(u3, u2));
8294      assign(d3d2c3c2, IHI32x2(u3, u2));
8295      // And interleave (cat) at the 16 bit size.
8296      IRTemp i0x = newTemp(Ity_I64);
8297      IRTemp i1x = newTemp(Ity_I64);
8298      IRTemp i2x = newTemp(Ity_I64);
8299      IRTemp i3x = newTemp(Ity_I64);
8300      assign(i0x, CEV16x4(d1d0c1c0, b1b0a1a0));
8301      assign(i1x, COD16x4(d1d0c1c0, b1b0a1a0));
8302      assign(i2x, CEV16x4(d3d2c3c2, b3b2a3a2));
8303      assign(i3x, COD16x4(d3d2c3c2, b3b2a3a2));
8304      // And rearrange within each word, to get the right 8 bit lanes.
8305      assign(*i0, COD8x8(i0x, SHL64(i0x, 8)));
8306      assign(*i1, COD8x8(i1x, SHL64(i1x, 8)));
8307      assign(*i2, COD8x8(i2x, SHL64(i2x, 8)));
8308      assign(*i3, COD8x8(i3x, SHL64(i3x, 8)));
8309   } else {
8310      // Can never happen, since VLD4 only has valid lane widths of 32,
8311      // 16 or 8 bits.
8312      vpanic("math_DEINTERLEAVE_4");
8313   }
8314#  undef SHL64
8315#  undef COD8x8
8316#  undef COD16x4
8317#  undef CEV16x4
8318#  undef ILO32x2
8319#  undef IHI32x2
8320}
8321
8322/* A7.7 Advanced SIMD element or structure load/store instructions */
8323static
8324Bool dis_neon_load_or_store ( UInt theInstr,
8325                              Bool isT, IRTemp condT )
8326{
8327#  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
8328   UInt bA = INSN(23,23);
8329   UInt fB = INSN(11,8);
8330   UInt bL = INSN(21,21);
8331   UInt rD = (INSN(22,22) << 4) | INSN(15,12);
8332   UInt rN = INSN(19,16);
8333   UInt rM = INSN(3,0);
8334   UInt N, size, i, j;
8335   UInt inc;
8336   UInt regs = 1;
8337
8338   if (isT) {
8339      vassert(condT != IRTemp_INVALID);
8340   } else {
8341      vassert(condT == IRTemp_INVALID);
8342   }
8343   /* So now, if condT is not IRTemp_INVALID, we know we're
8344      dealing with Thumb code. */
8345
8346   if (INSN(20,20) != 0)
8347      return False;
8348
8349   IRTemp initialRn = newTemp(Ity_I32);
8350   assign(initialRn, isT ? getIRegT(rN) : getIRegA(rN));
8351
8352   IRTemp initialRm = newTemp(Ity_I32);
8353   assign(initialRm, isT ? getIRegT(rM) : getIRegA(rM));
8354
8355   /* There are 3 cases:
8356      (1) VSTn / VLDn (n-element structure from/to one lane)
8357      (2) VLDn (single element to all lanes)
8358      (3) VSTn / VLDn (multiple n-element structures)
8359   */
8360   if (bA) {
8361      N = fB & 3;
8362      if ((fB >> 2) < 3) {
8363         /* ------------ Case (1) ------------
8364            VSTn / VLDn (n-element structure from/to one lane) */
8365
8366         size = fB >> 2;
8367
8368         switch (size) {
8369            case 0: i = INSN(7,5); inc = 1; break;
8370            case 1: i = INSN(7,6); inc = INSN(5,5) ? 2 : 1; break;
8371            case 2: i = INSN(7,7); inc = INSN(6,6) ? 2 : 1; break;
8372            case 3: return False;
8373            default: vassert(0);
8374         }
8375
8376         IRTemp addr = newTemp(Ity_I32);
8377         assign(addr, mkexpr(initialRn));
8378
8379         // go uncond
8380         if (condT != IRTemp_INVALID)
8381            mk_skip_over_T32_if_cond_is_false(condT);
8382         // now uncond
8383
8384         if (bL)
8385            mk_neon_elem_load_to_one_lane(rD, inc, i, N, size, addr);
8386         else
8387            mk_neon_elem_store_from_one_lane(rD, inc, i, N, size, addr);
8388         DIP("v%s%u.%u {", bL ? "ld" : "st", N + 1, 8 << size);
8389         for (j = 0; j <= N; j++) {
8390            if (j)
8391               DIP(", ");
8392            DIP("d%u[%u]", rD + j * inc, i);
8393         }
8394         DIP("}, [r%u]", rN);
8395         if (rM != 13 && rM != 15) {
8396            DIP(", r%u\n", rM);
8397         } else {
8398            DIP("%s\n", (rM != 15) ? "!" : "");
8399         }
8400      } else {
8401         /* ------------ Case (2) ------------
8402            VLDn (single element to all lanes) */
8403         UInt r;
8404         if (bL == 0)
8405            return False;
8406
8407         inc = INSN(5,5) + 1;
8408         size = INSN(7,6);
8409
8410         /* size == 3 and size == 2 cases differ in alignment constraints */
8411         if (size == 3 && N == 3 && INSN(4,4) == 1)
8412            size = 2;
8413
8414         if (size == 0 && N == 0 && INSN(4,4) == 1)
8415            return False;
8416         if (N == 2 && INSN(4,4) == 1)
8417            return False;
8418         if (size == 3)
8419            return False;
8420
8421         // go uncond
8422         if (condT != IRTemp_INVALID)
8423            mk_skip_over_T32_if_cond_is_false(condT);
8424         // now uncond
8425
8426         IRTemp addr = newTemp(Ity_I32);
8427         assign(addr, mkexpr(initialRn));
8428
8429         if (N == 0 && INSN(5,5))
8430            regs = 2;
8431
8432         for (r = 0; r < regs; r++) {
8433            switch (size) {
8434               case 0:
8435                  putDRegI64(rD + r, unop(Iop_Dup8x8,
8436                                          loadLE(Ity_I8, mkexpr(addr))),
8437                             IRTemp_INVALID);
8438                  break;
8439               case 1:
8440                  putDRegI64(rD + r, unop(Iop_Dup16x4,
8441                                          loadLE(Ity_I16, mkexpr(addr))),
8442                             IRTemp_INVALID);
8443                  break;
8444               case 2:
8445                  putDRegI64(rD + r, unop(Iop_Dup32x2,
8446                                          loadLE(Ity_I32, mkexpr(addr))),
8447                             IRTemp_INVALID);
8448                  break;
8449               default:
8450                  vassert(0);
8451            }
8452            for (i = 1; i <= N; i++) {
8453               switch (size) {
8454                  case 0:
8455                     putDRegI64(rD + r + i * inc,
8456                                unop(Iop_Dup8x8,
8457                                     loadLE(Ity_I8, binop(Iop_Add32,
8458                                                          mkexpr(addr),
8459                                                          mkU32(i * 1)))),
8460                                IRTemp_INVALID);
8461                     break;
8462                  case 1:
8463                     putDRegI64(rD + r + i * inc,
8464                                unop(Iop_Dup16x4,
8465                                     loadLE(Ity_I16, binop(Iop_Add32,
8466                                                           mkexpr(addr),
8467                                                           mkU32(i * 2)))),
8468                                IRTemp_INVALID);
8469                     break;
8470                  case 2:
8471                     putDRegI64(rD + r + i * inc,
8472                                unop(Iop_Dup32x2,
8473                                     loadLE(Ity_I32, binop(Iop_Add32,
8474                                                           mkexpr(addr),
8475                                                           mkU32(i * 4)))),
8476                                IRTemp_INVALID);
8477                     break;
8478                  default:
8479                     vassert(0);
8480               }
8481            }
8482         }
8483         DIP("vld%u.%u {", N + 1, 8 << size);
8484         for (r = 0; r < regs; r++) {
8485            for (i = 0; i <= N; i++) {
8486               if (i || r)
8487                  DIP(", ");
8488               DIP("d%u[]", rD + r + i * inc);
8489            }
8490         }
8491         DIP("}, [r%u]", rN);
8492         if (rM != 13 && rM != 15) {
8493            DIP(", r%u\n", rM);
8494         } else {
8495            DIP("%s\n", (rM != 15) ? "!" : "");
8496         }
8497      }
8498      /* Writeback.  We're uncond here, so no condT-ing. */
8499      if (rM != 15) {
8500         if (rM == 13) {
8501            IRExpr* e = binop(Iop_Add32,
8502                              mkexpr(initialRn),
8503                              mkU32((1 << size) * (N + 1)));
8504            if (isT)
8505               putIRegT(rN, e, IRTemp_INVALID);
8506            else
8507               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8508         } else {
8509            IRExpr* e = binop(Iop_Add32,
8510                              mkexpr(initialRn),
8511                              mkexpr(initialRm));
8512            if (isT)
8513               putIRegT(rN, e, IRTemp_INVALID);
8514            else
8515               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8516         }
8517      }
8518      return True;
8519   } else {
8520      /* ------------ Case (3) ------------
8521         VSTn / VLDn (multiple n-element structures) */
8522      inc = (fB & 1) + 1;
8523
8524      if (fB == BITS4(0,0,1,0)       // Dd, Dd+1, Dd+2, Dd+3  inc = 1  regs = 4
8525          || fB == BITS4(0,1,1,0)    // Dd, Dd+1, Dd+2        inc = 1  regs = 3
8526          || fB == BITS4(0,1,1,1)    // Dd                    inc = 2  regs = 1
8527          || fB == BITS4(1,0,1,0)) { // Dd, Dd+1              inc = 1  regs = 2
8528         N = 0; // VLD1/VST1.  'inc' does not appear to have any
8529                // meaning for the VLD1/VST1 cases.  'regs' is the number of
8530                // registers involved.
8531         if (rD + regs > 32) return False;
8532      }
8533      else
8534      if (fB == BITS4(0,0,1,1)       // Dd, Dd+1, Dd+2, Dd+3  inc=2  regs = 2
8535          || fB == BITS4(1,0,0,0)    // Dd, Dd+1              inc=1  regs = 1
8536          || fB == BITS4(1,0,0,1)) { // Dd, Dd+2              inc=2  regs = 1
8537         N = 1; // VLD2/VST2.  'regs' is the number of register-pairs involved
8538         if (regs == 1 && inc == 1 && rD + 1 >= 32) return False;
8539         if (regs == 1 && inc == 2 && rD + 2 >= 32) return False;
8540         if (regs == 2 && inc == 2 && rD + 3 >= 32) return False;
8541      } else if (fB == BITS4(0,1,0,0) || fB == BITS4(0,1,0,1)) {
8542         N = 2; // VLD3/VST3
8543         if (inc == 1 && rD + 2 >= 32) return False;
8544         if (inc == 2 && rD + 4 >= 32) return False;
8545      } else if (fB == BITS4(0,0,0,0) || fB == BITS4(0,0,0,1)) {
8546         N = 3; // VLD4/VST4
8547         if (inc == 1 && rD + 3 >= 32) return False;
8548         if (inc == 2 && rD + 6 >= 32) return False;
8549      } else {
8550         return False;
8551      }
8552
8553      if (N == 1 && fB == BITS4(0,0,1,1)) {
8554         regs = 2;
8555      } else if (N == 0) {
8556         if (fB == BITS4(1,0,1,0)) {
8557            regs = 2;
8558         } else if (fB == BITS4(0,1,1,0)) {
8559            regs = 3;
8560         } else if (fB == BITS4(0,0,1,0)) {
8561            regs = 4;
8562         }
8563      }
8564
8565      size = INSN(7,6);
8566      if (N == 0 && size == 3)
8567         size = 2;
8568      if (size == 3)
8569         return False;
8570
8571      // go uncond
8572      if (condT != IRTemp_INVALID)
8573         mk_skip_over_T32_if_cond_is_false(condT);
8574      // now uncond
8575
8576      IRTemp addr = newTemp(Ity_I32);
8577      assign(addr, mkexpr(initialRn));
8578
8579      if (N == 0 /* No interleaving -- VLD1/VST1 */) {
8580         UInt r;
8581         vassert(regs == 1 || regs == 2 || regs == 3 || regs == 4);
8582         /* inc has no relevance here */
8583         for (r = 0; r < regs; r++) {
8584            if (bL)
8585               putDRegI64(rD+r, loadLE(Ity_I64, mkexpr(addr)), IRTemp_INVALID);
8586            else
8587               storeLE(mkexpr(addr), getDRegI64(rD+r));
8588            IRTemp tmp = newTemp(Ity_I32);
8589            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(8)));
8590            addr = tmp;
8591         }
8592      }
8593      else
8594      if (N == 1 /* 2-interleaving -- VLD2/VST2 */) {
8595         vassert( (regs == 1 && (inc == 1 || inc == 2))
8596                   || (regs == 2 && inc == 2) );
8597         // Make 'nregs' be the number of registers and 'regstep'
8598         // equal the actual register-step.  The ARM encoding, using 'regs'
8599         // and 'inc', is bizarre.  After this, we have:
8600         // Dd, Dd+1              regs = 1, inc = 1,   nregs = 2, regstep = 1
8601         // Dd, Dd+2              regs = 1, inc = 2,   nregs = 2, regstep = 2
8602         // Dd, Dd+1, Dd+2, Dd+3  regs = 2, inc = 2,   nregs = 4, regstep = 1
8603         UInt nregs   = 2;
8604         UInt regstep = 1;
8605         if (regs == 1 && inc == 1) {
8606            /* nothing */
8607         } else if (regs == 1 && inc == 2) {
8608            regstep = 2;
8609         } else if (regs == 2 && inc == 2) {
8610            nregs = 4;
8611         } else {
8612            vassert(0);
8613         }
8614         // 'a' is address,
8615         // 'di' is interleaved data, 'du' is uninterleaved data
8616         if (nregs == 2) {
8617            IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8618            IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8619            IRTemp  di0 = newTemp(Ity_I64);
8620            IRTemp  di1 = newTemp(Ity_I64);
8621            IRTemp  du0 = newTemp(Ity_I64);
8622            IRTemp  du1 = newTemp(Ity_I64);
8623            if (bL) {
8624               assign(di0, loadLE(Ity_I64, a0));
8625               assign(di1, loadLE(Ity_I64, a1));
8626               math_DEINTERLEAVE_2(&du0, &du1, di0, di1, 1 << size);
8627               putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
8628               putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
8629            } else {
8630               assign(du0, getDRegI64(rD + 0 * regstep));
8631               assign(du1, getDRegI64(rD + 1 * regstep));
8632               math_INTERLEAVE_2(&di0, &di1, du0, du1, 1 << size);
8633               storeLE(a0, mkexpr(di0));
8634               storeLE(a1, mkexpr(di1));
8635            }
8636            IRTemp tmp = newTemp(Ity_I32);
8637            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(16)));
8638            addr = tmp;
8639         } else {
8640            vassert(nregs == 4);
8641            vassert(regstep == 1);
8642            IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8643            IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8644            IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8645            IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
8646            IRTemp  di0 = newTemp(Ity_I64);
8647            IRTemp  di1 = newTemp(Ity_I64);
8648            IRTemp  di2 = newTemp(Ity_I64);
8649            IRTemp  di3 = newTemp(Ity_I64);
8650            IRTemp  du0 = newTemp(Ity_I64);
8651            IRTemp  du1 = newTemp(Ity_I64);
8652            IRTemp  du2 = newTemp(Ity_I64);
8653            IRTemp  du3 = newTemp(Ity_I64);
8654            if (bL) {
8655               assign(di0, loadLE(Ity_I64, a0));
8656               assign(di1, loadLE(Ity_I64, a1));
8657               assign(di2, loadLE(Ity_I64, a2));
8658               assign(di3, loadLE(Ity_I64, a3));
8659               // Note spooky interleaving: du0, du2, di0, di1 etc
8660               math_DEINTERLEAVE_2(&du0, &du2, di0, di1, 1 << size);
8661               math_DEINTERLEAVE_2(&du1, &du3, di2, di3, 1 << size);
8662               putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
8663               putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
8664               putDRegI64(rD + 2 * regstep, mkexpr(du2), IRTemp_INVALID);
8665               putDRegI64(rD + 3 * regstep, mkexpr(du3), IRTemp_INVALID);
8666            } else {
8667               assign(du0, getDRegI64(rD + 0 * regstep));
8668               assign(du1, getDRegI64(rD + 1 * regstep));
8669               assign(du2, getDRegI64(rD + 2 * regstep));
8670               assign(du3, getDRegI64(rD + 3 * regstep));
8671               // Note spooky interleaving: du0, du2, di0, di1 etc
8672               math_INTERLEAVE_2(&di0, &di1, du0, du2, 1 << size);
8673               math_INTERLEAVE_2(&di2, &di3, du1, du3, 1 << size);
8674               storeLE(a0, mkexpr(di0));
8675               storeLE(a1, mkexpr(di1));
8676               storeLE(a2, mkexpr(di2));
8677               storeLE(a3, mkexpr(di3));
8678            }
8679
8680            IRTemp tmp = newTemp(Ity_I32);
8681            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
8682            addr = tmp;
8683         }
8684      }
8685      else
8686      if (N == 2 /* 3-interleaving -- VLD3/VST3 */) {
8687         // Dd, Dd+1, Dd+2   regs = 1, inc = 1
8688         // Dd, Dd+2, Dd+4   regs = 1, inc = 2
8689         vassert(regs == 1 && (inc == 1 || inc == 2));
8690         IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8691         IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8692         IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8693         IRTemp  di0 = newTemp(Ity_I64);
8694         IRTemp  di1 = newTemp(Ity_I64);
8695         IRTemp  di2 = newTemp(Ity_I64);
8696         IRTemp  du0 = newTemp(Ity_I64);
8697         IRTemp  du1 = newTemp(Ity_I64);
8698         IRTemp  du2 = newTemp(Ity_I64);
8699         if (bL) {
8700            assign(di0, loadLE(Ity_I64, a0));
8701            assign(di1, loadLE(Ity_I64, a1));
8702            assign(di2, loadLE(Ity_I64, a2));
8703            math_DEINTERLEAVE_3(&du0, &du1, &du2, di0, di1, di2, 1 << size);
8704            putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
8705            putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
8706            putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
8707         } else {
8708            assign(du0, getDRegI64(rD + 0 * inc));
8709            assign(du1, getDRegI64(rD + 1 * inc));
8710            assign(du2, getDRegI64(rD + 2 * inc));
8711            math_INTERLEAVE_3(&di0, &di1, &di2, du0, du1, du2, 1 << size);
8712            storeLE(a0, mkexpr(di0));
8713            storeLE(a1, mkexpr(di1));
8714            storeLE(a2, mkexpr(di2));
8715         }
8716         IRTemp tmp = newTemp(Ity_I32);
8717         assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(24)));
8718         addr = tmp;
8719      }
8720      else
8721      if (N == 3 /* 4-interleaving -- VLD4/VST4 */) {
8722         // Dd, Dd+1, Dd+2, Dd+3   regs = 1, inc = 1
8723         // Dd, Dd+2, Dd+4, Dd+6   regs = 1, inc = 2
8724         vassert(regs == 1 && (inc == 1 || inc == 2));
8725         IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8726         IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8727         IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8728         IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
8729         IRTemp  di0 = newTemp(Ity_I64);
8730         IRTemp  di1 = newTemp(Ity_I64);
8731         IRTemp  di2 = newTemp(Ity_I64);
8732         IRTemp  di3 = newTemp(Ity_I64);
8733         IRTemp  du0 = newTemp(Ity_I64);
8734         IRTemp  du1 = newTemp(Ity_I64);
8735         IRTemp  du2 = newTemp(Ity_I64);
8736         IRTemp  du3 = newTemp(Ity_I64);
8737         if (bL) {
8738            assign(di0, loadLE(Ity_I64, a0));
8739            assign(di1, loadLE(Ity_I64, a1));
8740            assign(di2, loadLE(Ity_I64, a2));
8741            assign(di3, loadLE(Ity_I64, a3));
8742            math_DEINTERLEAVE_4(&du0, &du1, &du2, &du3,
8743                                di0, di1, di2, di3, 1 << size);
8744            putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
8745            putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
8746            putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
8747            putDRegI64(rD + 3 * inc, mkexpr(du3), IRTemp_INVALID);
8748         } else {
8749            assign(du0, getDRegI64(rD + 0 * inc));
8750            assign(du1, getDRegI64(rD + 1 * inc));
8751            assign(du2, getDRegI64(rD + 2 * inc));
8752            assign(du3, getDRegI64(rD + 3 * inc));
8753            math_INTERLEAVE_4(&di0, &di1, &di2, &di3,
8754                              du0, du1, du2, du3, 1 << size);
8755            storeLE(a0, mkexpr(di0));
8756            storeLE(a1, mkexpr(di1));
8757            storeLE(a2, mkexpr(di2));
8758            storeLE(a3, mkexpr(di3));
8759         }
8760         IRTemp tmp = newTemp(Ity_I32);
8761         assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
8762         addr = tmp;
8763      }
8764      else {
8765         vassert(0);
8766      }
8767
8768      /* Writeback */
8769      if (rM != 15) {
8770         IRExpr* e;
8771         if (rM == 13) {
8772            e = binop(Iop_Add32, mkexpr(initialRn),
8773                                 mkU32(8 * (N + 1) * regs));
8774         } else {
8775            e = binop(Iop_Add32, mkexpr(initialRn),
8776                                 mkexpr(initialRm));
8777         }
8778         if (isT)
8779            putIRegT(rN, e, IRTemp_INVALID);
8780         else
8781            putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8782      }
8783
8784      DIP("v%s%u.%u {", bL ? "ld" : "st", N + 1, 8 << INSN(7,6));
8785      if ((inc == 1 && regs * (N + 1) > 1)
8786          || (inc == 2 && regs > 1 && N > 0)) {
8787         DIP("d%u-d%u", rD, rD + regs * (N + 1) - 1);
8788      } else {
8789         UInt r;
8790         for (r = 0; r < regs; r++) {
8791            for (i = 0; i <= N; i++) {
8792               if (i || r)
8793                  DIP(", ");
8794               DIP("d%u", rD + r + i * inc);
8795            }
8796         }
8797      }
8798      DIP("}, [r%u]", rN);
8799      if (rM != 13 && rM != 15) {
8800         DIP(", r%u\n", rM);
8801      } else {
8802         DIP("%s\n", (rM != 15) ? "!" : "");
8803      }
8804      return True;
8805   }
8806#  undef INSN
8807}
8808
8809
8810/*------------------------------------------------------------*/
8811/*--- NEON, top level control                              ---*/
8812/*------------------------------------------------------------*/
8813
8814/* Both ARM and Thumb */
8815
8816/* Translate a NEON instruction.    If successful, returns
8817   True and *dres may or may not be updated.  If failure, returns
8818   False and doesn't change *dres nor create any IR.
8819
8820   The Thumb and ARM encodings are similar for the 24 bottom bits, but
8821   the top 8 bits are slightly different.  In both cases, the caller
8822   must pass the entire 32 bits.  Callers may pass any instruction;
8823   this ignores non-NEON ones.
8824
8825   Caller must supply an IRTemp 'condT' holding the gating condition,
8826   or IRTemp_INVALID indicating the insn is always executed.  In ARM
8827   code, this must always be IRTemp_INVALID because NEON insns are
8828   unconditional for ARM.
8829
8830   Finally, the caller must indicate whether this occurs in ARM or in
8831   Thumb code.
8832*/
8833static Bool decode_NEON_instruction (
8834               /*MOD*/DisResult* dres,
8835               UInt              insn32,
8836               IRTemp            condT,
8837               Bool              isT
8838            )
8839{
8840#  define INSN(_bMax,_bMin)  SLICE_UInt(insn32, (_bMax), (_bMin))
8841
8842   /* There are two kinds of instruction to deal with: load/store and
8843      data processing.  In each case, in ARM mode we merely identify
8844      the kind, and pass it on to the relevant sub-handler.  In Thumb
8845      mode we identify the kind, swizzle the bits around to make it
8846      have the same encoding as in ARM, and hand it on to the
8847      sub-handler.
8848   */
8849
8850   /* In ARM mode, NEON instructions can't be conditional. */
8851   if (!isT)
8852      vassert(condT == IRTemp_INVALID);
8853
8854   /* Data processing:
8855      Thumb: 111U 1111 AAAA Axxx xxxx BBBB CCCC xxxx
8856      ARM:   1111 001U AAAA Axxx xxxx BBBB CCCC xxxx
8857   */
8858   if (!isT && INSN(31,25) == BITS7(1,1,1,1,0,0,1)) {
8859      // ARM, DP
8860      return dis_neon_data_processing(INSN(31,0), condT);
8861   }
8862   if (isT && INSN(31,29) == BITS3(1,1,1)
8863       && INSN(27,24) == BITS4(1,1,1,1)) {
8864      // Thumb, DP
8865      UInt reformatted = INSN(23,0);
8866      reformatted |= (INSN(28,28) << 24); // U bit
8867      reformatted |= (BITS7(1,1,1,1,0,0,1) << 25);
8868      return dis_neon_data_processing(reformatted, condT);
8869   }
8870
8871   /* Load/store:
8872      Thumb: 1111 1001 AxL0 xxxx xxxx BBBB xxxx xxxx
8873      ARM:   1111 0100 AxL0 xxxx xxxx BBBB xxxx xxxx
8874   */
8875   if (!isT && INSN(31,24) == BITS8(1,1,1,1,0,1,0,0)) {
8876      // ARM, memory
8877      return dis_neon_load_or_store(INSN(31,0), isT, condT);
8878   }
8879   if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
8880      UInt reformatted = INSN(23,0);
8881      reformatted |= (BITS8(1,1,1,1,0,1,0,0) << 24);
8882      return dis_neon_load_or_store(reformatted, isT, condT);
8883   }
8884
8885   /* Doesn't match. */
8886   return False;
8887
8888#  undef INSN
8889}
8890
8891
8892/*------------------------------------------------------------*/
8893/*--- V6 MEDIA instructions                                ---*/
8894/*------------------------------------------------------------*/
8895
8896/* Both ARM and Thumb */
8897
8898/* Translate a V6 media instruction.    If successful, returns
8899   True and *dres may or may not be updated.  If failure, returns
8900   False and doesn't change *dres nor create any IR.
8901
8902   The Thumb and ARM encodings are completely different.  In Thumb
8903   mode, the caller must pass the entire 32 bits.  In ARM mode it must
8904   pass the lower 28 bits.  Apart from that, callers may pass any
8905   instruction; this function ignores anything it doesn't recognise.
8906
8907   Caller must supply an IRTemp 'condT' holding the gating condition,
8908   or IRTemp_INVALID indicating the insn is always executed.
8909
   Caller must also supply an ARMCondcode 'conq'.  This is only used
8911   for debug printing, no other purpose.  For ARM, this is simply the
8912   top 4 bits of the original instruction.  For Thumb, the condition
8913   is not (really) known until run time, and so ARMCondAL should be
8914   passed, only so that printing of these instructions does not show
8915   any condition.
8916
8917   Finally, the caller must indicate whether this occurs in ARM or in
8918   Thumb code.
8919*/
8920static Bool decode_V6MEDIA_instruction (
8921               /*MOD*/DisResult* dres,
8922               UInt              insnv6m,
8923               IRTemp            condT,
8924               ARMCondcode       conq,
8925               Bool              isT
8926            )
8927{
8928#  define INSNA(_bMax,_bMin)   SLICE_UInt(insnv6m, (_bMax), (_bMin))
8929#  define INSNT0(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 16) & 0xFFFF), \
8930                                           (_bMax), (_bMin) )
8931#  define INSNT1(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 0)  & 0xFFFF), \
8932                                           (_bMax), (_bMin) )
8933   HChar dis_buf[128];
8934   dis_buf[0] = 0;
8935
8936   if (isT) {
8937      vassert(conq == ARMCondAL);
8938   } else {
8939      vassert(INSNA(31,28) == BITS4(0,0,0,0)); // caller's obligation
8940      vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
8941   }
8942
8943   /* ----------- smulbb, smulbt, smultb, smultt ----------- */
8944   {
8945     UInt regD = 99, regM = 99, regN = 99, bitM = 0, bitN = 0;
8946     Bool gate = False;
8947
8948     if (isT) {
8949        if (INSNT0(15,4) == 0xFB1 && INSNT1(15,12) == BITS4(1,1,1,1)
8950            && INSNT1(7,6) == BITS2(0,0)) {
8951           regD = INSNT1(11,8);
8952           regM = INSNT1(3,0);
8953           regN = INSNT0(3,0);
8954           bitM = INSNT1(4,4);
8955           bitN = INSNT1(5,5);
8956           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
8957              gate = True;
8958        }
8959     } else {
8960        if (BITS8(0,0,0,1,0,1,1,0) == INSNA(27,20) &&
8961            BITS4(0,0,0,0)         == INSNA(15,12) &&
8962            BITS4(1,0,0,0)         == (INSNA(7,4) & BITS4(1,0,0,1)) ) {
8963           regD = INSNA(19,16);
8964           regM = INSNA(11,8);
8965           regN = INSNA(3,0);
8966           bitM = INSNA(6,6);
8967           bitN = INSNA(5,5);
8968           if (regD != 15 && regN != 15 && regM != 15)
8969              gate = True;
8970        }
8971     }
8972
8973     if (gate) {
8974        IRTemp srcN = newTemp(Ity_I32);
8975        IRTemp srcM = newTemp(Ity_I32);
8976        IRTemp res  = newTemp(Ity_I32);
8977
8978        assign( srcN, binop(Iop_Sar32,
8979                            binop(Iop_Shl32,
8980                                  isT ? getIRegT(regN) : getIRegA(regN),
8981                                  mkU8(bitN ? 0 : 16)), mkU8(16)) );
8982        assign( srcM, binop(Iop_Sar32,
8983                            binop(Iop_Shl32,
8984                                  isT ? getIRegT(regM) : getIRegA(regM),
8985                                  mkU8(bitM ? 0 : 16)), mkU8(16)) );
8986        assign( res, binop(Iop_Mul32, mkexpr(srcN), mkexpr(srcM)) );
8987
8988        if (isT)
8989           putIRegT( regD, mkexpr(res), condT );
8990        else
8991           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
8992
8993        DIP( "smul%c%c%s r%u, r%u, r%u\n", bitN ? 't' : 'b', bitM ? 't' : 'b',
8994             nCC(conq), regD, regN, regM );
8995        return True;
8996     }
8997     /* fall through */
8998   }
8999
9000   /* ------------ smulwb<y><c> <Rd>,<Rn>,<Rm> ------------- */
9001   /* ------------ smulwt<y><c> <Rd>,<Rn>,<Rm> ------------- */
9002   {
9003     UInt regD = 99, regN = 99, regM = 99, bitM = 0;
9004     Bool gate = False;
9005
9006     if (isT) {
9007        if (INSNT0(15,4) == 0xFB3 && INSNT1(15,12) == BITS4(1,1,1,1)
9008            && INSNT1(7,5) == BITS3(0,0,0)) {
9009          regN = INSNT0(3,0);
9010          regD = INSNT1(11,8);
9011          regM = INSNT1(3,0);
9012          bitM = INSNT1(4,4);
9013          if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9014             gate = True;
9015        }
9016     } else {
9017        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
9018            INSNA(15,12) == BITS4(0,0,0,0)         &&
9019            (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,1,0)) {
9020           regD = INSNA(19,16);
9021           regN = INSNA(3,0);
9022           regM = INSNA(11,8);
9023           bitM = INSNA(6,6);
9024           if (regD != 15 && regN != 15 && regM != 15)
9025              gate = True;
9026        }
9027     }
9028
9029     if (gate) {
9030        IRTemp irt_prod = newTemp(Ity_I64);
9031
9032        assign( irt_prod,
9033                binop(Iop_MullS32,
9034                      isT ? getIRegT(regN) : getIRegA(regN),
9035                      binop(Iop_Sar32,
9036                            binop(Iop_Shl32,
9037                                  isT ? getIRegT(regM) : getIRegA(regM),
9038                                  mkU8(bitM ? 0 : 16)),
9039                            mkU8(16))) );
9040
9041        IRExpr* ire_result = binop(Iop_Or32,
9042                                   binop( Iop_Shl32,
9043                                          unop(Iop_64HIto32, mkexpr(irt_prod)),
9044                                          mkU8(16) ),
9045                                   binop( Iop_Shr32,
9046                                          unop(Iop_64to32, mkexpr(irt_prod)),
9047                                          mkU8(16) ) );
9048
9049        if (isT)
9050           putIRegT( regD, ire_result, condT );
9051        else
9052           putIRegA( regD, ire_result, condT, Ijk_Boring );
9053
9054        DIP("smulw%c%s r%u, r%u, r%u\n",
9055            bitM ? 't' : 'b', nCC(conq),regD,regN,regM);
9056        return True;
9057     }
9058     /* fall through */
9059   }
9060
9061   /* ------------ pkhbt<c> Rd, Rn, Rm {,LSL #imm} ------------- */
9062   /* ------------ pkhtb<c> Rd, Rn, Rm {,ASR #imm} ------------- */
9063   {
9064     UInt regD = 99, regN = 99, regM = 99, imm5 = 99, shift_type = 99;
9065     Bool tbform = False;
9066     Bool gate = False;
9067
9068     if (isT) {
9069        if (INSNT0(15,4) == 0xEAC
9070            && INSNT1(15,15) == 0 && INSNT1(4,4) == 0) {
9071           regN = INSNT0(3,0);
9072           regD = INSNT1(11,8);
9073           regM = INSNT1(3,0);
9074           imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
9075           shift_type = (INSNT1(5,5) << 1) | 0;
9076           tbform = (INSNT1(5,5) == 0) ? False : True;
9077           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9078              gate = True;
9079        }
9080     } else {
9081        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
9082            INSNA(5,4)   == BITS2(0,1)             &&
9083            (INSNA(6,6)  == 0 || INSNA(6,6) == 1) ) {
9084           regD = INSNA(15,12);
9085           regN = INSNA(19,16);
9086           regM = INSNA(3,0);
9087           imm5 = INSNA(11,7);
9088           shift_type = (INSNA(6,6) << 1) | 0;
9089           tbform = (INSNA(6,6) == 0) ? False : True;
9090           if (regD != 15 && regN != 15 && regM != 15)
9091              gate = True;
9092        }
9093     }
9094
9095     if (gate) {
9096        IRTemp irt_regM       = newTemp(Ity_I32);
9097        IRTemp irt_regM_shift = newTemp(Ity_I32);
9098        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
9099        compute_result_and_C_after_shift_by_imm5(
9100           dis_buf, &irt_regM_shift, NULL, irt_regM, shift_type, imm5, regM );
9101
9102        UInt mask = (tbform == True) ? 0x0000FFFF : 0xFFFF0000;
9103        IRExpr* ire_result
9104          = binop( Iop_Or32,
9105                   binop(Iop_And32, mkexpr(irt_regM_shift), mkU32(mask)),
9106                   binop(Iop_And32, isT ? getIRegT(regN) : getIRegA(regN),
9107                                    unop(Iop_Not32, mkU32(mask))) );
9108
9109        if (isT)
9110           putIRegT( regD, ire_result, condT );
9111        else
9112           putIRegA( regD, ire_result, condT, Ijk_Boring );
9113
9114        DIP( "pkh%s%s r%u, r%u, r%u %s\n", tbform ? "tb" : "bt",
9115             nCC(conq), regD, regN, regM, dis_buf );
9116
9117        return True;
9118     }
9119     /* fall through */
9120   }
9121
9122   /* ---------- usat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9123   {
9124     UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9125     Bool gate = False;
9126
9127     if (isT) {
9128        if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,1,0)
9129            && INSNT0(4,4) == 0
9130            && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9131           regD       = INSNT1(11,8);
9132           regN       = INSNT0(3,0);
9133           shift_type = (INSNT0(5,5) << 1) | 0;
9134           imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
9135           sat_imm    = INSNT1(4,0);
9136           if (!isBadRegT(regD) && !isBadRegT(regN))
9137              gate = True;
9138           if (shift_type == BITS2(1,0) && imm5 == 0)
9139              gate = False;
9140        }
9141     } else {
9142        if (INSNA(27,21) == BITS7(0,1,1,0,1,1,1) &&
9143            INSNA(5,4)   == BITS2(0,1)) {
9144           regD       = INSNA(15,12);
9145           regN       = INSNA(3,0);
9146           shift_type = (INSNA(6,6) << 1) | 0;
9147           imm5       = INSNA(11,7);
9148           sat_imm    = INSNA(20,16);
9149           if (regD != 15 && regN != 15)
9150              gate = True;
9151        }
9152     }
9153
9154     if (gate) {
9155        IRTemp irt_regN       = newTemp(Ity_I32);
9156        IRTemp irt_regN_shift = newTemp(Ity_I32);
9157        IRTemp irt_sat_Q      = newTemp(Ity_I32);
9158        IRTemp irt_result     = newTemp(Ity_I32);
9159
9160        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9161        compute_result_and_C_after_shift_by_imm5(
9162                dis_buf, &irt_regN_shift, NULL,
9163                irt_regN, shift_type, imm5, regN );
9164
9165        armUnsignedSatQ( &irt_result, &irt_sat_Q, irt_regN_shift, sat_imm );
9166        or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9167
9168        if (isT)
9169           putIRegT( regD, mkexpr(irt_result), condT );
9170        else
9171           putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9172
9173        DIP("usat%s r%u, #0x%04x, %s\n",
9174            nCC(conq), regD, imm5, dis_buf);
9175        return True;
9176     }
9177     /* fall through */
9178   }
9179
9180  /* ----------- ssat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9181   {
9182     UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9183     Bool gate = False;
9184
9185     if (isT) {
9186        if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9187            && INSNT0(4,4) == 0
9188            && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9189           regD       = INSNT1(11,8);
9190           regN       = INSNT0(3,0);
9191           shift_type = (INSNT0(5,5) << 1) | 0;
9192           imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
9193           sat_imm    = INSNT1(4,0) + 1;
9194           if (!isBadRegT(regD) && !isBadRegT(regN))
9195              gate = True;
9196           if (shift_type == BITS2(1,0) && imm5 == 0)
9197              gate = False;
9198        }
9199     } else {
9200        if (INSNA(27,21) == BITS7(0,1,1,0,1,0,1) &&
9201            INSNA(5,4)   == BITS2(0,1)) {
9202           regD       = INSNA(15,12);
9203           regN       = INSNA(3,0);
9204           shift_type = (INSNA(6,6) << 1) | 0;
9205           imm5       = INSNA(11,7);
9206           sat_imm    = INSNA(20,16) + 1;
9207           if (regD != 15 && regN != 15)
9208              gate = True;
9209        }
9210     }
9211
9212     if (gate) {
9213        IRTemp irt_regN       = newTemp(Ity_I32);
9214        IRTemp irt_regN_shift = newTemp(Ity_I32);
9215        IRTemp irt_sat_Q      = newTemp(Ity_I32);
9216        IRTemp irt_result     = newTemp(Ity_I32);
9217
9218        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9219        compute_result_and_C_after_shift_by_imm5(
9220                dis_buf, &irt_regN_shift, NULL,
9221                irt_regN, shift_type, imm5, regN );
9222
9223        armSignedSatQ( irt_regN_shift, sat_imm, &irt_result, &irt_sat_Q );
9224        or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9225
9226        if (isT)
9227           putIRegT( regD, mkexpr(irt_result), condT );
9228        else
9229           putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9230
9231        DIP( "ssat%s r%u, #0x%04x, %s\n",
9232             nCC(conq), regD, imm5, dis_buf);
9233        return True;
9234    }
9235    /* fall through */
9236  }
9237
9238   /* ----------- ssat16<c> <Rd>,#<imm>,<Rn> ----------- */
9239   {
9240     UInt regD = 99, regN = 99, sat_imm = 99;
9241     Bool gate = False;
9242
9243     if (isT) {
9244        if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9245            && INSNT0(5,4) == BITS2(1,0)
9246            && INSNT1(15,12) == BITS4(0,0,0,0)
9247            && INSNT1(7,4) == BITS4(0,0,0,0)) {
9248           regD       = INSNT1(11,8);
9249           regN       = INSNT0(3,0);
9250           sat_imm    = INSNT1(3,0) + 1;
9251           if (!isBadRegT(regD) && !isBadRegT(regN))
9252              gate = True;
9253        }
9254     } else {
9255        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,1,0) &&
9256            INSNA(11,4)   == BITS8(1,1,1,1,0,0,1,1)) {
9257           regD       = INSNA(15,12);
9258           regN       = INSNA(3,0);
9259           sat_imm    = INSNA(19,16) + 1;
9260           if (regD != 15 && regN != 15)
9261              gate = True;
9262        }
9263     }
9264
9265     if (gate) {
9266        IRTemp irt_regN    = newTemp(Ity_I32);
9267        IRTemp irt_regN_lo = newTemp(Ity_I32);
9268        IRTemp irt_regN_hi = newTemp(Ity_I32);
9269        IRTemp irt_Q_lo    = newTemp(Ity_I32);
9270        IRTemp irt_Q_hi    = newTemp(Ity_I32);
9271        IRTemp irt_res_lo  = newTemp(Ity_I32);
9272        IRTemp irt_res_hi  = newTemp(Ity_I32);
9273
9274        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9275        assign( irt_regN_lo,
9276                binop( Iop_Sar32,
9277                       binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9278                       mkU8(16)) );
9279        assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9280
9281        armSignedSatQ( irt_regN_lo, sat_imm, &irt_res_lo, &irt_Q_lo );
9282        or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9283
9284        armSignedSatQ( irt_regN_hi, sat_imm, &irt_res_hi, &irt_Q_hi );
9285        or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9286
9287        IRExpr* ire_result
9288           = binop(Iop_Or32,
9289                   binop(Iop_And32, mkexpr(irt_res_lo), mkU32(0xFFFF)),
9290                   binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)));
9291        if (isT)
9292           putIRegT( regD, ire_result, condT );
9293        else
9294           putIRegA( regD, ire_result, condT, Ijk_Boring );
9295
9296        DIP( "ssat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9297        return True;
9298     }
9299     /* fall through */
9300   }
9301
9302   /* -------------- usat16<c> <Rd>,#<imm4>,<Rn> --------------- */
9303   {
9304     UInt regD = 99, regN = 99, sat_imm = 99;
9305     Bool gate = False;
9306
9307     if (isT) {
9308        if (INSNT0(15,4) == 0xF3A && (INSNT1(15,0) & 0xF0F0) == 0x0000) {
9309           regN = INSNT0(3,0);
9310           regD = INSNT1(11,8);
9311           sat_imm = INSNT1(3,0);
9312           if (!isBadRegT(regD) && !isBadRegT(regN))
9313              gate = True;
9314       }
9315     } else {
9316        if (INSNA(27,20) == BITS8(0,1,1,0,1,1,1,0) &&
9317            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9318            INSNA(7,4)   == BITS4(0,0,1,1)) {
9319           regD    = INSNA(15,12);
9320           regN    = INSNA(3,0);
9321           sat_imm = INSNA(19,16);
9322           if (regD != 15 && regN != 15)
9323              gate = True;
9324        }
9325     }
9326
9327     if (gate) {
9328        IRTemp irt_regN    = newTemp(Ity_I32);
9329        IRTemp irt_regN_lo = newTemp(Ity_I32);
9330        IRTemp irt_regN_hi = newTemp(Ity_I32);
9331        IRTemp irt_Q_lo    = newTemp(Ity_I32);
9332        IRTemp irt_Q_hi    = newTemp(Ity_I32);
9333        IRTemp irt_res_lo  = newTemp(Ity_I32);
9334        IRTemp irt_res_hi  = newTemp(Ity_I32);
9335
9336        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9337        assign( irt_regN_lo, binop( Iop_Sar32,
9338                                    binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9339                                    mkU8(16)) );
9340        assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9341
9342        armUnsignedSatQ( &irt_res_lo, &irt_Q_lo, irt_regN_lo, sat_imm );
9343        or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9344
9345        armUnsignedSatQ( &irt_res_hi, &irt_Q_hi, irt_regN_hi, sat_imm );
9346        or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9347
9348        IRExpr* ire_result = binop( Iop_Or32,
9349                                    binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)),
9350                                    mkexpr(irt_res_lo) );
9351
9352        if (isT)
9353           putIRegT( regD, ire_result, condT );
9354        else
9355           putIRegA( regD, ire_result, condT, Ijk_Boring );
9356
9357        DIP( "usat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9358        return True;
9359     }
9360     /* fall through */
9361   }
9362
9363   /* -------------- uadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9364   {
9365     UInt regD = 99, regN = 99, regM = 99;
9366     Bool gate = False;
9367
9368     if (isT) {
9369        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9370           regN = INSNT0(3,0);
9371           regD = INSNT1(11,8);
9372           regM = INSNT1(3,0);
9373           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9374              gate = True;
9375        }
9376     } else {
9377        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9378            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9379            INSNA(7,4)   == BITS4(0,0,0,1)) {
9380           regD = INSNA(15,12);
9381           regN = INSNA(19,16);
9382           regM = INSNA(3,0);
9383           if (regD != 15 && regN != 15 && regM != 15)
9384              gate = True;
9385        }
9386     }
9387
9388     if (gate) {
9389        IRTemp rNt  = newTemp(Ity_I32);
9390        IRTemp rMt  = newTemp(Ity_I32);
9391        IRTemp res  = newTemp(Ity_I32);
9392        IRTemp reso = newTemp(Ity_I32);
9393
9394        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9395        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9396
9397        assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9398        if (isT)
9399           putIRegT( regD, mkexpr(res), condT );
9400        else
9401           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9402
9403        assign(reso, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
9404        set_GE_32_10_from_bits_31_15(reso, condT);
9405
9406        DIP("uadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9407        return True;
9408     }
9409     /* fall through */
9410   }
9411
9412   /* -------------- sadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9413   {
9414     UInt regD = 99, regN = 99, regM = 99;
9415     Bool gate = False;
9416
9417     if (isT) {
9418        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9419           regN = INSNT0(3,0);
9420           regD = INSNT1(11,8);
9421           regM = INSNT1(3,0);
9422           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9423              gate = True;
9424        }
9425     } else {
9426        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9427            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9428            INSNA(7,4)   == BITS4(0,0,0,1)) {
9429           regD = INSNA(15,12);
9430           regN = INSNA(19,16);
9431           regM = INSNA(3,0);
9432           if (regD != 15 && regN != 15 && regM != 15)
9433              gate = True;
9434        }
9435     }
9436
9437     if (gate) {
9438        IRTemp rNt  = newTemp(Ity_I32);
9439        IRTemp rMt  = newTemp(Ity_I32);
9440        IRTemp res  = newTemp(Ity_I32);
9441        IRTemp reso = newTemp(Ity_I32);
9442
9443        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9444        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9445
9446        assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9447        if (isT)
9448           putIRegT( regD, mkexpr(res), condT );
9449        else
9450           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9451
9452        assign(reso, unop(Iop_Not32,
9453                          binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt))));
9454        set_GE_32_10_from_bits_31_15(reso, condT);
9455
9456        DIP("sadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9457        return True;
9458     }
9459     /* fall through */
9460   }
9461
9462   /* ---------------- usub16<c> <Rd>,<Rn>,<Rm> ---------------- */
9463   {
9464     UInt regD = 99, regN = 99, regM = 99;
9465     Bool gate = False;
9466
9467     if (isT) {
9468        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9469           regN = INSNT0(3,0);
9470           regD = INSNT1(11,8);
9471           regM = INSNT1(3,0);
9472           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9473              gate = True;
9474        }
9475     } else {
9476        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9477            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9478            INSNA(7,4)   == BITS4(0,1,1,1)) {
9479           regD = INSNA(15,12);
9480           regN = INSNA(19,16);
9481           regM = INSNA(3,0);
9482           if (regD != 15 && regN != 15 && regM != 15)
9483             gate = True;
9484        }
9485     }
9486
9487     if (gate) {
9488        IRTemp rNt  = newTemp(Ity_I32);
9489        IRTemp rMt  = newTemp(Ity_I32);
9490        IRTemp res  = newTemp(Ity_I32);
9491        IRTemp reso = newTemp(Ity_I32);
9492
9493        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9494        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9495
9496        assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9497        if (isT)
9498           putIRegT( regD, mkexpr(res), condT );
9499        else
9500           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9501
9502        assign(reso, unop(Iop_Not32,
9503                          binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt))));
9504        set_GE_32_10_from_bits_31_15(reso, condT);
9505
9506        DIP("usub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9507        return True;
9508     }
9509     /* fall through */
9510   }
9511
9512   /* -------------- ssub16<c> <Rd>,<Rn>,<Rm> -------------- */
9513   {
9514     UInt regD = 99, regN = 99, regM = 99;
9515     Bool gate = False;
9516
9517     if (isT) {
9518        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9519           regN = INSNT0(3,0);
9520           regD = INSNT1(11,8);
9521           regM = INSNT1(3,0);
9522           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9523              gate = True;
9524        }
9525     } else {
9526        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9527            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9528            INSNA(7,4)   == BITS4(0,1,1,1)) {
9529           regD = INSNA(15,12);
9530           regN = INSNA(19,16);
9531           regM = INSNA(3,0);
9532           if (regD != 15 && regN != 15 && regM != 15)
9533              gate = True;
9534        }
9535     }
9536
9537     if (gate) {
9538        IRTemp rNt  = newTemp(Ity_I32);
9539        IRTemp rMt  = newTemp(Ity_I32);
9540        IRTemp res  = newTemp(Ity_I32);
9541        IRTemp reso = newTemp(Ity_I32);
9542
9543        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9544        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9545
9546        assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9547        if (isT)
9548           putIRegT( regD, mkexpr(res), condT );
9549        else
9550           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9551
9552        assign(reso, unop(Iop_Not32,
9553                          binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt))));
9554        set_GE_32_10_from_bits_31_15(reso, condT);
9555
9556        DIP("ssub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9557        return True;
9558     }
9559     /* fall through */
9560   }
9561
9562   /* ----------------- uadd8<c> <Rd>,<Rn>,<Rm> ---------------- */
9563   {
9564     UInt regD = 99, regN = 99, regM = 99;
9565     Bool gate = False;
9566
9567     if (isT) {
9568        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9569           regN = INSNT0(3,0);
9570           regD = INSNT1(11,8);
9571           regM = INSNT1(3,0);
9572           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9573              gate = True;
9574        }
9575     } else {
9576        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9577            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9578            (INSNA(7,4)  == BITS4(1,0,0,1))) {
9579           regD = INSNA(15,12);
9580           regN = INSNA(19,16);
9581           regM = INSNA(3,0);
9582           if (regD != 15 && regN != 15 && regM != 15)
9583              gate = True;
9584        }
9585     }
9586
9587     if (gate) {
9588        IRTemp rNt  = newTemp(Ity_I32);
9589        IRTemp rMt  = newTemp(Ity_I32);
9590        IRTemp res  = newTemp(Ity_I32);
9591        IRTemp reso = newTemp(Ity_I32);
9592
9593        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9594        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9595
9596        assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9597        if (isT)
9598           putIRegT( regD, mkexpr(res), condT );
9599        else
9600           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9601
9602        assign(reso, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9603        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9604
9605        DIP("uadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9606        return True;
9607     }
9608     /* fall through */
9609   }
9610
9611   /* ------------------- sadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9612   {
9613     UInt regD = 99, regN = 99, regM = 99;
9614     Bool gate = False;
9615
9616     if (isT) {
9617        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9618           regN = INSNT0(3,0);
9619           regD = INSNT1(11,8);
9620           regM = INSNT1(3,0);
9621           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9622              gate = True;
9623        }
9624     } else {
9625        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9626            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9627            (INSNA(7,4)  == BITS4(1,0,0,1))) {
9628           regD = INSNA(15,12);
9629           regN = INSNA(19,16);
9630           regM = INSNA(3,0);
9631           if (regD != 15 && regN != 15 && regM != 15)
9632              gate = True;
9633        }
9634     }
9635
9636     if (gate) {
9637        IRTemp rNt  = newTemp(Ity_I32);
9638        IRTemp rMt  = newTemp(Ity_I32);
9639        IRTemp res  = newTemp(Ity_I32);
9640        IRTemp reso = newTemp(Ity_I32);
9641
9642        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9643        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9644
9645        assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9646        if (isT)
9647           putIRegT( regD, mkexpr(res), condT );
9648        else
9649           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9650
9651        assign(reso, unop(Iop_Not32,
9652                          binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt))));
9653        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9654
9655        DIP("sadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9656        return True;
9657     }
9658     /* fall through */
9659   }
9660
9661   /* ------------------- usub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9662   {
9663     UInt regD = 99, regN = 99, regM = 99;
9664     Bool gate = False;
9665
9666     if (isT) {
9667        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9668           regN = INSNT0(3,0);
9669           regD = INSNT1(11,8);
9670           regM = INSNT1(3,0);
9671           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9672              gate = True;
9673        }
9674     } else {
9675        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9676            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9677            (INSNA(7,4)  == BITS4(1,1,1,1))) {
9678           regD = INSNA(15,12);
9679           regN = INSNA(19,16);
9680           regM = INSNA(3,0);
9681           if (regD != 15 && regN != 15 && regM != 15)
9682             gate = True;
9683        }
9684     }
9685
9686     if (gate) {
9687        IRTemp rNt  = newTemp(Ity_I32);
9688        IRTemp rMt  = newTemp(Ity_I32);
9689        IRTemp res  = newTemp(Ity_I32);
9690        IRTemp reso = newTemp(Ity_I32);
9691
9692        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9693        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9694
9695        assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9696        if (isT)
9697           putIRegT( regD, mkexpr(res), condT );
9698        else
9699           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9700
9701        assign(reso, unop(Iop_Not32,
9702                          binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt))));
9703        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9704
9705        DIP("usub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9706        return True;
9707     }
9708     /* fall through */
9709   }
9710
9711   /* ------------------- ssub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9712   {
9713     UInt regD = 99, regN = 99, regM = 99;
9714     Bool gate = False;
9715
9716     if (isT) {
9717        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9718           regN = INSNT0(3,0);
9719           regD = INSNT1(11,8);
9720           regM = INSNT1(3,0);
9721           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9722              gate = True;
9723        }
9724     } else {
9725        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9726            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9727            INSNA(7,4)   == BITS4(1,1,1,1)) {
9728           regD = INSNA(15,12);
9729           regN = INSNA(19,16);
9730           regM = INSNA(3,0);
9731           if (regD != 15 && regN != 15 && regM != 15)
9732              gate = True;
9733        }
9734     }
9735
9736     if (gate) {
9737        IRTemp rNt  = newTemp(Ity_I32);
9738        IRTemp rMt  = newTemp(Ity_I32);
9739        IRTemp res  = newTemp(Ity_I32);
9740        IRTemp reso = newTemp(Ity_I32);
9741
9742        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9743        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9744
9745        assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9746        if (isT)
9747           putIRegT( regD, mkexpr(res), condT );
9748        else
9749           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9750
9751        assign(reso, unop(Iop_Not32,
9752                          binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt))));
9753        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9754
9755        DIP("ssub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9756        return True;
9757     }
9758     /* fall through */
9759   }
9760
9761   /* ------------------ qadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
9762   {
9763     UInt regD = 99, regN = 99, regM = 99;
9764     Bool gate = False;
9765
9766     if (isT) {
9767        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9768           regN = INSNT0(3,0);
9769           regD = INSNT1(11,8);
9770           regM = INSNT1(3,0);
9771           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9772              gate = True;
9773        }
9774     } else {
9775        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9776            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9777            INSNA(7,4)   == BITS4(1,0,0,1)) {
9778           regD = INSNA(15,12);
9779           regN = INSNA(19,16);
9780           regM = INSNA(3,0);
9781           if (regD != 15 && regN != 15 && regM != 15)
9782              gate = True;
9783        }
9784     }
9785
9786     if (gate) {
9787        IRTemp rNt   = newTemp(Ity_I32);
9788        IRTemp rMt   = newTemp(Ity_I32);
9789        IRTemp res_q = newTemp(Ity_I32);
9790
9791        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9792        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9793
9794        assign(res_q, binop(Iop_QAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
9795        if (isT)
9796           putIRegT( regD, mkexpr(res_q), condT );
9797        else
9798           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9799
9800        DIP("qadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9801        return True;
9802     }
9803     /* fall through */
9804   }
9805
9806   /* ------------------ qsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
9807   {
9808     UInt regD = 99, regN = 99, regM = 99;
9809     Bool gate = False;
9810
9811     if (isT) {
9812        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9813           regN = INSNT0(3,0);
9814           regD = INSNT1(11,8);
9815           regM = INSNT1(3,0);
9816           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9817              gate = True;
9818        }
9819     } else {
9820        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9821            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9822            INSNA(7,4)   == BITS4(1,1,1,1)) {
9823           regD = INSNA(15,12);
9824           regN = INSNA(19,16);
9825           regM = INSNA(3,0);
9826           if (regD != 15 && regN != 15 && regM != 15)
9827              gate = True;
9828        }
9829     }
9830
9831     if (gate) {
9832        IRTemp rNt   = newTemp(Ity_I32);
9833        IRTemp rMt   = newTemp(Ity_I32);
9834        IRTemp res_q = newTemp(Ity_I32);
9835
9836        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9837        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9838
9839        assign(res_q, binop(Iop_QSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
9840        if (isT)
9841           putIRegT( regD, mkexpr(res_q), condT );
9842        else
9843           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9844
9845        DIP("qsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9846        return True;
9847     }
9848     /* fall through */
9849   }
9850
9851   /* ------------------ uqadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9852   {
9853     UInt regD = 99, regN = 99, regM = 99;
9854     Bool gate = False;
9855
9856     if (isT) {
9857        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9858           regN = INSNT0(3,0);
9859           regD = INSNT1(11,8);
9860           regM = INSNT1(3,0);
9861           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9862              gate = True;
9863        }
9864     } else {
9865        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9866            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9867            (INSNA(7,4)  == BITS4(1,0,0,1))) {
9868           regD = INSNA(15,12);
9869           regN = INSNA(19,16);
9870           regM = INSNA(3,0);
9871           if (regD != 15 && regN != 15 && regM != 15)
9872              gate = True;
9873        }
9874     }
9875
9876     if (gate) {
9877        IRTemp rNt   = newTemp(Ity_I32);
9878        IRTemp rMt   = newTemp(Ity_I32);
9879        IRTemp res_q = newTemp(Ity_I32);
9880
9881        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9882        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9883
9884        assign(res_q, binop(Iop_QAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9885        if (isT)
9886           putIRegT( regD, mkexpr(res_q), condT );
9887        else
9888           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9889
9890        DIP("uqadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9891        return True;
9892     }
9893     /* fall through */
9894   }
9895
9896   /* ------------------ uqsub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9897   {
9898     UInt regD = 99, regN = 99, regM = 99;
9899     Bool gate = False;
9900
9901     if (isT) {
9902        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9903           regN = INSNT0(3,0);
9904           regD = INSNT1(11,8);
9905           regM = INSNT1(3,0);
9906           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9907              gate = True;
9908        }
9909     } else {
9910        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9911            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9912            (INSNA(7,4)  == BITS4(1,1,1,1))) {
9913           regD = INSNA(15,12);
9914           regN = INSNA(19,16);
9915           regM = INSNA(3,0);
9916           if (regD != 15 && regN != 15 && regM != 15)
9917             gate = True;
9918        }
9919     }
9920
9921     if (gate) {
9922        IRTemp rNt   = newTemp(Ity_I32);
9923        IRTemp rMt   = newTemp(Ity_I32);
9924        IRTemp res_q = newTemp(Ity_I32);
9925
9926        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9927        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9928
9929        assign(res_q, binop(Iop_QSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
9930        if (isT)
9931           putIRegT( regD, mkexpr(res_q), condT );
9932        else
9933           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9934
9935        DIP("uqsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9936        return True;
9937     }
9938     /* fall through */
9939   }
9940
9941   /* ----------------- uhadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
9942   {
9943     UInt regD = 99, regN = 99, regM = 99;
9944     Bool gate = False;
9945
9946     if (isT) {
9947        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
9948           regN = INSNT0(3,0);
9949           regD = INSNT1(11,8);
9950           regM = INSNT1(3,0);
9951           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9952              gate = True;
9953        }
9954     } else {
9955        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
9956            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9957            INSNA(7,4)   == BITS4(1,0,0,1)) {
9958           regD = INSNA(15,12);
9959           regN = INSNA(19,16);
9960           regM = INSNA(3,0);
9961           if (regD != 15 && regN != 15 && regM != 15)
9962              gate = True;
9963        }
9964     }
9965
9966     if (gate) {
9967        IRTemp rNt   = newTemp(Ity_I32);
9968        IRTemp rMt   = newTemp(Ity_I32);
9969        IRTemp res_q = newTemp(Ity_I32);
9970
9971        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9972        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9973
9974        assign(res_q, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9975        if (isT)
9976           putIRegT( regD, mkexpr(res_q), condT );
9977        else
9978           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9979
9980        DIP("uhadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9981        return True;
9982     }
9983     /* fall through */
9984   }
9985
9986   /* ----------------- uhadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
9987   {
9988     UInt regD = 99, regN = 99, regM = 99;
9989     Bool gate = False;
9990
9991     if (isT) {
9992        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
9993           regN = INSNT0(3,0);
9994           regD = INSNT1(11,8);
9995           regM = INSNT1(3,0);
9996           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9997              gate = True;
9998        }
9999     } else {
10000        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
10001            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10002            INSNA(7,4)   == BITS4(0,0,0,1)) {
10003           regD = INSNA(15,12);
10004           regN = INSNA(19,16);
10005           regM = INSNA(3,0);
10006           if (regD != 15 && regN != 15 && regM != 15)
10007              gate = True;
10008        }
10009     }
10010
10011     if (gate) {
10012        IRTemp rNt   = newTemp(Ity_I32);
10013        IRTemp rMt   = newTemp(Ity_I32);
10014        IRTemp res_q = newTemp(Ity_I32);
10015
10016        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10017        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10018
10019        assign(res_q, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
10020        if (isT)
10021           putIRegT( regD, mkexpr(res_q), condT );
10022        else
10023           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10024
10025        DIP("uhadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10026        return True;
10027     }
10028     /* fall through */
10029   }
10030
10031   /* ----------------- shadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
10032   {
10033     UInt regD = 99, regN = 99, regM = 99;
10034     Bool gate = False;
10035
10036     if (isT) {
10037        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
10038           regN = INSNT0(3,0);
10039           regD = INSNT1(11,8);
10040           regM = INSNT1(3,0);
10041           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10042              gate = True;
10043        }
10044     } else {
10045        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
10046            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10047            INSNA(7,4)   == BITS4(1,0,0,1)) {
10048           regD = INSNA(15,12);
10049           regN = INSNA(19,16);
10050           regM = INSNA(3,0);
10051           if (regD != 15 && regN != 15 && regM != 15)
10052              gate = True;
10053        }
10054     }
10055
10056     if (gate) {
10057        IRTemp rNt   = newTemp(Ity_I32);
10058        IRTemp rMt   = newTemp(Ity_I32);
10059        IRTemp res_q = newTemp(Ity_I32);
10060
10061        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10062        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10063
10064        assign(res_q, binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
10065        if (isT)
10066           putIRegT( regD, mkexpr(res_q), condT );
10067        else
10068           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10069
10070        DIP("shadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10071        return True;
10072     }
10073     /* fall through */
10074   }
10075
10076   /* ------------------ qadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
10077   {
10078     UInt regD = 99, regN = 99, regM = 99;
10079     Bool gate = False;
10080
10081     if (isT) {
10082        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10083           regN = INSNT0(3,0);
10084           regD = INSNT1(11,8);
10085           regM = INSNT1(3,0);
10086           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10087              gate = True;
10088        }
10089     } else {
10090        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10091            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10092            INSNA(7,4)   == BITS4(0,0,0,1)) {
10093           regD = INSNA(15,12);
10094           regN = INSNA(19,16);
10095           regM = INSNA(3,0);
10096           if (regD != 15 && regN != 15 && regM != 15)
10097              gate = True;
10098        }
10099     }
10100
10101     if (gate) {
10102        IRTemp rNt   = newTemp(Ity_I32);
10103        IRTemp rMt   = newTemp(Ity_I32);
10104        IRTemp res_q = newTemp(Ity_I32);
10105
10106        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10107        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10108
10109        assign(res_q, binop(Iop_QAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
10110        if (isT)
10111           putIRegT( regD, mkexpr(res_q), condT );
10112        else
10113           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10114
10115        DIP("qadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10116        return True;
10117     }
10118     /* fall through */
10119   }
10120
10121   /* ------------------ qsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
10122   {
10123     UInt regD = 99, regN = 99, regM = 99;
10124     Bool gate = False;
10125
10126      if (isT) {
10127        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10128           regN = INSNT0(3,0);
10129           regD = INSNT1(11,8);
10130           regM = INSNT1(3,0);
10131           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10132              gate = True;
10133        }
10134     } else {
10135        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10136            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10137            INSNA(7,4)   == BITS4(0,1,1,1)) {
10138           regD = INSNA(15,12);
10139           regN = INSNA(19,16);
10140           regM = INSNA(3,0);
10141           if (regD != 15 && regN != 15 && regM != 15)
10142             gate = True;
10143        }
10144     }
10145
10146     if (gate) {
10147        IRTemp rNt   = newTemp(Ity_I32);
10148        IRTemp rMt   = newTemp(Ity_I32);
10149        IRTemp res_q = newTemp(Ity_I32);
10150
10151        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10152        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10153
10154        assign(res_q, binop(Iop_QSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
10155        if (isT)
10156           putIRegT( regD, mkexpr(res_q), condT );
10157        else
10158           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10159
10160        DIP("qsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10161        return True;
10162     }
10163     /* fall through */
10164   }
10165
10166   /* ------------------- qsax<c> <Rd>,<Rn>,<Rm> ------------------- */
10167   /* note: the hardware seems to construct the result differently
10168      from wot the manual says. */
10169   {
10170     UInt regD = 99, regN = 99, regM = 99;
10171     Bool gate = False;
10172
10173     if (isT) {
10174        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10175           regN = INSNT0(3,0);
10176           regD = INSNT1(11,8);
10177           regM = INSNT1(3,0);
10178           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10179              gate = True;
10180        }
10181     } else {
10182        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10183            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10184            INSNA(7,4)   == BITS4(0,1,0,1)) {
10185           regD = INSNA(15,12);
10186           regN = INSNA(19,16);
10187           regM = INSNA(3,0);
10188           if (regD != 15 && regN != 15 && regM != 15)
10189              gate = True;
10190        }
10191     }
10192
10193     if (gate) {
10194        IRTemp irt_regN     = newTemp(Ity_I32);
10195        IRTemp irt_regM     = newTemp(Ity_I32);
10196        IRTemp irt_sum      = newTemp(Ity_I32);
10197        IRTemp irt_diff     = newTemp(Ity_I32);
10198        IRTemp irt_sum_res  = newTemp(Ity_I32);
10199        IRTemp irt_diff_res = newTemp(Ity_I32);
10200
10201        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10202        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10203
10204        assign( irt_diff,
10205                binop( Iop_Sub32,
10206                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10207                       binop( Iop_Sar32,
10208                              binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10209                              mkU8(16) ) ) );
10210        armSignedSatQ( irt_diff, 0x10, &irt_diff_res, NULL);
10211
10212        assign( irt_sum,
10213                binop( Iop_Add32,
10214                       binop( Iop_Sar32,
10215                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10216                              mkU8(16) ),
10217                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) )) );
10218        armSignedSatQ( irt_sum, 0x10, &irt_sum_res, NULL );
10219
10220        IRExpr* ire_result = binop( Iop_Or32,
10221                                    binop( Iop_Shl32, mkexpr(irt_diff_res),
10222                                           mkU8(16) ),
10223                                    binop( Iop_And32, mkexpr(irt_sum_res),
10224                                           mkU32(0xFFFF)) );
10225
10226        if (isT)
10227           putIRegT( regD, ire_result, condT );
10228        else
10229           putIRegA( regD, ire_result, condT, Ijk_Boring );
10230
10231        DIP( "qsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10232        return True;
10233     }
10234     /* fall through */
10235   }
10236
10237   /* ------------------- qasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10238   {
10239     UInt regD = 99, regN = 99, regM = 99;
10240     Bool gate = False;
10241
10242     if (isT) {
10243        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10244           regN = INSNT0(3,0);
10245           regD = INSNT1(11,8);
10246           regM = INSNT1(3,0);
10247           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10248              gate = True;
10249        }
10250     } else {
10251        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10252            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10253            INSNA(7,4)   == BITS4(0,0,1,1)) {
10254           regD = INSNA(15,12);
10255           regN = INSNA(19,16);
10256           regM = INSNA(3,0);
10257           if (regD != 15 && regN != 15 && regM != 15)
10258              gate = True;
10259        }
10260     }
10261
10262     if (gate) {
10263        IRTemp irt_regN     = newTemp(Ity_I32);
10264        IRTemp irt_regM     = newTemp(Ity_I32);
10265        IRTemp irt_sum      = newTemp(Ity_I32);
10266        IRTemp irt_diff     = newTemp(Ity_I32);
10267        IRTemp irt_res_sum  = newTemp(Ity_I32);
10268        IRTemp irt_res_diff = newTemp(Ity_I32);
10269
10270        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10271        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10272
10273        assign( irt_diff,
10274                binop( Iop_Sub32,
10275                       binop( Iop_Sar32,
10276                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10277                              mkU8(16) ),
10278                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10279        armSignedSatQ( irt_diff, 0x10, &irt_res_diff, NULL );
10280
10281        assign( irt_sum,
10282                binop( Iop_Add32,
10283                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10284                       binop( Iop_Sar32,
10285                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10286                              mkU8(16) ) ) );
10287        armSignedSatQ( irt_sum, 0x10, &irt_res_sum, NULL );
10288
10289        IRExpr* ire_result
10290          = binop( Iop_Or32,
10291                   binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
10292                   binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
10293
10294        if (isT)
10295           putIRegT( regD, ire_result, condT );
10296        else
10297           putIRegA( regD, ire_result, condT, Ijk_Boring );
10298
10299        DIP( "qasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10300        return True;
10301     }
10302     /* fall through */
10303   }
10304
10305   /* ------------------- sasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10306   {
10307     UInt regD = 99, regN = 99, regM = 99;
10308     Bool gate = False;
10309
10310     if (isT) {
10311        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
10312           regN = INSNT0(3,0);
10313           regD = INSNT1(11,8);
10314           regM = INSNT1(3,0);
10315           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10316              gate = True;
10317        }
10318     } else {
10319        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
10320            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10321            INSNA(7,4)   == BITS4(0,0,1,1)) {
10322           regD = INSNA(15,12);
10323           regN = INSNA(19,16);
10324           regM = INSNA(3,0);
10325           if (regD != 15 && regN != 15 && regM != 15)
10326              gate = True;
10327        }
10328     }
10329
10330     if (gate) {
10331        IRTemp irt_regN = newTemp(Ity_I32);
10332        IRTemp irt_regM = newTemp(Ity_I32);
10333        IRTemp irt_sum  = newTemp(Ity_I32);
10334        IRTemp irt_diff = newTemp(Ity_I32);
10335
10336        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10337        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10338
10339        assign( irt_diff,
10340                binop( Iop_Sub32,
10341                       binop( Iop_Sar32,
10342                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10343                              mkU8(16) ),
10344                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10345
10346        assign( irt_sum,
10347                binop( Iop_Add32,
10348                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10349                       binop( Iop_Sar32,
10350                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10351                              mkU8(16) ) ) );
10352
10353        IRExpr* ire_result
10354          = binop( Iop_Or32,
10355                   binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
10356                   binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
10357
10358        IRTemp ge10 = newTemp(Ity_I32);
10359        assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
10360        put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
10361        put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
10362
10363        IRTemp ge32 = newTemp(Ity_I32);
10364        assign(ge32, unop(Iop_Not32, mkexpr(irt_sum)));
10365        put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
10366        put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
10367
10368        if (isT)
10369           putIRegT( regD, ire_result, condT );
10370        else
10371           putIRegA( regD, ire_result, condT, Ijk_Boring );
10372
10373        DIP( "sasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10374        return True;
10375     }
10376     /* fall through */
10377   }
10378
10379   /* --------------- smuad, smuadx<c><Rd>,<Rn>,<Rm> --------------- */
10380   /* --------------- smsad, smsadx<c><Rd>,<Rn>,<Rm> --------------- */
10381   {
10382     UInt regD = 99, regN = 99, regM = 99, bitM = 99;
10383     Bool gate = False, isAD = False;
10384
10385     if (isT) {
10386        if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10387            && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
10388           regN = INSNT0(3,0);
10389           regD = INSNT1(11,8);
10390           regM = INSNT1(3,0);
10391           bitM = INSNT1(4,4);
10392           isAD = INSNT0(15,4) == 0xFB2;
10393           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10394              gate = True;
10395        }
10396     } else {
10397        if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10398            INSNA(15,12) == BITS4(1,1,1,1)         &&
10399            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1) ) {
10400           regD = INSNA(19,16);
10401           regN = INSNA(3,0);
10402           regM = INSNA(11,8);
10403           bitM = INSNA(5,5);
10404           isAD = INSNA(6,6) == 0;
10405           if (regD != 15 && regN != 15 && regM != 15)
10406              gate = True;
10407        }
10408     }
10409
10410     if (gate) {
10411        IRTemp irt_regN    = newTemp(Ity_I32);
10412        IRTemp irt_regM    = newTemp(Ity_I32);
10413        IRTemp irt_prod_lo = newTemp(Ity_I32);
10414        IRTemp irt_prod_hi = newTemp(Ity_I32);
10415        IRTemp tmpM        = newTemp(Ity_I32);
10416
10417        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10418
10419        assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
10420        assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
10421
10422        assign( irt_prod_lo,
10423                binop( Iop_Mul32,
10424                       binop( Iop_Sar32,
10425                              binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
10426                              mkU8(16) ),
10427                       binop( Iop_Sar32,
10428                              binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10429                              mkU8(16) ) ) );
10430        assign( irt_prod_hi, binop(Iop_Mul32,
10431                                   binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)),
10432                                   binop(Iop_Sar32, mkexpr(irt_regM), mkU8(16))) );
10433        IRExpr* ire_result
10434           = binop( isAD ? Iop_Add32 : Iop_Sub32,
10435                    mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
10436
10437        if (isT)
10438           putIRegT( regD, ire_result, condT );
10439        else
10440           putIRegA( regD, ire_result, condT, Ijk_Boring );
10441
10442        if (isAD) {
10443           or_into_QFLAG32(
10444              signed_overflow_after_Add32( ire_result,
10445                                           irt_prod_lo, irt_prod_hi ),
10446              condT
10447           );
10448        }
10449
10450        DIP("smu%cd%s%s r%u, r%u, r%u\n",
10451            isAD ? 'a' : 's',
10452            bitM ? "x" : "", nCC(conq), regD, regN, regM);
10453        return True;
10454     }
10455     /* fall through */
10456   }
10457
10458   /* --------------- smlad{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10459   /* --------------- smlsd{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10460   {
10461     UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10462     Bool gate = False, isAD = False;
10463
10464     if (isT) {
10465       if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10466           && INSNT1(7,5) == BITS3(0,0,0)) {
10467           regN = INSNT0(3,0);
10468           regD = INSNT1(11,8);
10469           regM = INSNT1(3,0);
10470           regA = INSNT1(15,12);
10471           bitM = INSNT1(4,4);
10472           isAD = INSNT0(15,4) == 0xFB2;
10473           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10474               && !isBadRegT(regA))
10475              gate = True;
10476        }
10477     } else {
10478        if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10479            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
10480           regD = INSNA(19,16);
10481           regA = INSNA(15,12);
10482           regN = INSNA(3,0);
10483           regM = INSNA(11,8);
10484           bitM = INSNA(5,5);
10485           isAD = INSNA(6,6) == 0;
10486           if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10487              gate = True;
10488        }
10489     }
10490
10491     if (gate) {
10492        IRTemp irt_regN    = newTemp(Ity_I32);
10493        IRTemp irt_regM    = newTemp(Ity_I32);
10494        IRTemp irt_regA    = newTemp(Ity_I32);
10495        IRTemp irt_prod_lo = newTemp(Ity_I32);
10496        IRTemp irt_prod_hi = newTemp(Ity_I32);
10497        IRTemp irt_sum     = newTemp(Ity_I32);
10498        IRTemp tmpM        = newTemp(Ity_I32);
10499
10500        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10501        assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10502
10503        assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
10504        assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
10505
10506        assign( irt_prod_lo,
10507                binop(Iop_Mul32,
10508                      binop(Iop_Sar32,
10509                            binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10510                            mkU8(16)),
10511                      binop(Iop_Sar32,
10512                            binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10513                            mkU8(16))) );
10514        assign( irt_prod_hi,
10515                binop( Iop_Mul32,
10516                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10517                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10518        assign( irt_sum, binop( isAD ? Iop_Add32 : Iop_Sub32,
10519                                mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) ) );
10520
10521        IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_sum), mkexpr(irt_regA));
10522
10523        if (isT)
10524           putIRegT( regD, ire_result, condT );
10525        else
10526           putIRegA( regD, ire_result, condT, Ijk_Boring );
10527
10528        if (isAD) {
10529           or_into_QFLAG32(
10530              signed_overflow_after_Add32( mkexpr(irt_sum),
10531                                           irt_prod_lo, irt_prod_hi ),
10532              condT
10533           );
10534        }
10535
10536        or_into_QFLAG32(
10537           signed_overflow_after_Add32( ire_result, irt_sum, irt_regA ),
10538           condT
10539        );
10540
10541        DIP("sml%cd%s%s r%u, r%u, r%u, r%u\n",
10542            isAD ? 'a' : 's',
10543            bitM ? "x" : "", nCC(conq), regD, regN, regM, regA);
10544        return True;
10545     }
10546     /* fall through */
10547   }
10548
10549   /* ----- smlabb, smlabt, smlatb, smlatt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10550   {
10551     UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99, bitN = 99;
10552     Bool gate = False;
10553
10554     if (isT) {
10555        if (INSNT0(15,4) == 0xFB1 && INSNT1(7,6) == BITS2(0,0)) {
10556           regN = INSNT0(3,0);
10557           regD = INSNT1(11,8);
10558           regM = INSNT1(3,0);
10559           regA = INSNT1(15,12);
10560           bitM = INSNT1(4,4);
10561           bitN = INSNT1(5,5);
10562           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10563               && !isBadRegT(regA))
10564              gate = True;
10565        }
10566     } else {
10567        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
10568            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10569           regD = INSNA(19,16);
10570           regN = INSNA(3,0);
10571           regM = INSNA(11,8);
10572           regA = INSNA(15,12);
10573           bitM = INSNA(6,6);
10574           bitN = INSNA(5,5);
10575           if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10576              gate = True;
10577        }
10578     }
10579
10580     if (gate) {
10581        IRTemp irt_regA = newTemp(Ity_I32);
10582        IRTemp irt_prod = newTemp(Ity_I32);
10583
10584        assign( irt_prod,
10585                binop(Iop_Mul32,
10586                      binop(Iop_Sar32,
10587                            binop(Iop_Shl32,
10588                                  isT ? getIRegT(regN) : getIRegA(regN),
10589                                  mkU8(bitN ? 0 : 16)),
10590                            mkU8(16)),
10591                      binop(Iop_Sar32,
10592                            binop(Iop_Shl32,
10593                                  isT ? getIRegT(regM) : getIRegA(regM),
10594                                  mkU8(bitM ? 0 : 16)),
10595                            mkU8(16))) );
10596
10597        assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10598
10599        IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_prod), mkexpr(irt_regA));
10600
10601        if (isT)
10602           putIRegT( regD, ire_result, condT );
10603        else
10604           putIRegA( regD, ire_result, condT, Ijk_Boring );
10605
10606        or_into_QFLAG32(
10607           signed_overflow_after_Add32( ire_result, irt_prod, irt_regA ),
10608           condT
10609        );
10610
10611        DIP( "smla%c%c%s r%u, r%u, r%u, r%u\n",
10612             bitN ? 't' : 'b', bitM ? 't' : 'b',
10613             nCC(conq), regD, regN, regM, regA );
10614        return True;
10615     }
10616     /* fall through */
10617   }
10618
10619   /* ----- smlalbb, smlalbt, smlaltb, smlaltt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10620   {
10621     UInt regDHi = 99, regN = 99, regM = 99, regDLo = 99, bitM = 99, bitN = 99;
10622     Bool gate = False;
10623
10624     if (isT) {
10625        if (INSNT0(15,4) == 0xFBC && INSNT1(7,6) == BITS2(1,0)) {
10626           regN   = INSNT0(3,0);
10627           regDHi = INSNT1(11,8);
10628           regM   = INSNT1(3,0);
10629           regDLo = INSNT1(15,12);
10630           bitM   = INSNT1(4,4);
10631           bitN   = INSNT1(5,5);
10632           if (!isBadRegT(regDHi) && !isBadRegT(regN) && !isBadRegT(regM)
10633               && !isBadRegT(regDLo) && regDHi != regDLo)
10634              gate = True;
10635        }
10636     } else {
10637        if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
10638            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10639           regDHi = INSNA(19,16);
10640           regN   = INSNA(3,0);
10641           regM   = INSNA(11,8);
10642           regDLo = INSNA(15,12);
10643           bitM   = INSNA(6,6);
10644           bitN   = INSNA(5,5);
10645           if (regDHi != 15 && regN != 15 && regM != 15 && regDLo != 15 &&
10646               regDHi != regDLo)
10647              gate = True;
10648        }
10649     }
10650
10651     if (gate) {
10652        IRTemp irt_regD  = newTemp(Ity_I64);
10653        IRTemp irt_prod  = newTemp(Ity_I64);
10654        IRTemp irt_res   = newTemp(Ity_I64);
10655        IRTemp irt_resHi = newTemp(Ity_I32);
10656        IRTemp irt_resLo = newTemp(Ity_I32);
10657
10658        assign( irt_prod,
10659                binop(Iop_MullS32,
10660                      binop(Iop_Sar32,
10661                            binop(Iop_Shl32,
10662                                  isT ? getIRegT(regN) : getIRegA(regN),
10663                                  mkU8(bitN ? 0 : 16)),
10664                            mkU8(16)),
10665                      binop(Iop_Sar32,
10666                            binop(Iop_Shl32,
10667                                  isT ? getIRegT(regM) : getIRegA(regM),
10668                                  mkU8(bitM ? 0 : 16)),
10669                            mkU8(16))) );
10670
10671        assign( irt_regD, binop(Iop_32HLto64,
10672                                isT ? getIRegT(regDHi) : getIRegA(regDHi),
10673                                isT ? getIRegT(regDLo) : getIRegA(regDLo)) );
10674        assign( irt_res, binop(Iop_Add64, mkexpr(irt_regD), mkexpr(irt_prod)) );
10675        assign( irt_resHi, unop(Iop_64HIto32, mkexpr(irt_res)) );
10676        assign( irt_resLo, unop(Iop_64to32, mkexpr(irt_res)) );
10677
10678        if (isT) {
10679           putIRegT( regDHi, mkexpr(irt_resHi), condT );
10680           putIRegT( regDLo, mkexpr(irt_resLo), condT );
10681        } else {
10682           putIRegA( regDHi, mkexpr(irt_resHi), condT, Ijk_Boring );
10683           putIRegA( regDLo, mkexpr(irt_resLo), condT, Ijk_Boring );
10684        }
10685
10686        DIP( "smlal%c%c%s r%u, r%u, r%u, r%u\n",
10687             bitN ? 't' : 'b', bitM ? 't' : 'b',
10688             nCC(conq), regDHi, regN, regM, regDLo );
10689        return True;
10690     }
10691     /* fall through */
10692   }
10693
10694   /* ----- smlawb, smlawt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10695   {
10696     UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10697     Bool gate = False;
10698
10699     if (isT) {
10700        if (INSNT0(15,4) == 0xFB3 && INSNT1(7,5) == BITS3(0,0,0)) {
10701           regN = INSNT0(3,0);
10702           regD = INSNT1(11,8);
10703           regM = INSNT1(3,0);
10704           regA = INSNT1(15,12);
10705           bitM = INSNT1(4,4);
10706           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10707               && !isBadRegT(regA))
10708              gate = True;
10709        }
10710     } else {
10711        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
10712            (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,0,0)) {
10713           regD = INSNA(19,16);
10714           regN = INSNA(3,0);
10715           regM = INSNA(11,8);
10716           regA = INSNA(15,12);
10717           bitM = INSNA(6,6);
10718           if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10719              gate = True;
10720        }
10721     }
10722
10723     if (gate) {
10724        IRTemp irt_regA = newTemp(Ity_I32);
10725        IRTemp irt_prod = newTemp(Ity_I64);
10726
10727        assign( irt_prod,
10728                binop(Iop_MullS32,
10729                      isT ? getIRegT(regN) : getIRegA(regN),
10730                      binop(Iop_Sar32,
10731                            binop(Iop_Shl32,
10732                                  isT ? getIRegT(regM) : getIRegA(regM),
10733                                  mkU8(bitM ? 0 : 16)),
10734                            mkU8(16))) );
10735
10736        assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10737
10738        IRTemp prod32 = newTemp(Ity_I32);
10739        assign(prod32,
10740               binop(Iop_Or32,
10741                     binop(Iop_Shl32, unop(Iop_64HIto32, mkexpr(irt_prod)), mkU8(16)),
10742                     binop(Iop_Shr32, unop(Iop_64to32, mkexpr(irt_prod)), mkU8(16))
10743        ));
10744
10745        IRExpr* ire_result = binop(Iop_Add32, mkexpr(prod32), mkexpr(irt_regA));
10746
10747        if (isT)
10748           putIRegT( regD, ire_result, condT );
10749        else
10750           putIRegA( regD, ire_result, condT, Ijk_Boring );
10751
10752        or_into_QFLAG32(
10753           signed_overflow_after_Add32( ire_result, prod32, irt_regA ),
10754           condT
10755        );
10756
10757        DIP( "smlaw%c%s r%u, r%u, r%u, r%u\n",
10758             bitM ? 't' : 'b',
10759             nCC(conq), regD, regN, regM, regA );
10760        return True;
10761     }
10762     /* fall through */
10763   }
10764
10765   /* ------------------- sel<c> <Rd>,<Rn>,<Rm> -------------------- */
10766   /* fixme: fix up the test in v6media.c so that we can pass the ge
10767      flags as part of the test. */
10768   {
10769     UInt regD = 99, regN = 99, regM = 99;
10770     Bool gate = False;
10771
10772     if (isT) {
10773        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
10774           regN = INSNT0(3,0);
10775           regD = INSNT1(11,8);
10776           regM = INSNT1(3,0);
10777           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10778              gate = True;
10779        }
10780     } else {
10781        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
10782            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10783            INSNA(7,4)   == BITS4(1,0,1,1)) {
10784           regD = INSNA(15,12);
10785           regN = INSNA(19,16);
10786           regM = INSNA(3,0);
10787           if (regD != 15 && regN != 15 && regM != 15)
10788              gate = True;
10789        }
10790     }
10791
10792     if (gate) {
10793        IRTemp irt_ge_flag0 = newTemp(Ity_I32);
10794        IRTemp irt_ge_flag1 = newTemp(Ity_I32);
10795        IRTemp irt_ge_flag2 = newTemp(Ity_I32);
10796        IRTemp irt_ge_flag3 = newTemp(Ity_I32);
10797
10798        assign( irt_ge_flag0, get_GEFLAG32(0) );
10799        assign( irt_ge_flag1, get_GEFLAG32(1) );
10800        assign( irt_ge_flag2, get_GEFLAG32(2) );
10801        assign( irt_ge_flag3, get_GEFLAG32(3) );
10802
10803        IRExpr* ire_ge_flag0_or
10804          = binop(Iop_Or32, mkexpr(irt_ge_flag0),
10805                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag0)));
10806        IRExpr* ire_ge_flag1_or
10807          = binop(Iop_Or32, mkexpr(irt_ge_flag1),
10808                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag1)));
10809        IRExpr* ire_ge_flag2_or
10810          = binop(Iop_Or32, mkexpr(irt_ge_flag2),
10811                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag2)));
10812        IRExpr* ire_ge_flag3_or
10813          = binop(Iop_Or32, mkexpr(irt_ge_flag3),
10814                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag3)));
10815
10816        IRExpr* ire_ge_flags
10817          = binop( Iop_Or32,
10818                   binop(Iop_Or32,
10819                         binop(Iop_And32,
10820                               binop(Iop_Sar32, ire_ge_flag0_or, mkU8(31)),
10821                               mkU32(0x000000ff)),
10822                         binop(Iop_And32,
10823                               binop(Iop_Sar32, ire_ge_flag1_or, mkU8(31)),
10824                               mkU32(0x0000ff00))),
10825                   binop(Iop_Or32,
10826                         binop(Iop_And32,
10827                               binop(Iop_Sar32, ire_ge_flag2_or, mkU8(31)),
10828                               mkU32(0x00ff0000)),
10829                         binop(Iop_And32,
10830                               binop(Iop_Sar32, ire_ge_flag3_or, mkU8(31)),
10831                               mkU32(0xff000000))) );
10832
10833        IRExpr* ire_result
10834          = binop(Iop_Or32,
10835                  binop(Iop_And32,
10836                        isT ? getIRegT(regN) : getIRegA(regN),
10837                        ire_ge_flags ),
10838                  binop(Iop_And32,
10839                        isT ? getIRegT(regM) : getIRegA(regM),
10840                        unop(Iop_Not32, ire_ge_flags)));
10841
10842        if (isT)
10843           putIRegT( regD, ire_result, condT );
10844        else
10845           putIRegA( regD, ire_result, condT, Ijk_Boring );
10846
10847        DIP("sel%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10848        return True;
10849     }
10850     /* fall through */
10851   }
10852
10853   /* ----------------- uxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
10854   {
10855     UInt regD = 99, regN = 99, regM = 99, rotate = 99;
10856     Bool gate = False;
10857
10858     if (isT) {
10859        if (INSNT0(15,4) == 0xFA3 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
10860           regN   = INSNT0(3,0);
10861           regD   = INSNT1(11,8);
10862           regM   = INSNT1(3,0);
10863           rotate = INSNT1(5,4);
10864           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10865              gate = True;
10866        }
10867     } else {
10868        if (INSNA(27,20) == BITS8(0,1,1,0,1,1,0,0) &&
10869            INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
10870           regD   = INSNA(15,12);
10871           regN   = INSNA(19,16);
10872           regM   = INSNA(3,0);
10873           rotate = INSNA(11,10);
10874           if (regD != 15 && regN != 15 && regM != 15)
10875             gate = True;
10876        }
10877     }
10878
10879     if (gate) {
10880        IRTemp irt_regN = newTemp(Ity_I32);
10881        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10882
10883        IRTemp irt_regM = newTemp(Ity_I32);
10884        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10885
10886        IRTemp irt_rot = newTemp(Ity_I32);
10887        assign( irt_rot, binop(Iop_And32,
10888                               genROR32(irt_regM, 8 * rotate),
10889                               mkU32(0x00FF00FF)) );
10890
10891        IRExpr* resLo
10892           = binop(Iop_And32,
10893                   binop(Iop_Add32, mkexpr(irt_regN), mkexpr(irt_rot)),
10894                   mkU32(0x0000FFFF));
10895
10896        IRExpr* resHi
10897           = binop(Iop_Add32,
10898                   binop(Iop_And32, mkexpr(irt_regN), mkU32(0xFFFF0000)),
10899                   binop(Iop_And32, mkexpr(irt_rot),  mkU32(0xFFFF0000)));
10900
10901        IRExpr* ire_result
10902           = binop( Iop_Or32, resHi, resLo );
10903
10904        if (isT)
10905           putIRegT( regD, ire_result, condT );
10906        else
10907           putIRegA( regD, ire_result, condT, Ijk_Boring );
10908
10909        DIP( "uxtab16%s r%u, r%u, r%u, ROR #%u\n",
10910             nCC(conq), regD, regN, regM, 8 * rotate );
10911        return True;
10912     }
10913     /* fall through */
10914   }
10915
10916   /* --------------- usad8  Rd,Rn,Rm    ---------------- */
10917   /* --------------- usada8 Rd,Rn,Rm,Ra ---------------- */
10918   {
10919     UInt rD = 99, rN = 99, rM = 99, rA = 99;
10920     Bool gate = False;
10921
10922     if (isT) {
10923       if (INSNT0(15,4) == 0xFB7 && INSNT1(7,4) == BITS4(0,0,0,0)) {
10924           rN = INSNT0(3,0);
10925           rA = INSNT1(15,12);
10926           rD = INSNT1(11,8);
10927           rM = INSNT1(3,0);
10928           if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && rA != 13)
10929              gate = True;
10930        }
10931     } else {
10932        if (INSNA(27,20) == BITS8(0,1,1,1,1,0,0,0) &&
10933            INSNA(7,4)   == BITS4(0,0,0,1) ) {
10934           rD = INSNA(19,16);
10935           rA = INSNA(15,12);
10936           rM = INSNA(11,8);
10937           rN = INSNA(3,0);
10938           if (rD != 15 && rN != 15 && rM != 15 /* but rA can be 15 */)
10939              gate = True;
10940        }
10941     }
10942     /* We allow rA == 15, to denote the usad8 (no accumulator) case. */
10943
10944     if (gate) {
10945        IRExpr* rNe = isT ? getIRegT(rN) : getIRegA(rN);
10946        IRExpr* rMe = isT ? getIRegT(rM) : getIRegA(rM);
10947        IRExpr* rAe = rA == 15 ? mkU32(0)
10948                               : (isT ? getIRegT(rA) : getIRegA(rA));
10949        IRExpr* res = binop(Iop_Add32,
10950                            binop(Iop_Sad8Ux4, rNe, rMe),
10951                            rAe);
10952        if (isT)
10953           putIRegT( rD, res, condT );
10954        else
10955           putIRegA( rD, res, condT, Ijk_Boring );
10956
10957        if (rA == 15) {
10958           DIP( "usad8%s r%u, r%u, r%u\n",
10959                nCC(conq), rD, rN, rM );
10960        } else {
10961           DIP( "usada8%s r%u, r%u, r%u, r%u\n",
10962                nCC(conq), rD, rN, rM, rA );
10963        }
10964        return True;
10965     }
10966     /* fall through */
10967   }
10968
10969   /* ------------------ qadd<c> <Rd>,<Rn>,<Rm> ------------------- */
10970   {
10971     UInt regD = 99, regN = 99, regM = 99;
10972     Bool gate = False;
10973
10974     if (isT) {
10975        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
10976           regN = INSNT0(3,0);
10977           regD = INSNT1(11,8);
10978           regM = INSNT1(3,0);
10979           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10980              gate = True;
10981        }
10982     } else {
10983        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
10984            INSNA(11,8)  == BITS4(0,0,0,0)         &&
10985            INSNA(7,4)   == BITS4(0,1,0,1)) {
10986           regD = INSNA(15,12);
10987           regN = INSNA(19,16);
10988           regM = INSNA(3,0);
10989           if (regD != 15 && regN != 15 && regM != 15)
10990              gate = True;
10991        }
10992     }
10993
10994     if (gate) {
10995        IRTemp rNt   = newTemp(Ity_I32);
10996        IRTemp rMt   = newTemp(Ity_I32);
10997        IRTemp res_q = newTemp(Ity_I32);
10998
10999        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11000        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11001
11002        assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt)));
11003        if (isT)
11004           putIRegT( regD, mkexpr(res_q), condT );
11005        else
11006           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11007
11008        or_into_QFLAG32(
11009           signed_overflow_after_Add32(
11010              binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11011           condT
11012        );
11013
11014        DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11015        return True;
11016     }
11017     /* fall through */
11018   }
11019
11020   /* ------------------ qdadd<c> <Rd>,<Rm>,<Rn> ------------------- */
11021   {
11022     UInt regD = 99, regN = 99, regM = 99;
11023     Bool gate = False;
11024
11025     if (isT) {
11026        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF090) {
11027           regN = INSNT0(3,0);
11028           regD = INSNT1(11,8);
11029           regM = INSNT1(3,0);
11030           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11031              gate = True;
11032        }
11033     } else {
11034        if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
11035            INSNA(11,8)  == BITS4(0,0,0,0)         &&
11036            INSNA(7,4)   == BITS4(0,1,0,1)) {
11037           regD = INSNA(15,12);
11038           regN = INSNA(19,16);
11039           regM = INSNA(3,0);
11040           if (regD != 15 && regN != 15 && regM != 15)
11041              gate = True;
11042        }
11043     }
11044
11045     if (gate) {
11046        IRTemp rNt   = newTemp(Ity_I32);
11047        IRTemp rMt   = newTemp(Ity_I32);
11048        IRTemp rN_d  = newTemp(Ity_I32);
11049        IRTemp res_q = newTemp(Ity_I32);
11050
11051        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11052        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11053
11054        or_into_QFLAG32(
11055           signed_overflow_after_Add32(
11056              binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11057           condT
11058        );
11059
11060        assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11061        assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rN_d)));
11062        if (isT)
11063           putIRegT( regD, mkexpr(res_q), condT );
11064        else
11065           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11066
11067        or_into_QFLAG32(
11068           signed_overflow_after_Add32(
11069              binop(Iop_Add32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11070           condT
11071        );
11072
11073        DIP("qdadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11074        return True;
11075     }
11076     /* fall through */
11077   }
11078
11079   /* ------------------ qsub<c> <Rd>,<Rn>,<Rm> ------------------- */
11080   {
11081     UInt regD = 99, regN = 99, regM = 99;
11082     Bool gate = False;
11083
11084     if (isT) {
11085        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) {
11086           regN = INSNT0(3,0);
11087           regD = INSNT1(11,8);
11088           regM = INSNT1(3,0);
11089           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11090              gate = True;
11091        }
11092     } else {
11093        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
11094            INSNA(11,8)  == BITS4(0,0,0,0)         &&
11095            INSNA(7,4)   == BITS4(0,1,0,1)) {
11096           regD = INSNA(15,12);
11097           regN = INSNA(19,16);
11098           regM = INSNA(3,0);
11099           if (regD != 15 && regN != 15 && regM != 15)
11100              gate = True;
11101        }
11102     }
11103
11104     if (gate) {
11105        IRTemp rNt   = newTemp(Ity_I32);
11106        IRTemp rMt   = newTemp(Ity_I32);
11107        IRTemp res_q = newTemp(Ity_I32);
11108
11109        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11110        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11111
11112        assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt)));
11113        if (isT)
11114           putIRegT( regD, mkexpr(res_q), condT );
11115        else
11116           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11117
11118        or_into_QFLAG32(
11119           signed_overflow_after_Sub32(
11120              binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11121           condT
11122        );
11123
11124        DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11125        return True;
11126     }
11127     /* fall through */
11128   }
11129
11130   /* ------------------ qdsub<c> <Rd>,<Rm>,<Rn> ------------------- */
11131   {
11132     UInt regD = 99, regN = 99, regM = 99;
11133     Bool gate = False;
11134
11135     if (isT) {
11136        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0B0) {
11137           regN = INSNT0(3,0);
11138           regD = INSNT1(11,8);
11139           regM = INSNT1(3,0);
11140           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11141              gate = True;
11142        }
11143     } else {
11144        if (INSNA(27,20) == BITS8(0,0,0,1,0,1,1,0) &&
11145            INSNA(11,8)  == BITS4(0,0,0,0)         &&
11146            INSNA(7,4)   == BITS4(0,1,0,1)) {
11147           regD = INSNA(15,12);
11148           regN = INSNA(19,16);
11149           regM = INSNA(3,0);
11150           if (regD != 15 && regN != 15 && regM != 15)
11151              gate = True;
11152        }
11153     }
11154
11155     if (gate) {
11156        IRTemp rNt   = newTemp(Ity_I32);
11157        IRTemp rMt   = newTemp(Ity_I32);
11158        IRTemp rN_d  = newTemp(Ity_I32);
11159        IRTemp res_q = newTemp(Ity_I32);
11160
11161        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11162        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11163
11164        or_into_QFLAG32(
11165           signed_overflow_after_Add32(
11166              binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11167           condT
11168        );
11169
11170        assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11171        assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rN_d)));
11172        if (isT)
11173           putIRegT( regD, mkexpr(res_q), condT );
11174        else
11175           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11176
11177        or_into_QFLAG32(
11178           signed_overflow_after_Sub32(
11179              binop(Iop_Sub32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11180           condT
11181        );
11182
11183        DIP("qdsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11184        return True;
11185     }
11186     /* fall through */
11187   }
11188
11189   /* ------------------ uqsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
11190   {
11191     UInt regD = 99, regN = 99, regM = 99;
11192     Bool gate = False;
11193
11194     if (isT) {
11195        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11196           regN = INSNT0(3,0);
11197           regD = INSNT1(11,8);
11198           regM = INSNT1(3,0);
11199           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11200              gate = True;
11201        }
11202     } else {
11203        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11204            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11205            INSNA(7,4)   == BITS4(0,1,1,1)) {
11206           regD = INSNA(15,12);
11207           regN = INSNA(19,16);
11208           regM = INSNA(3,0);
11209           if (regD != 15 && regN != 15 && regM != 15)
11210             gate = True;
11211        }
11212     }
11213
11214     if (gate) {
11215        IRTemp rNt   = newTemp(Ity_I32);
11216        IRTemp rMt   = newTemp(Ity_I32);
11217        IRTemp res_q = newTemp(Ity_I32);
11218
11219        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11220        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11221
11222        assign(res_q, binop(Iop_QSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11223        if (isT)
11224           putIRegT( regD, mkexpr(res_q), condT );
11225        else
11226           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11227
11228        DIP("uqsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11229        return True;
11230     }
11231     /* fall through */
11232   }
11233
11234   /* ----------------- shadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
11235   {
11236     UInt regD = 99, regN = 99, regM = 99;
11237     Bool gate = False;
11238
11239     if (isT) {
11240        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11241           regN = INSNT0(3,0);
11242           regD = INSNT1(11,8);
11243           regM = INSNT1(3,0);
11244           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11245              gate = True;
11246        }
11247     } else {
11248        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11249            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11250            INSNA(7,4)   == BITS4(0,0,0,1)) {
11251           regD = INSNA(15,12);
11252           regN = INSNA(19,16);
11253           regM = INSNA(3,0);
11254           if (regD != 15 && regN != 15 && regM != 15)
11255              gate = True;
11256        }
11257     }
11258
11259     if (gate) {
11260        IRTemp rNt   = newTemp(Ity_I32);
11261        IRTemp rMt   = newTemp(Ity_I32);
11262        IRTemp res_q = newTemp(Ity_I32);
11263
11264        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11265        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11266
11267        assign(res_q, binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
11268        if (isT)
11269           putIRegT( regD, mkexpr(res_q), condT );
11270        else
11271           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11272
11273        DIP("shadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11274        return True;
11275     }
11276     /* fall through */
11277   }
11278
11279   /* ----------------- uhsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11280   {
11281     UInt regD = 99, regN = 99, regM = 99;
11282     Bool gate = False;
11283
11284     if (isT) {
11285        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11286           regN = INSNT0(3,0);
11287           regD = INSNT1(11,8);
11288           regM = INSNT1(3,0);
11289           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11290              gate = True;
11291        }
11292     } else {
11293        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11294            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11295            INSNA(7,4)   == BITS4(1,1,1,1)) {
11296           regD = INSNA(15,12);
11297           regN = INSNA(19,16);
11298           regM = INSNA(3,0);
11299           if (regD != 15 && regN != 15 && regM != 15)
11300              gate = True;
11301        }
11302     }
11303
11304     if (gate) {
11305        IRTemp rNt   = newTemp(Ity_I32);
11306        IRTemp rMt   = newTemp(Ity_I32);
11307        IRTemp res_q = newTemp(Ity_I32);
11308
11309        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11310        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11311
11312        assign(res_q, binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
11313        if (isT)
11314           putIRegT( regD, mkexpr(res_q), condT );
11315        else
11316           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11317
11318        DIP("uhsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11319        return True;
11320     }
11321     /* fall through */
11322   }
11323
11324   /* ----------------- uhsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
11325   {
11326     UInt regD = 99, regN = 99, regM = 99;
11327     Bool gate = False;
11328
11329     if (isT) {
11330        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11331           regN = INSNT0(3,0);
11332           regD = INSNT1(11,8);
11333           regM = INSNT1(3,0);
11334           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11335              gate = True;
11336        }
11337     } else {
11338        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11339            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11340            INSNA(7,4)   == BITS4(0,1,1,1)) {
11341           regD = INSNA(15,12);
11342           regN = INSNA(19,16);
11343           regM = INSNA(3,0);
11344           if (regD != 15 && regN != 15 && regM != 15)
11345              gate = True;
11346        }
11347     }
11348
11349     if (gate) {
11350        IRTemp rNt   = newTemp(Ity_I32);
11351        IRTemp rMt   = newTemp(Ity_I32);
11352        IRTemp res_q = newTemp(Ity_I32);
11353
11354        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11355        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11356
11357        assign(res_q, binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11358        if (isT)
11359           putIRegT( regD, mkexpr(res_q), condT );
11360        else
11361           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11362
11363        DIP("uhsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11364        return True;
11365     }
11366     /* fall through */
11367   }
11368
11369   /* ------------------ uqadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
11370   {
11371     UInt regD = 99, regN = 99, regM = 99;
11372     Bool gate = False;
11373
11374     if (isT) {
11375        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11376           regN = INSNT0(3,0);
11377           regD = INSNT1(11,8);
11378           regM = INSNT1(3,0);
11379           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11380              gate = True;
11381        }
11382     } else {
11383        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11384            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11385            INSNA(7,4)   == BITS4(0,0,0,1)) {
11386           regD = INSNA(15,12);
11387           regN = INSNA(19,16);
11388           regM = INSNA(3,0);
11389           if (regD != 15 && regN != 15 && regM != 15)
11390              gate = True;
11391        }
11392     }
11393
11394     if (gate) {
11395        IRTemp rNt   = newTemp(Ity_I32);
11396        IRTemp rMt   = newTemp(Ity_I32);
11397        IRTemp res_q = newTemp(Ity_I32);
11398
11399        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11400        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11401
11402        assign(res_q, binop(Iop_QAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
11403        if (isT)
11404           putIRegT( regD, mkexpr(res_q), condT );
11405        else
11406           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11407
11408        DIP("uqadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11409        return True;
11410     }
11411     /* fall through */
11412   }
11413
11414   /* ------------------- uqsax<c> <Rd>,<Rn>,<Rm> ------------------- */
11415   {
11416     UInt regD = 99, regN = 99, regM = 99;
11417     Bool gate = False;
11418
11419     if (isT) {
11420        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11421           regN = INSNT0(3,0);
11422           regD = INSNT1(11,8);
11423           regM = INSNT1(3,0);
11424           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11425              gate = True;
11426        }
11427     } else {
11428        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11429            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11430            INSNA(7,4)   == BITS4(0,1,0,1)) {
11431           regD = INSNA(15,12);
11432           regN = INSNA(19,16);
11433           regM = INSNA(3,0);
11434           if (regD != 15 && regN != 15 && regM != 15)
11435              gate = True;
11436        }
11437     }
11438
11439     if (gate) {
11440        IRTemp irt_regN     = newTemp(Ity_I32);
11441        IRTemp irt_regM     = newTemp(Ity_I32);
11442        IRTemp irt_sum      = newTemp(Ity_I32);
11443        IRTemp irt_diff     = newTemp(Ity_I32);
11444        IRTemp irt_sum_res  = newTemp(Ity_I32);
11445        IRTemp irt_diff_res = newTemp(Ity_I32);
11446
11447        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11448        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11449
11450        assign( irt_diff,
11451                binop( Iop_Sub32,
11452                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11453                       binop( Iop_Shr32,
11454                              binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
11455                              mkU8(16) ) ) );
11456        armUnsignedSatQ( &irt_diff_res, NULL, irt_diff, 0x10);
11457
11458        assign( irt_sum,
11459                binop( Iop_Add32,
11460                       binop( Iop_Shr32,
11461                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11462                              mkU8(16) ),
11463                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) )) );
11464        armUnsignedSatQ( &irt_sum_res, NULL, irt_sum, 0x10 );
11465
11466        IRExpr* ire_result = binop( Iop_Or32,
11467                                    binop( Iop_Shl32, mkexpr(irt_diff_res),
11468                                           mkU8(16) ),
11469                                    binop( Iop_And32, mkexpr(irt_sum_res),
11470                                           mkU32(0xFFFF)) );
11471
11472        if (isT)
11473           putIRegT( regD, ire_result, condT );
11474        else
11475           putIRegA( regD, ire_result, condT, Ijk_Boring );
11476
11477        DIP( "uqsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11478        return True;
11479     }
11480     /* fall through */
11481   }
11482
11483   /* ------------------- uqasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11484   {
11485     UInt regD = 99, regN = 99, regM = 99;
11486     Bool gate = False;
11487
11488     if (isT) {
11489        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11490           regN = INSNT0(3,0);
11491           regD = INSNT1(11,8);
11492           regM = INSNT1(3,0);
11493           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11494              gate = True;
11495        }
11496     } else {
11497        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11498            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11499            INSNA(7,4)   == BITS4(0,0,1,1)) {
11500           regD = INSNA(15,12);
11501           regN = INSNA(19,16);
11502           regM = INSNA(3,0);
11503           if (regD != 15 && regN != 15 && regM != 15)
11504              gate = True;
11505        }
11506     }
11507
11508     if (gate) {
11509        IRTemp irt_regN     = newTemp(Ity_I32);
11510        IRTemp irt_regM     = newTemp(Ity_I32);
11511        IRTemp irt_sum      = newTemp(Ity_I32);
11512        IRTemp irt_diff     = newTemp(Ity_I32);
11513        IRTemp irt_res_sum  = newTemp(Ity_I32);
11514        IRTemp irt_res_diff = newTemp(Ity_I32);
11515
11516        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11517        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11518
11519        assign( irt_diff,
11520                binop( Iop_Sub32,
11521                       binop( Iop_Shr32,
11522                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11523                              mkU8(16) ),
11524                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11525        armUnsignedSatQ( &irt_res_diff, NULL, irt_diff, 0x10 );
11526
11527        assign( irt_sum,
11528                binop( Iop_Add32,
11529                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11530                       binop( Iop_Shr32,
11531                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11532                              mkU8(16) ) ) );
11533        armUnsignedSatQ( &irt_res_sum, NULL, irt_sum, 0x10 );
11534
11535        IRExpr* ire_result
11536          = binop( Iop_Or32,
11537                   binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
11538                   binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
11539
11540        if (isT)
11541           putIRegT( regD, ire_result, condT );
11542        else
11543           putIRegA( regD, ire_result, condT, Ijk_Boring );
11544
11545        DIP( "uqasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11546        return True;
11547     }
11548     /* fall through */
11549   }
11550
11551   /* ------------------- usax<c> <Rd>,<Rn>,<Rm> ------------------- */
11552   {
11553     UInt regD = 99, regN = 99, regM = 99;
11554     Bool gate = False;
11555
11556     if (isT) {
11557        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11558           regN = INSNT0(3,0);
11559           regD = INSNT1(11,8);
11560           regM = INSNT1(3,0);
11561           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11562              gate = True;
11563        }
11564     } else {
11565        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11566            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11567            INSNA(7,4)   == BITS4(0,1,0,1)) {
11568           regD = INSNA(15,12);
11569           regN = INSNA(19,16);
11570           regM = INSNA(3,0);
11571           if (regD != 15 && regN != 15 && regM != 15)
11572              gate = True;
11573        }
11574     }
11575
11576     if (gate) {
11577        IRTemp irt_regN = newTemp(Ity_I32);
11578        IRTemp irt_regM = newTemp(Ity_I32);
11579        IRTemp irt_sum  = newTemp(Ity_I32);
11580        IRTemp irt_diff = newTemp(Ity_I32);
11581
11582        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11583        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11584
11585        assign( irt_sum,
11586                binop( Iop_Add32,
11587                       unop( Iop_16Uto32,
11588                             unop( Iop_32to16, mkexpr(irt_regN) )
11589                       ),
11590                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11591
11592        assign( irt_diff,
11593                binop( Iop_Sub32,
11594                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11595                       unop( Iop_16Uto32,
11596                             unop( Iop_32to16, mkexpr(irt_regM) )
11597                       )
11598                )
11599        );
11600
11601        IRExpr* ire_result
11602          = binop( Iop_Or32,
11603                   binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11604                   binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11605
11606        IRTemp ge10 = newTemp(Ity_I32);
11607        assign( ge10, IRExpr_ITE( binop( Iop_CmpLE32U,
11608                                         mkU32(0x10000), mkexpr(irt_sum) ),
11609                                  mkU32(1), mkU32(0) ) );
11610        put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
11611        put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
11612
11613        IRTemp ge32 = newTemp(Ity_I32);
11614        assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11615        put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11616        put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11617
11618        if (isT)
11619           putIRegT( regD, ire_result, condT );
11620        else
11621           putIRegA( regD, ire_result, condT, Ijk_Boring );
11622
11623        DIP( "usax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11624        return True;
11625     }
11626     /* fall through */
11627   }
11628
11629   /* ------------------- uasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11630   {
11631     UInt regD = 99, regN = 99, regM = 99;
11632     Bool gate = False;
11633
11634     if (isT) {
11635        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11636           regN = INSNT0(3,0);
11637           regD = INSNT1(11,8);
11638           regM = INSNT1(3,0);
11639           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11640              gate = True;
11641        }
11642     } else {
11643        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11644            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11645            INSNA(7,4)   == BITS4(0,0,1,1)) {
11646           regD = INSNA(15,12);
11647           regN = INSNA(19,16);
11648           regM = INSNA(3,0);
11649           if (regD != 15 && regN != 15 && regM != 15)
11650              gate = True;
11651        }
11652     }
11653
11654     if (gate) {
11655        IRTemp irt_regN = newTemp(Ity_I32);
11656        IRTemp irt_regM = newTemp(Ity_I32);
11657        IRTemp irt_sum  = newTemp(Ity_I32);
11658        IRTemp irt_diff = newTemp(Ity_I32);
11659
11660        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11661        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11662
11663        assign( irt_diff,
11664                binop( Iop_Sub32,
11665                       unop( Iop_16Uto32,
11666                             unop( Iop_32to16, mkexpr(irt_regN) )
11667                       ),
11668                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11669
11670        assign( irt_sum,
11671                binop( Iop_Add32,
11672                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11673                       unop( Iop_16Uto32,
11674                             unop( Iop_32to16, mkexpr(irt_regM) )
11675                       ) ) );
11676
11677        IRExpr* ire_result
11678          = binop( Iop_Or32,
11679                   binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
11680                   binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
11681
11682        IRTemp ge10 = newTemp(Ity_I32);
11683        assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
11684        put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11685        put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11686
11687        IRTemp ge32 = newTemp(Ity_I32);
11688        assign( ge32, IRExpr_ITE( binop( Iop_CmpLE32U,
11689                                         mkU32(0x10000), mkexpr(irt_sum) ),
11690                                  mkU32(1), mkU32(0) ) );
11691        put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
11692        put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
11693
11694        if (isT)
11695           putIRegT( regD, ire_result, condT );
11696        else
11697           putIRegA( regD, ire_result, condT, Ijk_Boring );
11698
11699        DIP( "uasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11700        return True;
11701     }
11702     /* fall through */
11703   }
11704
11705   /* ------------------- ssax<c> <Rd>,<Rn>,<Rm> ------------------- */
11706   {
11707     UInt regD = 99, regN = 99, regM = 99;
11708     Bool gate = False;
11709
11710     if (isT) {
11711        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
11712           regN = INSNT0(3,0);
11713           regD = INSNT1(11,8);
11714           regM = INSNT1(3,0);
11715           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11716              gate = True;
11717        }
11718     } else {
11719        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
11720            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11721            INSNA(7,4)   == BITS4(0,1,0,1)) {
11722           regD = INSNA(15,12);
11723           regN = INSNA(19,16);
11724           regM = INSNA(3,0);
11725           if (regD != 15 && regN != 15 && regM != 15)
11726              gate = True;
11727        }
11728     }
11729
11730     if (gate) {
11731        IRTemp irt_regN = newTemp(Ity_I32);
11732        IRTemp irt_regM = newTemp(Ity_I32);
11733        IRTemp irt_sum  = newTemp(Ity_I32);
11734        IRTemp irt_diff = newTemp(Ity_I32);
11735
11736        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11737        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11738
11739        assign( irt_sum,
11740                binop( Iop_Add32,
11741                       binop( Iop_Sar32,
11742                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11743                              mkU8(16) ),
11744                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
11745
11746        assign( irt_diff,
11747                binop( Iop_Sub32,
11748                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
11749                       binop( Iop_Sar32,
11750                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11751                              mkU8(16) ) ) );
11752
11753        IRExpr* ire_result
11754          = binop( Iop_Or32,
11755                   binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11756                   binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11757
11758        IRTemp ge10 = newTemp(Ity_I32);
11759        assign(ge10, unop(Iop_Not32, mkexpr(irt_sum)));
11760        put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11761        put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11762
11763        IRTemp ge32 = newTemp(Ity_I32);
11764        assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11765        put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11766        put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11767
11768        if (isT)
11769           putIRegT( regD, ire_result, condT );
11770        else
11771           putIRegA( regD, ire_result, condT, Ijk_Boring );
11772
11773        DIP( "ssax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11774        return True;
11775     }
11776     /* fall through */
11777   }
11778
11779   /* ----------------- shsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11780   {
11781     UInt regD = 99, regN = 99, regM = 99;
11782     Bool gate = False;
11783
11784     if (isT) {
11785        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11786           regN = INSNT0(3,0);
11787           regD = INSNT1(11,8);
11788           regM = INSNT1(3,0);
11789           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11790              gate = True;
11791        }
11792     } else {
11793        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11794            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11795            INSNA(7,4)   == BITS4(1,1,1,1)) {
11796           regD = INSNA(15,12);
11797           regN = INSNA(19,16);
11798           regM = INSNA(3,0);
11799           if (regD != 15 && regN != 15 && regM != 15)
11800              gate = True;
11801        }
11802     }
11803
11804     if (gate) {
11805        IRTemp rNt   = newTemp(Ity_I32);
11806        IRTemp rMt   = newTemp(Ity_I32);
11807        IRTemp res_q = newTemp(Ity_I32);
11808
11809        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11810        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11811
11812        assign(res_q, binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
11813        if (isT)
11814           putIRegT( regD, mkexpr(res_q), condT );
11815        else
11816           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11817
11818        DIP("shsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11819        return True;
11820     }
11821     /* fall through */
11822   }
11823
11824   /* ----------------- sxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
11825   {
11826     UInt regD = 99, regN = 99, regM = 99, rotate = 99;
11827     Bool gate = False;
11828
11829     if (isT) {
11830        if (INSNT0(15,4) == 0xFA2 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
11831           regN   = INSNT0(3,0);
11832           regD   = INSNT1(11,8);
11833           regM   = INSNT1(3,0);
11834           rotate = INSNT1(5,4);
11835           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11836              gate = True;
11837        }
11838     } else {
11839        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
11840            INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
11841           regD   = INSNA(15,12);
11842           regN   = INSNA(19,16);
11843           regM   = INSNA(3,0);
11844           rotate = INSNA(11,10);
11845           if (regD != 15 && regN != 15 && regM != 15)
11846             gate = True;
11847        }
11848     }
11849
11850     if (gate) {
11851        IRTemp irt_regN = newTemp(Ity_I32);
11852        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11853
11854        IRTemp irt_regM = newTemp(Ity_I32);
11855        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11856
11857        IRTemp irt_rot = newTemp(Ity_I32);
11858        assign( irt_rot, genROR32(irt_regM, 8 * rotate) );
11859
11860        /* FIXME Maybe we can write this arithmetic in shorter form. */
11861        IRExpr* resLo
11862           = binop(Iop_And32,
11863                   binop(Iop_Add32,
11864                         mkexpr(irt_regN),
11865                         unop(Iop_16Uto32,
11866                              unop(Iop_8Sto16,
11867                                   unop(Iop_32to8, mkexpr(irt_rot))))),
11868                   mkU32(0x0000FFFF));
11869
11870        IRExpr* resHi
11871           = binop(Iop_And32,
11872                   binop(Iop_Add32,
11873                         mkexpr(irt_regN),
11874                         binop(Iop_Shl32,
11875                               unop(Iop_16Uto32,
11876                                    unop(Iop_8Sto16,
11877                                         unop(Iop_32to8,
11878                                              binop(Iop_Shr32,
11879                                                    mkexpr(irt_rot),
11880                                                    mkU8(16))))),
11881                               mkU8(16))),
11882                   mkU32(0xFFFF0000));
11883
11884        IRExpr* ire_result
11885           = binop( Iop_Or32, resHi, resLo );
11886
11887        if (isT)
11888           putIRegT( regD, ire_result, condT );
11889        else
11890           putIRegA( regD, ire_result, condT, Ijk_Boring );
11891
11892        DIP( "sxtab16%s r%u, r%u, r%u, ROR #%u\n",
11893             nCC(conq), regD, regN, regM, 8 * rotate );
11894        return True;
11895     }
11896     /* fall through */
11897   }
11898
11899   /* ----------------- shasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11900   {
11901     UInt regD = 99, regN = 99, regM = 99;
11902     Bool gate = False;
11903
11904     if (isT) {
11905        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11906           regN = INSNT0(3,0);
11907           regD = INSNT1(11,8);
11908           regM = INSNT1(3,0);
11909           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11910              gate = True;
11911        }
11912     } else {
11913        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11914            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11915            INSNA(7,4)   == BITS4(0,0,1,1)) {
11916           regD = INSNA(15,12);
11917           regN = INSNA(19,16);
11918           regM = INSNA(3,0);
11919           if (regD != 15 && regN != 15 && regM != 15)
11920              gate = True;
11921        }
11922     }
11923
11924     if (gate) {
11925        IRTemp rNt   = newTemp(Ity_I32);
11926        IRTemp rMt   = newTemp(Ity_I32);
11927        IRTemp irt_diff  = newTemp(Ity_I32);
11928        IRTemp irt_sum   = newTemp(Ity_I32);
11929        IRTemp res_q = newTemp(Ity_I32);
11930
11931        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11932        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11933
11934        assign( irt_diff,
11935                binop(Iop_Sub32,
11936                      unop(Iop_16Sto32,
11937                           unop(Iop_32to16,
11938                                mkexpr(rNt)
11939                           )
11940                      ),
11941                      unop(Iop_16Sto32,
11942                           unop(Iop_32to16,
11943                                binop(Iop_Shr32,
11944                                      mkexpr(rMt), mkU8(16)
11945                                )
11946                           )
11947                      )
11948                )
11949        );
11950
11951        assign( irt_sum,
11952                binop(Iop_Add32,
11953                      unop(Iop_16Sto32,
11954                           unop(Iop_32to16,
11955                                binop(Iop_Shr32,
11956                                      mkexpr(rNt), mkU8(16)
11957                                )
11958                           )
11959                      ),
11960                      unop(Iop_16Sto32,
11961                           unop(Iop_32to16, mkexpr(rMt)
11962                           )
11963                      )
11964                )
11965        );
11966
11967        assign( res_q,
11968                binop(Iop_Or32,
11969                      unop(Iop_16Uto32,
11970                           unop(Iop_32to16,
11971                                binop(Iop_Shr32,
11972                                      mkexpr(irt_diff), mkU8(1)
11973                                )
11974                           )
11975                      ),
11976                      binop(Iop_Shl32,
11977                            binop(Iop_Shr32,
11978                                  mkexpr(irt_sum), mkU8(1)
11979                            ),
11980                            mkU8(16)
11981                     )
11982                )
11983        );
11984
11985        if (isT)
11986           putIRegT( regD, mkexpr(res_q), condT );
11987        else
11988           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11989
11990        DIP("shasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11991        return True;
11992     }
11993     /* fall through */
11994   }
11995
11996   /* ----------------- uhasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11997   {
11998     UInt regD = 99, regN = 99, regM = 99;
11999     Bool gate = False;
12000
12001     if (isT) {
12002        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12003           regN = INSNT0(3,0);
12004           regD = INSNT1(11,8);
12005           regM = INSNT1(3,0);
12006           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12007              gate = True;
12008        }
12009     } else {
12010        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12011            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12012            INSNA(7,4)   == BITS4(0,0,1,1)) {
12013           regD = INSNA(15,12);
12014           regN = INSNA(19,16);
12015           regM = INSNA(3,0);
12016           if (regD != 15 && regN != 15 && regM != 15)
12017              gate = True;
12018        }
12019     }
12020
12021     if (gate) {
12022        IRTemp rNt   = newTemp(Ity_I32);
12023        IRTemp rMt   = newTemp(Ity_I32);
12024        IRTemp irt_diff  = newTemp(Ity_I32);
12025        IRTemp irt_sum   = newTemp(Ity_I32);
12026        IRTemp res_q = newTemp(Ity_I32);
12027
12028        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12029        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12030
12031        assign( irt_diff,
12032                binop(Iop_Sub32,
12033                      unop(Iop_16Uto32,
12034                           unop(Iop_32to16,
12035                                mkexpr(rNt)
12036                           )
12037                      ),
12038                      unop(Iop_16Uto32,
12039                           unop(Iop_32to16,
12040                                binop(Iop_Shr32,
12041                                      mkexpr(rMt), mkU8(16)
12042                                )
12043                           )
12044                      )
12045                )
12046        );
12047
12048        assign( irt_sum,
12049                binop(Iop_Add32,
12050                      unop(Iop_16Uto32,
12051                           unop(Iop_32to16,
12052                                binop(Iop_Shr32,
12053                                      mkexpr(rNt), mkU8(16)
12054                                )
12055                           )
12056                      ),
12057                      unop(Iop_16Uto32,
12058                           unop(Iop_32to16, mkexpr(rMt)
12059                           )
12060                      )
12061                )
12062        );
12063
12064        assign( res_q,
12065                binop(Iop_Or32,
12066                      unop(Iop_16Uto32,
12067                           unop(Iop_32to16,
12068                                binop(Iop_Shr32,
12069                                      mkexpr(irt_diff), mkU8(1)
12070                                )
12071                           )
12072                      ),
12073                      binop(Iop_Shl32,
12074                            binop(Iop_Shr32,
12075                                  mkexpr(irt_sum), mkU8(1)
12076                            ),
12077                            mkU8(16)
12078                     )
12079                )
12080        );
12081
12082        if (isT)
12083           putIRegT( regD, mkexpr(res_q), condT );
12084        else
12085           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12086
12087        DIP("uhasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12088        return True;
12089     }
12090     /* fall through */
12091   }
12092
12093   /* ----------------- shsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12094   {
12095     UInt regD = 99, regN = 99, regM = 99;
12096     Bool gate = False;
12097
12098     if (isT) {
12099        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12100           regN = INSNT0(3,0);
12101           regD = INSNT1(11,8);
12102           regM = INSNT1(3,0);
12103           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12104              gate = True;
12105        }
12106     } else {
12107        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12108            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12109            INSNA(7,4)   == BITS4(0,1,0,1)) {
12110           regD = INSNA(15,12);
12111           regN = INSNA(19,16);
12112           regM = INSNA(3,0);
12113           if (regD != 15 && regN != 15 && regM != 15)
12114              gate = True;
12115        }
12116     }
12117
12118     if (gate) {
12119        IRTemp rNt   = newTemp(Ity_I32);
12120        IRTemp rMt   = newTemp(Ity_I32);
12121        IRTemp irt_diff  = newTemp(Ity_I32);
12122        IRTemp irt_sum   = newTemp(Ity_I32);
12123        IRTemp res_q = newTemp(Ity_I32);
12124
12125        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12126        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12127
12128        assign( irt_sum,
12129                binop(Iop_Add32,
12130                      unop(Iop_16Sto32,
12131                           unop(Iop_32to16,
12132                                mkexpr(rNt)
12133                           )
12134                      ),
12135                      unop(Iop_16Sto32,
12136                           unop(Iop_32to16,
12137                                binop(Iop_Shr32,
12138                                      mkexpr(rMt), mkU8(16)
12139                                )
12140                           )
12141                      )
12142                )
12143        );
12144
12145        assign( irt_diff,
12146                binop(Iop_Sub32,
12147                      unop(Iop_16Sto32,
12148                           unop(Iop_32to16,
12149                                binop(Iop_Shr32,
12150                                      mkexpr(rNt), mkU8(16)
12151                                )
12152                           )
12153                      ),
12154                      unop(Iop_16Sto32,
12155                           unop(Iop_32to16, mkexpr(rMt)
12156                           )
12157                      )
12158                )
12159        );
12160
12161        assign( res_q,
12162                binop(Iop_Or32,
12163                      unop(Iop_16Uto32,
12164                           unop(Iop_32to16,
12165                                binop(Iop_Shr32,
12166                                      mkexpr(irt_sum), mkU8(1)
12167                                )
12168                           )
12169                      ),
12170                      binop(Iop_Shl32,
12171                            binop(Iop_Shr32,
12172                                  mkexpr(irt_diff), mkU8(1)
12173                            ),
12174                            mkU8(16)
12175                     )
12176                )
12177        );
12178
12179        if (isT)
12180           putIRegT( regD, mkexpr(res_q), condT );
12181        else
12182           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12183
12184        DIP("shsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12185        return True;
12186     }
12187     /* fall through */
12188   }
12189
12190   /* ----------------- uhsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12191   {
12192     UInt regD = 99, regN = 99, regM = 99;
12193     Bool gate = False;
12194
12195     if (isT) {
12196        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12197           regN = INSNT0(3,0);
12198           regD = INSNT1(11,8);
12199           regM = INSNT1(3,0);
12200           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12201              gate = True;
12202        }
12203     } else {
12204        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12205            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12206            INSNA(7,4)   == BITS4(0,1,0,1)) {
12207           regD = INSNA(15,12);
12208           regN = INSNA(19,16);
12209           regM = INSNA(3,0);
12210           if (regD != 15 && regN != 15 && regM != 15)
12211              gate = True;
12212        }
12213     }
12214
12215     if (gate) {
12216        IRTemp rNt   = newTemp(Ity_I32);
12217        IRTemp rMt   = newTemp(Ity_I32);
12218        IRTemp irt_diff  = newTemp(Ity_I32);
12219        IRTemp irt_sum   = newTemp(Ity_I32);
12220        IRTemp res_q = newTemp(Ity_I32);
12221
12222        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12223        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12224
12225        assign( irt_sum,
12226                binop(Iop_Add32,
12227                      unop(Iop_16Uto32,
12228                           unop(Iop_32to16,
12229                                mkexpr(rNt)
12230                           )
12231                      ),
12232                      unop(Iop_16Uto32,
12233                           unop(Iop_32to16,
12234                                binop(Iop_Shr32,
12235                                      mkexpr(rMt), mkU8(16)
12236                                )
12237                           )
12238                      )
12239                )
12240        );
12241
12242        assign( irt_diff,
12243                binop(Iop_Sub32,
12244                      unop(Iop_16Uto32,
12245                           unop(Iop_32to16,
12246                                binop(Iop_Shr32,
12247                                      mkexpr(rNt), mkU8(16)
12248                                )
12249                           )
12250                      ),
12251                      unop(Iop_16Uto32,
12252                           unop(Iop_32to16, mkexpr(rMt)
12253                           )
12254                      )
12255                )
12256        );
12257
12258        assign( res_q,
12259                binop(Iop_Or32,
12260                      unop(Iop_16Uto32,
12261                           unop(Iop_32to16,
12262                                binop(Iop_Shr32,
12263                                      mkexpr(irt_sum), mkU8(1)
12264                                )
12265                           )
12266                      ),
12267                      binop(Iop_Shl32,
12268                            binop(Iop_Shr32,
12269                                  mkexpr(irt_diff), mkU8(1)
12270                            ),
12271                            mkU8(16)
12272                     )
12273                )
12274        );
12275
12276        if (isT)
12277           putIRegT( regD, mkexpr(res_q), condT );
12278        else
12279           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12280
12281        DIP("uhsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12282        return True;
12283     }
12284     /* fall through */
12285   }
12286
12287   /* ----------------- shsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
12288   {
12289     UInt regD = 99, regN = 99, regM = 99;
12290     Bool gate = False;
12291
12292     if (isT) {
12293        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12294           regN = INSNT0(3,0);
12295           regD = INSNT1(11,8);
12296           regM = INSNT1(3,0);
12297           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12298              gate = True;
12299        }
12300     } else {
12301        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12302            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12303            INSNA(7,4)   == BITS4(0,1,1,1)) {
12304           regD = INSNA(15,12);
12305           regN = INSNA(19,16);
12306           regM = INSNA(3,0);
12307           if (regD != 15 && regN != 15 && regM != 15)
12308              gate = True;
12309        }
12310     }
12311
12312     if (gate) {
12313        IRTemp rNt   = newTemp(Ity_I32);
12314        IRTemp rMt   = newTemp(Ity_I32);
12315        IRTemp res_q = newTemp(Ity_I32);
12316
12317        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12318        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12319
12320        assign(res_q, binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
12321        if (isT)
12322           putIRegT( regD, mkexpr(res_q), condT );
12323        else
12324           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12325
12326        DIP("shsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12327        return True;
12328     }
12329     /* fall through */
12330   }
12331
12332   /* ----------------- smmls{r}<c> <Rd>,<Rn>,<Rm>,<Ra> ------------------- */
12333   {
12334     UInt rD = 99, rN = 99, rM = 99, rA = 99;
12335     Bool round  = False;
12336     Bool gate   = False;
12337
12338     if (isT) {
12339        if (INSNT0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
12340            && INSNT0(6,4) == BITS3(1,1,0)
12341            && INSNT1(7,5) == BITS3(0,0,0)) {
12342           round = INSNT1(4,4);
12343           rA    = INSNT1(15,12);
12344           rD    = INSNT1(11,8);
12345           rM    = INSNT1(3,0);
12346           rN    = INSNT0(3,0);
12347           if (!isBadRegT(rD)
12348               && !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rA))
12349              gate = True;
12350        }
12351     } else {
12352        if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,1)
12353            && INSNA(15,12) != BITS4(1,1,1,1)
12354            && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(1,1,0,1)) {
12355           round = INSNA(5,5);
12356           rD    = INSNA(19,16);
12357           rA    = INSNA(15,12);
12358           rM    = INSNA(11,8);
12359           rN    = INSNA(3,0);
12360           if (rD != 15 && rM != 15 && rN != 15)
12361              gate = True;
12362        }
12363     }
12364     if (gate) {
12365        IRTemp irt_rA   = newTemp(Ity_I32);
12366        IRTemp irt_rN   = newTemp(Ity_I32);
12367        IRTemp irt_rM   = newTemp(Ity_I32);
12368        assign( irt_rA, isT ? getIRegT(rA) : getIRegA(rA) );
12369        assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12370        assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12371        IRExpr* res
12372        = unop(Iop_64HIto32,
12373               binop(Iop_Add64,
12374                     binop(Iop_Sub64,
12375                           binop(Iop_32HLto64, mkexpr(irt_rA), mkU32(0)),
12376                           binop(Iop_MullS32, mkexpr(irt_rN), mkexpr(irt_rM))),
12377                     mkU64(round ? 0x80000000ULL : 0ULL)));
12378        if (isT)
12379           putIRegT( rD, res, condT );
12380        else
12381           putIRegA(rD, res, condT, Ijk_Boring);
12382        DIP("smmls%s%s r%u, r%u, r%u, r%u\n",
12383            round ? "r" : "", nCC(conq), rD, rN, rM, rA);
12384        return True;
12385     }
12386     /* fall through */
12387   }
12388
12389   /* -------------- smlald{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12390   {
12391     UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12392     Bool m_swap = False;
12393     Bool gate   = False;
12394
12395     if (isT) {
12396        if (INSNT0(15,4) == 0xFBC &&
12397            (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0)) {
12398           rN     = INSNT0(3,0);
12399           rDlo   = INSNT1(15,12);
12400           rDhi   = INSNT1(11,8);
12401           rM     = INSNT1(3,0);
12402           m_swap = (INSNT1(4,4) & 1) == 1;
12403           if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
12404               && !isBadRegT(rM) && rDhi != rDlo)
12405              gate = True;
12406        }
12407     } else {
12408        if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0)
12409            && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
12410           rN     = INSNA(3,0);
12411           rDlo   = INSNA(15,12);
12412           rDhi   = INSNA(19,16);
12413           rM     = INSNA(11,8);
12414           m_swap = ( INSNA(5,5) & 1 ) == 1;
12415           if (rDlo != 15 && rDhi != 15
12416               && rN != 15 && rM != 15 && rDlo != rDhi)
12417              gate = True;
12418        }
12419     }
12420
12421     if (gate) {
12422        IRTemp irt_rM   = newTemp(Ity_I32);
12423        IRTemp irt_rN   = newTemp(Ity_I32);
12424        IRTemp irt_rDhi = newTemp(Ity_I32);
12425        IRTemp irt_rDlo = newTemp(Ity_I32);
12426        IRTemp op_2     = newTemp(Ity_I32);
12427        IRTemp pr_1     = newTemp(Ity_I64);
12428        IRTemp pr_2     = newTemp(Ity_I64);
12429        IRTemp result   = newTemp(Ity_I64);
12430        IRTemp resHi    = newTemp(Ity_I32);
12431        IRTemp resLo    = newTemp(Ity_I32);
12432        assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM));
12433        assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN));
12434        assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi));
12435        assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo));
12436        assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12437        assign( pr_1, binop(Iop_MullS32,
12438                            unop(Iop_16Sto32,
12439                                 unop(Iop_32to16, mkexpr(irt_rN))
12440                            ),
12441                            unop(Iop_16Sto32,
12442                                 unop(Iop_32to16, mkexpr(op_2))
12443                            )
12444                      )
12445        );
12446        assign( pr_2, binop(Iop_MullS32,
12447                            binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12448                            binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12449                      )
12450        );
12451        assign( result, binop(Iop_Add64,
12452                              binop(Iop_Add64,
12453                                    mkexpr(pr_1),
12454                                    mkexpr(pr_2)
12455                              ),
12456                              binop(Iop_32HLto64,
12457                                    mkexpr(irt_rDhi),
12458                                    mkexpr(irt_rDlo)
12459                              )
12460                        )
12461        );
12462        assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12463        assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12464        if (isT) {
12465           putIRegT( rDhi, mkexpr(resHi), condT );
12466           putIRegT( rDlo, mkexpr(resLo), condT );
12467        } else {
12468           putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12469           putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12470        }
12471        DIP("smlald%c%s r%u, r%u, r%u, r%u\n",
12472            m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12473        return True;
12474     }
12475     /* fall through */
12476   }
12477
12478   /* -------------- smlsld{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12479   {
12480     UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12481     Bool m_swap = False;
12482     Bool gate   = False;
12483
12484     if (isT) {
12485        if ((INSNT0(15,4) == 0xFBD &&
12486            (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0))) {
12487           rN     = INSNT0(3,0);
12488           rDlo   = INSNT1(15,12);
12489           rDhi   = INSNT1(11,8);
12490           rM     = INSNT1(3,0);
12491           m_swap = (INSNT1(4,4) & 1) == 1;
12492           if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN) &&
12493               !isBadRegT(rM) && rDhi != rDlo)
12494              gate = True;
12495        }
12496     } else {
12497        if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0) &&
12498            (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,1,0,1)) {
12499           rN     = INSNA(3,0);
12500           rDlo   = INSNA(15,12);
12501           rDhi   = INSNA(19,16);
12502           rM     = INSNA(11,8);
12503           m_swap = (INSNA(5,5) & 1) == 1;
12504           if (rDlo != 15 && rDhi != 15 &&
12505               rN != 15 && rM != 15 && rDlo != rDhi)
12506              gate = True;
12507        }
12508     }
12509     if (gate) {
12510        IRTemp irt_rM   = newTemp(Ity_I32);
12511        IRTemp irt_rN   = newTemp(Ity_I32);
12512        IRTemp irt_rDhi = newTemp(Ity_I32);
12513        IRTemp irt_rDlo = newTemp(Ity_I32);
12514        IRTemp op_2     = newTemp(Ity_I32);
12515        IRTemp pr_1     = newTemp(Ity_I64);
12516        IRTemp pr_2     = newTemp(Ity_I64);
12517        IRTemp result   = newTemp(Ity_I64);
12518        IRTemp resHi    = newTemp(Ity_I32);
12519        IRTemp resLo    = newTemp(Ity_I32);
12520        assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12521        assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12522        assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi) );
12523        assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo) );
12524        assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12525        assign( pr_1, binop(Iop_MullS32,
12526                            unop(Iop_16Sto32,
12527                                 unop(Iop_32to16, mkexpr(irt_rN))
12528                            ),
12529                            unop(Iop_16Sto32,
12530                                 unop(Iop_32to16, mkexpr(op_2))
12531                            )
12532                      )
12533        );
12534        assign( pr_2, binop(Iop_MullS32,
12535                            binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12536                            binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12537                      )
12538        );
12539        assign( result, binop(Iop_Add64,
12540                              binop(Iop_Sub64,
12541                                    mkexpr(pr_1),
12542                                    mkexpr(pr_2)
12543                              ),
12544                              binop(Iop_32HLto64,
12545                                    mkexpr(irt_rDhi),
12546                                    mkexpr(irt_rDlo)
12547                              )
12548                        )
12549        );
12550        assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12551        assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12552        if (isT) {
12553           putIRegT( rDhi, mkexpr(resHi), condT );
12554           putIRegT( rDlo, mkexpr(resLo), condT );
12555        } else {
12556           putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12557           putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12558        }
12559        DIP("smlsld%c%s r%u, r%u, r%u, r%u\n",
12560            m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12561        return True;
12562     }
12563     /* fall through */
12564   }
12565
12566   /* ---------- Doesn't match anything. ---------- */
12567   return False;
12568
12569#  undef INSNA
12570#  undef INSNT0
12571#  undef INSNT1
12572}
12573
12574
12575/*------------------------------------------------------------*/
12576/*--- LDMxx/STMxx helper (both ARM and Thumb32)            ---*/
12577/*------------------------------------------------------------*/
12578
12579/* Generate IR for LDMxx and STMxx.  This is complex.  Assumes it's
12580   unconditional, so the caller must produce a jump-around before
12581   calling this, if the insn is to be conditional.  Caller is
12582   responsible for all validation of parameters.  For LDMxx, if PC is
12583   amongst the values loaded, caller is also responsible for
12584   generating the jump. */
12585static void mk_ldm_stm ( Bool arm,     /* True: ARM, False: Thumb */
12586                         UInt rN,      /* base reg */
12587                         UInt bINC,    /* 1: inc,  0: dec */
12588                         UInt bBEFORE, /* 1: inc/dec before, 0: after */
12589                         UInt bW,      /* 1: writeback to Rn */
12590                         UInt bL,      /* 1: load, 0: store */
12591                         UInt regList )
12592{
12593   Int i, r, m, nRegs;
12594   IRTemp jk = Ijk_Boring;
12595
12596   /* Get hold of the old Rn value.  We might need to write its value
12597      to memory during a store, and if it's also the writeback
12598      register then we need to get its value now.  We can't treat it
12599      exactly like the other registers we're going to transfer,
12600      because for xxMDA and xxMDB writeback forms, the generated IR
12601      updates Rn in the guest state before any transfers take place.
12602      We have to do this as per comments below, in order that if Rn is
12603      the stack pointer then it always has a value is below or equal
12604      to any of the transfer addresses.  Ick. */
12605   IRTemp oldRnT = newTemp(Ity_I32);
12606   assign(oldRnT, arm ? getIRegA(rN) : getIRegT(rN));
12607
12608   IRTemp anchorT = newTemp(Ity_I32);
12609   /* The old (Addison-Wesley) ARM ARM seems to say that LDMxx/STMxx
12610      ignore the bottom two bits of the address.  However, Cortex-A8
12611      doesn't seem to care.  Hence: */
12612   /* No .. don't force alignment .. */
12613   /* assign(anchorT, binop(Iop_And32, mkexpr(oldRnT), mkU32(~3U))); */
12614   /* Instead, use the potentially misaligned address directly. */
12615   assign(anchorT, mkexpr(oldRnT));
12616
12617   IROp opADDorSUB = bINC ? Iop_Add32 : Iop_Sub32;
12618   // bINC == 1:  xxMIA, xxMIB
12619   // bINC == 0:  xxMDA, xxMDB
12620
12621   // For xxMDA and xxMDB, update Rn first if necessary.  We have
12622   // to do this first so that, for the common idiom of the transfers
12623   // faulting because we're pushing stuff onto a stack and the stack
12624   // is growing down onto allocate-on-fault pages (as Valgrind simulates),
12625   // we need to have the SP up-to-date "covering" (pointing below) the
12626   // transfer area.  For the same reason, if we are doing xxMIA or xxMIB,
12627   // do the transfer first, and then update rN afterwards.
12628   nRegs = 0;
12629   for (i = 0; i < 16; i++) {
12630     if ((regList & (1 << i)) != 0)
12631         nRegs++;
12632   }
12633   if (bW == 1 && !bINC) {
12634      IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
12635      if (arm)
12636         putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
12637      else
12638         putIRegT( rN, e, IRTemp_INVALID );
12639   }
12640
12641   // Make up a list of the registers to transfer, and their offsets
12642   // in memory relative to the anchor.  If the base reg (Rn) is part
12643   // of the transfer, then do it last for a load and first for a store.
12644   UInt xReg[16], xOff[16];
12645   Int  nX = 0;
12646   m = 0;
12647   for (i = 0; i < 16; i++) {
12648      r = bINC ? i : (15-i);
12649      if (0 == (regList & (1<<r)))
12650         continue;
12651      if (bBEFORE)
12652         m++;
12653      /* paranoia: check we aren't transferring the writeback
12654         register during a load. Should be assured by decode-point
12655         check above. */
12656      if (bW == 1 && bL == 1)
12657         vassert(r != rN);
12658
12659      xOff[nX] = 4 * m;
12660      xReg[nX] = r;
12661      nX++;
12662
12663      if (!bBEFORE)
12664         m++;
12665   }
12666   vassert(m == nRegs);
12667   vassert(nX == nRegs);
12668   vassert(nX <= 16);
12669
12670   if (bW == 0 && (regList & (1<<rN)) != 0) {
12671      /* Non-writeback, and basereg is to be transferred.  Do its
12672         transfer last for a load and first for a store.  Requires
12673         reordering xOff/xReg. */
12674      if (0) {
12675         vex_printf("\nREG_LIST_PRE: (rN=%d)\n", rN);
12676         for (i = 0; i < nX; i++)
12677            vex_printf("reg %d   off %d\n", xReg[i], xOff[i]);
12678         vex_printf("\n");
12679      }
12680
12681      vassert(nX > 0);
12682      for (i = 0; i < nX; i++) {
12683         if (xReg[i] == rN)
12684             break;
12685      }
12686      vassert(i < nX); /* else we didn't find it! */
12687      UInt tReg = xReg[i];
12688      UInt tOff = xOff[i];
12689      if (bL == 1) {
12690         /* load; make this transfer happen last */
12691         if (i < nX-1) {
12692            for (m = i+1; m < nX; m++) {
12693               xReg[m-1] = xReg[m];
12694               xOff[m-1] = xOff[m];
12695            }
12696            vassert(m == nX);
12697            xReg[m-1] = tReg;
12698            xOff[m-1] = tOff;
12699         }
12700      } else {
12701         /* store; make this transfer happen first */
12702         if (i > 0) {
12703            for (m = i-1; m >= 0; m--) {
12704               xReg[m+1] = xReg[m];
12705               xOff[m+1] = xOff[m];
12706            }
12707            vassert(m == -1);
12708            xReg[0] = tReg;
12709            xOff[0] = tOff;
12710         }
12711      }
12712
12713      if (0) {
12714         vex_printf("REG_LIST_POST:\n");
12715         for (i = 0; i < nX; i++)
12716            vex_printf("reg %d   off %d\n", xReg[i], xOff[i]);
12717         vex_printf("\n");
12718      }
12719   }
12720
12721   /* According to the Cortex A8 TRM Sec. 5.2.1, LDM(1) with r13 as the base
12722       register and PC in the register list is a return for purposes of branch
12723       prediction.
12724      The ARM ARM Sec. C9.10.1 further specifies that writeback must be enabled
12725       to be counted in event 0x0E (Procedure return).*/
12726   if (rN == 13 && bL == 1 && bINC && !bBEFORE && bW == 1) {
12727      jk = Ijk_Ret;
12728   }
12729
12730   /* Actually generate the transfers */
12731   for (i = 0; i < nX; i++) {
12732      r = xReg[i];
12733      if (bL == 1) {
12734         IRExpr* e = loadLE(Ity_I32,
12735                            binop(opADDorSUB, mkexpr(anchorT),
12736                                  mkU32(xOff[i])));
12737         if (arm) {
12738            putIRegA( r, e, IRTemp_INVALID, jk );
12739         } else {
12740            // no: putIRegT( r, e, IRTemp_INVALID );
12741            // putIRegT refuses to write to R15.  But that might happen.
12742            // Since this is uncond, and we need to be able to
12743            // write the PC, just use the low level put:
12744            llPutIReg( r, e );
12745         }
12746      } else {
12747         /* if we're storing Rn, make sure we use the correct
12748            value, as per extensive comments above */
12749         storeLE( binop(opADDorSUB, mkexpr(anchorT), mkU32(xOff[i])),
12750                  r == rN ? mkexpr(oldRnT)
12751                          : (arm ? getIRegA(r) : getIRegT(r) ) );
12752      }
12753   }
12754
12755   // If we are doing xxMIA or xxMIB,
12756   // do the transfer first, and then update rN afterwards.
12757   if (bW == 1 && bINC) {
12758      IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
12759      if (arm)
12760         putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
12761      else
12762         putIRegT( rN, e, IRTemp_INVALID );
12763   }
12764}
12765
12766
12767/*------------------------------------------------------------*/
12768/*--- VFP (CP 10 and 11) instructions                      ---*/
12769/*------------------------------------------------------------*/
12770
12771/* Both ARM and Thumb */
12772
12773/* Translate a CP10 or CP11 instruction.  If successful, returns
12774   True and *dres may or may not be updated.  If failure, returns
12775   False and doesn't change *dres nor create any IR.
12776
12777   The ARM and Thumb encodings are identical for the low 28 bits of
   the insn (yay!) and that's what the caller must supply, iow, insn28
12779   has the top 4 bits masked out.  Caller is responsible for
12780   determining whether the masked-out bits are valid for a CP10/11
12781   insn.  The rules for the top 4 bits are:
12782
12783     ARM: 0000 to 1110 allowed, and this is the gating condition.
12784     1111 (NV) is not allowed.
12785
12786     Thumb: must be 1110.  The gating condition is taken from
12787     ITSTATE in the normal way.
12788
12789   Conditionalisation:
12790
12791   Caller must supply an IRTemp 'condT' holding the gating condition,
12792   or IRTemp_INVALID indicating the insn is always executed.
12793
12794   Caller must also supply an ARMCondcode 'cond'.  This is only used
12795   for debug printing, no other purpose.  For ARM, this is simply the
12796   top 4 bits of the original instruction.  For Thumb, the condition
12797   is not (really) known until run time, and so ARMCondAL should be
12798   passed, only so that printing of these instructions does not show
12799   any condition.
12800
12801   Finally, the caller must indicate whether this occurs in ARM or
12802   Thumb code.
12803*/
12804static Bool decode_CP10_CP11_instruction (
12805               /*MOD*/DisResult* dres,
12806               UInt              insn28,
12807               IRTemp            condT,
12808               ARMCondcode       conq,
12809               Bool              isT
12810            )
12811{
12812#  define INSN(_bMax,_bMin)  SLICE_UInt(insn28, (_bMax), (_bMin))
12813
12814   vassert(INSN(31,28) == BITS4(0,0,0,0)); // caller's obligation
12815
12816   if (isT) {
12817      vassert(conq == ARMCondAL);
12818   } else {
12819      vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
12820   }
12821
12822   /* ----------------------------------------------------------- */
12823   /* -- VFP instructions -- double precision (mostly)         -- */
12824   /* ----------------------------------------------------------- */
12825
12826   /* --------------------- fldmx, fstmx --------------------- */
12827   /*
12828                                 31   27   23   19 15 11   7   0
12829                                         P U WL
12830      C4-100, C5-26  1  FSTMX    cond 1100 1000 Rn Dd 1011 offset
12831      C4-100, C5-28  2  FSTMIAX  cond 1100 1010 Rn Dd 1011 offset
12832      C4-100, C5-30  3  FSTMDBX  cond 1101 0010 Rn Dd 1011 offset
12833
12834      C4-42, C5-26   1  FLDMX    cond 1100 1001 Rn Dd 1011 offset
12835      C4-42, C5-28   2  FLDMIAX  cond 1100 1011 Rn Dd 1011 offset
12836      C4-42, C5-30   3  FLDMDBX  cond 1101 0011 Rn Dd 1011 offset
12837
12838      Regs transferred: Dd .. D(d + (offset-3)/2)
12839      offset must be odd, must not imply a reg > 15
12840      IA/DB: Rn is changed by (4 + 8 x # regs transferred)
12841
12842      case coding:
12843         1  at-Rn   (access at Rn)
12844         2  ia-Rn   (access at Rn, then Rn += 4+8n)
12845         3  db-Rn   (Rn -= 4+8n,   then access at Rn)
12846   */
12847   if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
12848       && INSN(11,8) == BITS4(1,0,1,1)) {
12849      UInt bP      = (insn28 >> 24) & 1;
12850      UInt bU      = (insn28 >> 23) & 1;
12851      UInt bW      = (insn28 >> 21) & 1;
12852      UInt bL      = (insn28 >> 20) & 1;
12853      UInt offset  = (insn28 >> 0) & 0xFF;
12854      UInt rN      = INSN(19,16);
12855      UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
12856      UInt nRegs   = (offset - 1) / 2;
12857      UInt summary = 0;
12858      Int  i;
12859
12860      /**/ if (bP == 0 && bU == 1 && bW == 0) {
12861         summary = 1;
12862      }
12863      else if (bP == 0 && bU == 1 && bW == 1) {
12864         summary = 2;
12865      }
12866      else if (bP == 1 && bU == 0 && bW == 1) {
12867         summary = 3;
12868      }
12869      else goto after_vfp_fldmx_fstmx;
12870
12871      /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
12872      if (rN == 15 && (summary == 2 || summary == 3 || isT))
12873         goto after_vfp_fldmx_fstmx;
12874
12875      /* offset must be odd, and specify at least one register */
12876      if (0 == (offset & 1) || offset < 3)
12877         goto after_vfp_fldmx_fstmx;
12878
12879      /* can't transfer regs after D15 */
12880      if (dD + nRegs - 1 >= 32)
12881         goto after_vfp_fldmx_fstmx;
12882
12883      /* Now, we can't do a conditional load or store, since that very
12884         likely will generate an exception.  So we have to take a side
12885         exit at this point if the condition is false. */
12886      if (condT != IRTemp_INVALID) {
12887         if (isT)
12888            mk_skip_over_T32_if_cond_is_false( condT );
12889         else
12890            mk_skip_over_A32_if_cond_is_false( condT );
12891         condT = IRTemp_INVALID;
12892      }
12893      /* Ok, now we're unconditional.  Do the load or store. */
12894
12895      /* get the old Rn value */
12896      IRTemp rnT = newTemp(Ity_I32);
12897      assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
12898                           rN == 15));
12899
12900      /* make a new value for Rn, post-insn */
12901      IRTemp rnTnew = IRTemp_INVALID;
12902      if (summary == 2 || summary == 3) {
12903         rnTnew = newTemp(Ity_I32);
12904         assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
12905                              mkexpr(rnT),
12906                              mkU32(4 + 8 * nRegs)));
12907      }
12908
12909      /* decide on the base transfer address */
12910      IRTemp taT = newTemp(Ity_I32);
12911      assign(taT,  summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
12912
12913      /* update Rn if necessary -- in case 3, we're moving it down, so
12914         update before any memory reference, in order to keep Memcheck
12915         and V's stack-extending logic (on linux) happy */
12916      if (summary == 3) {
12917         if (isT)
12918            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
12919         else
12920            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
12921      }
12922
12923      /* generate the transfers */
12924      for (i = 0; i < nRegs; i++) {
12925         IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
12926         if (bL) {
12927            putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
12928         } else {
12929            storeLE(addr, getDReg(dD + i));
12930         }
12931      }
12932
12933      /* update Rn if necessary -- in case 2, we're moving it up, so
12934         update after any memory reference, in order to keep Memcheck
12935         and V's stack-extending logic (on linux) happy */
12936      if (summary == 2) {
12937         if (isT)
12938            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
12939         else
12940            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
12941      }
12942
12943      const HChar* nm = bL==1 ? "ld" : "st";
12944      switch (summary) {
12945         case 1:  DIP("f%smx%s r%u, {d%u-d%u}\n",
12946                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
12947                  break;
12948         case 2:  DIP("f%smiax%s r%u!, {d%u-d%u}\n",
12949                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
12950                  break;
12951         case 3:  DIP("f%smdbx%s r%u!, {d%u-d%u}\n",
12952                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
12953                  break;
12954         default: vassert(0);
12955      }
12956
12957      goto decode_success_vfp;
12958      /* FIXME alignment constraints? */
12959   }
12960
12961  after_vfp_fldmx_fstmx:
12962
12963   /* --------------------- fldmd, fstmd --------------------- */
12964   /*
12965                                 31   27   23   19 15 11   7   0
12966                                         P U WL
12967      C4-96, C5-26   1  FSTMD    cond 1100 1000 Rn Dd 1011 offset
12968      C4-96, C5-28   2  FSTMDIA  cond 1100 1010 Rn Dd 1011 offset
12969      C4-96, C5-30   3  FSTMDDB  cond 1101 0010 Rn Dd 1011 offset
12970
12971      C4-38, C5-26   1  FLDMD    cond 1100 1001 Rn Dd 1011 offset
12972      C4-38, C5-28   2  FLDMIAD  cond 1100 1011 Rn Dd 1011 offset
12973      C4-38, C5-30   3  FLDMDBD  cond 1101 0011 Rn Dd 1011 offset
12974
12975      Regs transferred: Dd .. D(d + (offset-2)/2)
      offset must be even, must not imply a reg > 31
12977      IA/DB: Rn is changed by (8 x # regs transferred)
12978
12979      case coding:
12980         1  at-Rn   (access at Rn)
12981         2  ia-Rn   (access at Rn, then Rn += 8n)
12982         3  db-Rn   (Rn -= 8n,     then access at Rn)
12983   */
   /* Decode and translate the double-precision multiple load/store
      forms.  The P/U/W bits select one of the three addressing cases
      (1 = at-Rn, 2 = ia-Rn, 3 = db-Rn); any other combination, or any
      of the validity checks below failing, skips to the label
      after_vfp_fldmd_fstmd.  On success, control transfers to
      decode_success_vfp. */
   if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
       && INSN(11,8) == BITS4(1,0,1,1)) {
      UInt bP      = (insn28 >> 24) & 1;
      UInt bU      = (insn28 >> 23) & 1;
      UInt bW      = (insn28 >> 21) & 1;
      UInt bL      = (insn28 >> 20) & 1;   /* 1: load  0: store */
      UInt offset  = (insn28 >> 0) & 0xFF;
      UInt rN      = INSN(19,16);
      UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
      UInt nRegs   = offset / 2;   /* two units of 'offset' per D reg */
      UInt summary = 0;            /* addressing case: 1, 2 or 3 */
      Int  i;

      /**/ if (bP == 0 && bU == 1 && bW == 0) {
         summary = 1;
      }
      else if (bP == 0 && bU == 1 && bW == 1) {
         summary = 2;
      }
      else if (bP == 1 && bU == 0 && bW == 1) {
         summary = 3;
      }
      else goto after_vfp_fldmd_fstmd;

      /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
      if (rN == 15 && (summary == 2 || summary == 3 || isT))
         goto after_vfp_fldmd_fstmd;

      /* offset must be even, and specify at least one register */
      if (1 == (offset & 1) || offset < 2)
         goto after_vfp_fldmd_fstmd;

      /* can't transfer regs after D31 */
      if (dD + nRegs - 1 >= 32)
         goto after_vfp_fldmd_fstmd;

      /* Now, we can't do a conditional load or store, since that very
         likely will generate an exception.  So we have to take a side
         exit at this point if the condition is false. */
      if (condT != IRTemp_INVALID) {
         if (isT)
            mk_skip_over_T32_if_cond_is_false( condT );
         else
            mk_skip_over_A32_if_cond_is_false( condT );
         condT = IRTemp_INVALID;
      }
      /* Ok, now we're unconditional.  Do the load or store. */

      /* get the old Rn value (passed through align4if when Rn is r15) */
      IRTemp rnT = newTemp(Ity_I32);
      assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
                           rN == 15));

      /* make a new value for Rn, post-insn: Rn +/- 8 bytes per
         register transferred.  Only needed for the writeback cases. */
      IRTemp rnTnew = IRTemp_INVALID;
      if (summary == 2 || summary == 3) {
         rnTnew = newTemp(Ity_I32);
         assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
                              mkexpr(rnT),
                              mkU32(8 * nRegs)));
      }

      /* decide on the base transfer address: the decremented Rn for
         case 3, the original Rn otherwise */
      IRTemp taT = newTemp(Ity_I32);
      assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));

      /* update Rn if necessary -- in case 3, we're moving it down, so
         update before any memory reference, in order to keep Memcheck
         and V's stack-extending logic (on linux) happy */
      if (summary == 3) {
         if (isT)
            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
         else
            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
      }

      /* generate the transfers: register dD+i lives at taT + 8*i */
      for (i = 0; i < nRegs; i++) {
         IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
         if (bL) {
            putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
         } else {
            storeLE(addr, getDReg(dD + i));
         }
      }

      /* update Rn if necessary -- in case 2, we're moving it up, so
         update after any memory reference, in order to keep Memcheck
         and V's stack-extending logic (on linux) happy */
      if (summary == 2) {
         if (isT)
            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
         else
            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
      }

      /* disassembly trace: mnemonic depends on load/store and case */
      const HChar* nm = bL==1 ? "ld" : "st";
      switch (summary) {
         case 1:  DIP("f%smd%s r%u, {d%u-d%u}\n",
                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
                  break;
         case 2:  DIP("f%smiad%s r%u!, {d%u-d%u}\n",
                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
                  break;
         case 3:  DIP("f%smdbd%s r%u!, {d%u-d%u}\n",
                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
                  break;
         default: vassert(0);
      }

      goto decode_success_vfp;
      /* FIXME alignment constraints? */
   }
13097
13098  after_vfp_fldmd_fstmd:
13099
13100   /* ------------------- fmrx, fmxr ------------------- */
13101   if (BITS8(1,1,1,0,1,1,1,1) == INSN(27,20)
13102       && BITS4(1,0,1,0) == INSN(11,8)
13103       && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
13104      UInt rD  = INSN(15,12);
13105      UInt reg = INSN(19,16);
13106      if (reg == BITS4(0,0,0,1)) {
13107         if (rD == 15) {
13108            IRTemp nzcvT = newTemp(Ity_I32);
13109            /* When rD is 15, we are copying the top 4 bits of FPSCR
13110               into CPSR.  That is, set the flags thunk to COPY and
13111               install FPSCR[31:28] as the value to copy. */
13112            assign(nzcvT, binop(Iop_And32,
13113                                IRExpr_Get(OFFB_FPSCR, Ity_I32),
13114                                mkU32(0xF0000000)));
13115            setFlags_D1(ARMG_CC_OP_COPY, nzcvT, condT);
13116            DIP("fmstat%s\n", nCC(conq));
13117         } else {
13118            /* Otherwise, merely transfer FPSCR to r0 .. r14. */
13119            IRExpr* e = IRExpr_Get(OFFB_FPSCR, Ity_I32);
13120            if (isT)
13121               putIRegT(rD, e, condT);
13122            else
13123               putIRegA(rD, e, condT, Ijk_Boring);
13124            DIP("fmrx%s r%u, fpscr\n", nCC(conq), rD);
13125         }
13126         goto decode_success_vfp;
13127      }
13128      /* fall through */
13129   }
13130
13131   if (BITS8(1,1,1,0,1,1,1,0) == INSN(27,20)
13132       && BITS4(1,0,1,0) == INSN(11,8)
13133       && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
13134      UInt rD  = INSN(15,12);
13135      UInt reg = INSN(19,16);
13136      if (reg == BITS4(0,0,0,1)) {
13137         putMiscReg32(OFFB_FPSCR,
13138                      isT ? getIRegT(rD) : getIRegA(rD), condT);
13139         DIP("fmxr%s fpscr, r%u\n", nCC(conq), rD);
13140         goto decode_success_vfp;
13141      }
13142      /* fall through */
13143   }
13144
13145   /* --------------------- vmov --------------------- */
13146   // VMOV dM, rD, rN
13147   if (0x0C400B10 == (insn28 & 0x0FF00FD0)) {
13148      UInt dM = INSN(3,0) | (INSN(5,5) << 4);
13149      UInt rD = INSN(15,12); /* lo32 */
13150      UInt rN = INSN(19,16); /* hi32 */
13151      if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))) {
13152         /* fall through */
13153      } else {
13154         putDReg(dM,
13155                 unop(Iop_ReinterpI64asF64,
13156                      binop(Iop_32HLto64,
13157                            isT ? getIRegT(rN) : getIRegA(rN),
13158                            isT ? getIRegT(rD) : getIRegA(rD))),
13159                 condT);
13160         DIP("vmov%s d%u, r%u, r%u\n", nCC(conq), dM, rD, rN);
13161         goto decode_success_vfp;
13162      }
13163      /* fall through */
13164   }
13165
13166   // VMOV rD, rN, dM
13167   if (0x0C500B10 == (insn28 & 0x0FF00FD0)) {
13168      UInt dM = INSN(3,0) | (INSN(5,5) << 4);
13169      UInt rD = INSN(15,12); /* lo32 */
13170      UInt rN = INSN(19,16); /* hi32 */
13171      if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))
13172          || rD == rN) {
13173         /* fall through */
13174      } else {
13175         IRTemp i64 = newTemp(Ity_I64);
13176         assign(i64, unop(Iop_ReinterpF64asI64, getDReg(dM)));
13177         IRExpr* hi32 = unop(Iop_64HIto32, mkexpr(i64));
13178         IRExpr* lo32 = unop(Iop_64to32,   mkexpr(i64));
13179         if (isT) {
13180            putIRegT(rN, hi32, condT);
13181            putIRegT(rD, lo32, condT);
13182         } else {
13183            putIRegA(rN, hi32, condT, Ijk_Boring);
13184            putIRegA(rD, lo32, condT, Ijk_Boring);
13185         }
13186         DIP("vmov%s r%u, r%u, d%u\n", nCC(conq), rD, rN, dM);
13187         goto decode_success_vfp;
13188      }
13189      /* fall through */
13190   }
13191
13192   // VMOV sD, sD+1, rN, rM
13193   if (0x0C400A10 == (insn28 & 0x0FF00FD0)) {
13194      UInt sD = (INSN(3,0) << 1) | INSN(5,5);
13195      UInt rN = INSN(15,12);
13196      UInt rM = INSN(19,16);
13197      if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
13198          || sD == 31) {
13199         /* fall through */
13200      } else {
13201         putFReg(sD,
13202                 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rN) : getIRegA(rN)),
13203                 condT);
13204         putFReg(sD+1,
13205                 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rM) : getIRegA(rM)),
13206                 condT);
13207         DIP("vmov%s, s%u, s%u, r%u, r%u\n",
13208              nCC(conq), sD, sD + 1, rN, rM);
13209         goto decode_success_vfp;
13210      }
13211   }
13212
13213   // VMOV rN, rM, sD, sD+1
13214   if (0x0C500A10 == (insn28 & 0x0FF00FD0)) {
13215      UInt sD = (INSN(3,0) << 1) | INSN(5,5);
13216      UInt rN = INSN(15,12);
13217      UInt rM = INSN(19,16);
13218      if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
13219          || sD == 31 || rN == rM) {
13220         /* fall through */
13221      } else {
13222         IRExpr* res0 = unop(Iop_ReinterpF32asI32, getFReg(sD));
13223         IRExpr* res1 = unop(Iop_ReinterpF32asI32, getFReg(sD+1));
13224         if (isT) {
13225            putIRegT(rN, res0, condT);
13226            putIRegT(rM, res1, condT);
13227         } else {
13228            putIRegA(rN, res0, condT, Ijk_Boring);
13229            putIRegA(rM, res1, condT, Ijk_Boring);
13230         }
13231         DIP("vmov%s, r%u, r%u, s%u, s%u\n",
13232             nCC(conq), rN, rM, sD, sD + 1);
13233         goto decode_success_vfp;
13234      }
13235   }
13236
13237   // VMOV rD[x], rT  (ARM core register to scalar)
13238   if (0x0E000B10 == (insn28 & 0x0F900F1F)) {
13239      UInt rD  = (INSN(7,7) << 4) | INSN(19,16);
13240      UInt rT  = INSN(15,12);
13241      UInt opc = (INSN(22,21) << 2) | INSN(6,5);
13242      UInt index;
13243      if (rT == 15 || (isT && rT == 13)) {
13244         /* fall through */
13245      } else {
13246         if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
13247            index = opc & 7;
13248            putDRegI64(rD, triop(Iop_SetElem8x8,
13249                                 getDRegI64(rD),
13250                                 mkU8(index),
13251                                 unop(Iop_32to8,
13252                                      isT ? getIRegT(rT) : getIRegA(rT))),
13253                           condT);
13254            DIP("vmov%s.8 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
13255            goto decode_success_vfp;
13256         }
13257         else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
13258            index = (opc >> 1) & 3;
13259            putDRegI64(rD, triop(Iop_SetElem16x4,
13260                                 getDRegI64(rD),
13261                                 mkU8(index),
13262                                 unop(Iop_32to16,
13263                                      isT ? getIRegT(rT) : getIRegA(rT))),
13264                           condT);
13265            DIP("vmov%s.16 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
13266            goto decode_success_vfp;
13267         }
13268         else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0)) {
13269            index = (opc >> 2) & 1;
13270            putDRegI64(rD, triop(Iop_SetElem32x2,
13271                                 getDRegI64(rD),
13272                                 mkU8(index),
13273                                 isT ? getIRegT(rT) : getIRegA(rT)),
13274                           condT);
13275            DIP("vmov%s.32 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
13276            goto decode_success_vfp;
13277         } else {
13278            /* fall through */
13279         }
13280      }
13281   }
13282
13283   // VMOV (scalar to ARM core register)
13284   // VMOV rT, rD[x]
13285   if (0x0E100B10 == (insn28 & 0x0F100F1F)) {
13286      UInt rN  = (INSN(7,7) << 4) | INSN(19,16);
13287      UInt rT  = INSN(15,12);
13288      UInt U   = INSN(23,23);
13289      UInt opc = (INSN(22,21) << 2) | INSN(6,5);
13290      UInt index;
13291      if (rT == 15 || (isT && rT == 13)) {
13292         /* fall through */
13293      } else {
13294         if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
13295            index = opc & 7;
13296            IRExpr* e = unop(U ? Iop_8Uto32 : Iop_8Sto32,
13297                             binop(Iop_GetElem8x8,
13298                                   getDRegI64(rN),
13299                                   mkU8(index)));
13300            if (isT)
13301               putIRegT(rT, e, condT);
13302            else
13303               putIRegA(rT, e, condT, Ijk_Boring);
13304            DIP("vmov%s.%c8 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
13305                  rT, rN, index);
13306            goto decode_success_vfp;
13307         }
13308         else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
13309            index = (opc >> 1) & 3;
13310            IRExpr* e = unop(U ? Iop_16Uto32 : Iop_16Sto32,
13311                             binop(Iop_GetElem16x4,
13312                                   getDRegI64(rN),
13313                                   mkU8(index)));
13314            if (isT)
13315               putIRegT(rT, e, condT);
13316            else
13317               putIRegA(rT, e, condT, Ijk_Boring);
13318            DIP("vmov%s.%c16 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
13319                  rT, rN, index);
13320            goto decode_success_vfp;
13321         }
13322         else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0) && U == 0) {
13323            index = (opc >> 2) & 1;
13324            IRExpr* e = binop(Iop_GetElem32x2, getDRegI64(rN), mkU8(index));
13325            if (isT)
13326               putIRegT(rT, e, condT);
13327            else
13328               putIRegA(rT, e, condT, Ijk_Boring);
13329            DIP("vmov%s.32 r%u, d%u[%u]\n", nCC(conq), rT, rN, index);
13330            goto decode_success_vfp;
13331         } else {
13332            /* fall through */
13333         }
13334      }
13335   }
13336
13337   // VMOV.F32 sD, #imm
13338   // FCONSTS sD, #imm
13339   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13340       && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,0)) {
13341      UInt rD   = (INSN(15,12) << 1) | INSN(22,22);
13342      UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
13343      UInt b    = (imm8 >> 6) & 1;
13344      UInt imm;
13345      imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,(imm8 >> 5) & 1) << 8)
13346             | ((imm8 & 0x1f) << 3);
13347      imm <<= 16;
13348      putFReg(rD, unop(Iop_ReinterpI32asF32, mkU32(imm)), condT);
13349      DIP("fconsts%s s%u #%u", nCC(conq), rD, imm8);
13350      goto decode_success_vfp;
13351   }
13352
13353   // VMOV.F64 dD, #imm
13354   // FCONSTD dD, #imm
13355   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13356       && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,1)) {
13357      UInt rD   = INSN(15,12) | (INSN(22,22) << 4);
13358      UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
13359      UInt b    = (imm8 >> 6) & 1;
13360      ULong imm;
13361      imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,b) << 8)
13362             | BITS8(b,b,0,0,0,0,0,0) | (imm8 & 0x3f);
13363      imm <<= 48;
13364      putDReg(rD, unop(Iop_ReinterpI64asF64, mkU64(imm)), condT);
13365      DIP("fconstd%s d%u #%u", nCC(conq), rD, imm8);
13366      goto decode_success_vfp;
13367   }
13368
13369   /* ---------------------- vdup ------------------------- */
13370   // VDUP dD, rT
13371   // VDUP qD, rT
13372   if (BITS8(1,1,1,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,1))
13373       && BITS4(1,0,1,1) == INSN(11,8) && INSN(6,6) == 0 && INSN(4,4) == 1) {
13374      UInt rD   = (INSN(7,7) << 4) | INSN(19,16);
13375      UInt rT   = INSN(15,12);
13376      UInt Q    = INSN(21,21);
13377      UInt size = (INSN(22,22) << 1) | INSN(5,5);
13378      if (rT == 15 || (isT && rT == 13) || size == 3 || (Q && (rD & 1))) {
13379         /* fall through */
13380      } else {
13381         IRExpr* e = isT ? getIRegT(rT) : getIRegA(rT);
13382         if (Q) {
13383            rD >>= 1;
13384            switch (size) {
13385               case 0:
13386                  putQReg(rD, unop(Iop_Dup32x4, e), condT);
13387                  break;
13388               case 1:
13389                  putQReg(rD, unop(Iop_Dup16x8, unop(Iop_32to16, e)),
13390                              condT);
13391                  break;
13392               case 2:
13393                  putQReg(rD, unop(Iop_Dup8x16, unop(Iop_32to8, e)),
13394                              condT);
13395                  break;
13396               default:
13397                  vassert(0);
13398            }
13399            DIP("vdup.%u q%u, r%u\n", 32 / (1<<size), rD, rT);
13400         } else {
13401            switch (size) {
13402               case 0:
13403                  putDRegI64(rD, unop(Iop_Dup32x2, e), condT);
13404                  break;
13405               case 1:
13406                  putDRegI64(rD, unop(Iop_Dup16x4, unop(Iop_32to16, e)),
13407                               condT);
13408                  break;
13409               case 2:
13410                  putDRegI64(rD, unop(Iop_Dup8x8, unop(Iop_32to8, e)),
13411                               condT);
13412                  break;
13413               default:
13414                  vassert(0);
13415            }
13416            DIP("vdup.%u d%u, r%u\n", 32 / (1<<size), rD, rT);
13417         }
13418         goto decode_success_vfp;
13419      }
13420   }
13421
13422   /* --------------------- f{ld,st}d --------------------- */
13423   // FLDD, FSTD
13424   if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
13425       && BITS4(1,0,1,1) == INSN(11,8)) {
13426      UInt dD     = INSN(15,12) | (INSN(22,22) << 4);
13427      UInt rN     = INSN(19,16);
13428      UInt offset = (insn28 & 0xFF) << 2;
13429      UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
13430      UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
13431      /* make unconditional */
13432      if (condT != IRTemp_INVALID) {
13433         if (isT)
13434            mk_skip_over_T32_if_cond_is_false( condT );
13435         else
13436            mk_skip_over_A32_if_cond_is_false( condT );
13437         condT = IRTemp_INVALID;
13438      }
13439      IRTemp ea = newTemp(Ity_I32);
13440      assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
13441                       align4if(isT ? getIRegT(rN) : getIRegA(rN),
13442                                rN == 15),
13443                       mkU32(offset)));
13444      if (bL) {
13445         putDReg(dD, loadLE(Ity_F64,mkexpr(ea)), IRTemp_INVALID);
13446      } else {
13447         storeLE(mkexpr(ea), getDReg(dD));
13448      }
13449      DIP("f%sd%s d%u, [r%u, %c#%u]\n",
13450          bL ? "ld" : "st", nCC(conq), dD, rN,
13451          bU ? '+' : '-', offset);
13452      goto decode_success_vfp;
13453   }
13454
13455   /* --------------------- dp insns (D) --------------------- */
13456   if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
13457       && BITS4(1,0,1,1) == INSN(11,8)
13458       && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
13459      UInt    dM  = INSN(3,0)   | (INSN(5,5) << 4);       /* argR */
13460      UInt    dD  = INSN(15,12) | (INSN(22,22) << 4);   /* dst/acc */
13461      UInt    dN  = INSN(19,16) | (INSN(7,7) << 4);     /* argL */
13462      UInt    bP  = (insn28 >> 23) & 1;
13463      UInt    bQ  = (insn28 >> 21) & 1;
13464      UInt    bR  = (insn28 >> 20) & 1;
13465      UInt    bS  = (insn28 >> 6) & 1;
13466      UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
13467      IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
13468      switch (opc) {
13469         case BITS4(0,0,0,0): /* MAC: d + n * m */
13470            putDReg(dD, triop(Iop_AddF64, rm,
13471                              getDReg(dD),
13472                              triop(Iop_MulF64, rm, getDReg(dN),
13473                                                    getDReg(dM))),
13474                        condT);
13475            DIP("fmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13476            goto decode_success_vfp;
13477         case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
13478            putDReg(dD, triop(Iop_AddF64, rm,
13479                              getDReg(dD),
13480                              unop(Iop_NegF64,
13481                                   triop(Iop_MulF64, rm, getDReg(dN),
13482                                                         getDReg(dM)))),
13483                        condT);
13484            DIP("fnmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13485            goto decode_success_vfp;
13486         case BITS4(0,0,1,0): /* MSC: - d + n * m */
13487            putDReg(dD, triop(Iop_AddF64, rm,
13488                              unop(Iop_NegF64, getDReg(dD)),
13489                              triop(Iop_MulF64, rm, getDReg(dN),
13490                                                    getDReg(dM))),
13491                        condT);
13492            DIP("fmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13493            goto decode_success_vfp;
13494         case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
13495            putDReg(dD, triop(Iop_AddF64, rm,
13496                              unop(Iop_NegF64, getDReg(dD)),
13497                              unop(Iop_NegF64,
13498                                   triop(Iop_MulF64, rm, getDReg(dN),
13499                                                         getDReg(dM)))),
13500                        condT);
13501            DIP("fnmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13502            goto decode_success_vfp;
13503         case BITS4(0,1,0,0): /* MUL: n * m */
13504            putDReg(dD, triop(Iop_MulF64, rm, getDReg(dN), getDReg(dM)),
13505                        condT);
13506            DIP("fmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13507            goto decode_success_vfp;
13508         case BITS4(0,1,0,1): /* NMUL: - n * m */
13509            putDReg(dD, unop(Iop_NegF64,
13510                             triop(Iop_MulF64, rm, getDReg(dN),
13511                                                   getDReg(dM))),
13512                    condT);
13513            DIP("fnmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13514            goto decode_success_vfp;
13515         case BITS4(0,1,1,0): /* ADD: n + m */
13516            putDReg(dD, triop(Iop_AddF64, rm, getDReg(dN), getDReg(dM)),
13517                        condT);
13518            DIP("faddd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13519            goto decode_success_vfp;
13520         case BITS4(0,1,1,1): /* SUB: n - m */
13521            putDReg(dD, triop(Iop_SubF64, rm, getDReg(dN), getDReg(dM)),
13522                        condT);
13523            DIP("fsubd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13524            goto decode_success_vfp;
13525         case BITS4(1,0,0,0): /* DIV: n / m */
13526            putDReg(dD, triop(Iop_DivF64, rm, getDReg(dN), getDReg(dM)),
13527                        condT);
13528            DIP("fdivd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13529            goto decode_success_vfp;
13530         case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
13531            /* XXXROUNDINGFIXME look up ARM reference for fused
13532               multiply-add rounding */
13533            putDReg(dD, triop(Iop_AddF64, rm,
13534                              getDReg(dD),
13535                              triop(Iop_MulF64, rm, getDReg(dN),
13536                                                    getDReg(dM))),
13537                        condT);
13538            DIP("vfmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13539            goto decode_success_vfp;
13540         case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
13541            /* XXXROUNDINGFIXME look up ARM reference for fused
13542               multiply-add rounding */
13543            putDReg(dD, triop(Iop_AddF64, rm,
13544                              getDReg(dD),
13545                              triop(Iop_MulF64, rm,
13546                                    unop(Iop_NegF64, getDReg(dN)),
13547                                    getDReg(dM))),
13548                        condT);
13549            DIP("vfmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13550            goto decode_success_vfp;
13551         default:
13552            break;
13553      }
13554   }
13555
13556   /* --------------------- compares (D) --------------------- */
13557   /*          31   27   23   19   15 11   7    3
13558                 28   24   20   16 12    8    4    0
13559      FCMPD    cond 1110 1D11 0100 Dd 1011 0100 Dm
13560      FCMPED   cond 1110 1D11 0100 Dd 1011 1100 Dm
13561      FCMPZD   cond 1110 1D11 0101 Dd 1011 0100 0000
13562      FCMPZED  cond 1110 1D11 0101 Dd 1011 1100 0000
13563                                 Z         N
13564
13565      Z=0 Compare Dd vs Dm     and set FPSCR 31:28 accordingly
13566      Z=1 Compare Dd vs zero
13567
13568      N=1 generates Invalid Operation exn if either arg is any kind of NaN
13569      N=0 generates Invalid Operation exn if either arg is a signalling NaN
13570      (Not that we pay any attention to N here)
13571   */
   /* Double-precision compare: compares Dd against Dm (Z=0) or
      against a zero constant (Z=1) and stores the resulting NZCV
      group in FPSCR[31:28], leaving the other FPSCR bits unchanged.
      The N bit only affects the printed mnemonic. */
   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
       && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
       && BITS4(1,0,1,1) == INSN(11,8)
       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
      UInt bZ = (insn28 >> 16) & 1;
      UInt bN = (insn28 >> 7) & 1;
      UInt dD = INSN(15,12) | (INSN(22,22) << 4);
      UInt dM = INSN(3,0) | (INSN(5,5) << 4);
      /* The compare-with-zero forms require insn bits 3:0 to be zero. */
      if (bZ && INSN(3,0) != 0) {
         /* does not decode; fall through */
      } else {
         IRTemp argL = newTemp(Ity_F64);
         IRTemp argR = newTemp(Ity_F64);
         IRTemp irRes = newTemp(Ity_I32);
         assign(argL, getDReg(dD));
         assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0)) : getDReg(dM));
         assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));

         IRTemp nzcv     = IRTemp_INVALID;
         IRTemp oldFPSCR = newTemp(Ity_I32);
         IRTemp newFPSCR = newTemp(Ity_I32);

         /* This is where the fun starts.  We have to convert 'irRes'
            from an IR-convention return result (IRCmpF64Result) to an
            ARM-encoded (N,Z,C,V) group.  The final result is in the
            bottom 4 bits of 'nzcv'. */
         /* Map compare result from IR to ARM(nzcv) */
         /*
            FP cmp result | IR   | ARM(nzcv)
            --------------------------------
            UN              0x45   0011
            LT              0x01   1000
            GT              0x00   0010
            EQ              0x40   0110
         */
         nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);

         /* And update FPSCR accordingly: clear the top four bits of
            the old value and OR in nzcv shifted up to bits 31:28. */
         assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
         assign(newFPSCR,
                binop(Iop_Or32,
                      binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
                      binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));

         putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);

         if (bZ) {
            DIP("fcmpz%sd%s d%u\n", bN ? "e" : "", nCC(conq), dD);
         } else {
            DIP("fcmp%sd%s d%u, d%u\n", bN ? "e" : "", nCC(conq), dD, dM);
         }
         goto decode_success_vfp;
      }
      /* fall through */
   }
13627
13628   /* --------------------- unary (D) --------------------- */
13629   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13630       && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
13631       && BITS4(1,0,1,1) == INSN(11,8)
13632       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
13633      UInt dD  = INSN(15,12) | (INSN(22,22) << 4);
13634      UInt dM  = INSN(3,0) | (INSN(5,5) << 4);
13635      UInt b16 = (insn28 >> 16) & 1;
13636      UInt b7  = (insn28 >> 7) & 1;
13637      /**/ if (b16 == 0 && b7 == 0) {
13638         // FCPYD
13639         putDReg(dD, getDReg(dM), condT);
13640         DIP("fcpyd%s d%u, d%u\n", nCC(conq), dD, dM);
13641         goto decode_success_vfp;
13642      }
13643      else if (b16 == 0 && b7 == 1) {
13644         // FABSD
13645         putDReg(dD, unop(Iop_AbsF64, getDReg(dM)), condT);
13646         DIP("fabsd%s d%u, d%u\n", nCC(conq), dD, dM);
13647         goto decode_success_vfp;
13648      }
13649      else if (b16 == 1 && b7 == 0) {
13650         // FNEGD
13651         putDReg(dD, unop(Iop_NegF64, getDReg(dM)), condT);
13652         DIP("fnegd%s d%u, d%u\n", nCC(conq), dD, dM);
13653         goto decode_success_vfp;
13654      }
13655      else if (b16 == 1 && b7 == 1) {
13656         // FSQRTD
13657         IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
13658         putDReg(dD, binop(Iop_SqrtF64, rm, getDReg(dM)), condT);
13659         DIP("fsqrtd%s d%u, d%u\n", nCC(conq), dD, dM);
13660         goto decode_success_vfp;
13661      }
13662      else
13663         vassert(0);
13664
13665      /* fall through */
13666   }
13667
13668   /* ----------------- I <-> D conversions ----------------- */
13669
13670   // F{S,U}ITOD dD, fM
13671   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13672       && BITS4(1,0,0,0) == (INSN(19,16) & BITS4(1,1,1,1))
13673       && BITS4(1,0,1,1) == INSN(11,8)
13674       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
13675      UInt bM    = (insn28 >> 5) & 1;
13676      UInt fM    = (INSN(3,0) << 1) | bM;
13677      UInt dD    = INSN(15,12) | (INSN(22,22) << 4);
13678      UInt syned = (insn28 >> 7) & 1;
13679      if (syned) {
13680         // FSITOD
13681         putDReg(dD, unop(Iop_I32StoF64,
13682                          unop(Iop_ReinterpF32asI32, getFReg(fM))),
13683                 condT);
13684         DIP("fsitod%s d%u, s%u\n", nCC(conq), dD, fM);
13685      } else {
13686         // FUITOD
13687         putDReg(dD, unop(Iop_I32UtoF64,
13688                          unop(Iop_ReinterpF32asI32, getFReg(fM))),
13689                 condT);
13690         DIP("fuitod%s d%u, s%u\n", nCC(conq), dD, fM);
13691      }
13692      goto decode_success_vfp;
13693   }
13694
13695   // FTO{S,U}ID fD, dM
13696   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13697       && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
13698       && BITS4(1,0,1,1) == INSN(11,8)
13699       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
13700      UInt   bD    = (insn28 >> 22) & 1;
13701      UInt   fD    = (INSN(15,12) << 1) | bD;
13702      UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
13703      UInt   bZ    = (insn28 >> 7) & 1;
13704      UInt   syned = (insn28 >> 16) & 1;
13705      IRTemp rmode = newTemp(Ity_I32);
13706      assign(rmode, bZ ? mkU32(Irrm_ZERO)
13707                       : mkexpr(mk_get_IR_rounding_mode()));
13708      if (syned) {
13709         // FTOSID
13710         putFReg(fD, unop(Iop_ReinterpI32asF32,
13711                          binop(Iop_F64toI32S, mkexpr(rmode),
13712                                getDReg(dM))),
13713                 condT);
13714         DIP("ftosi%sd%s s%u, d%u\n", bZ ? "z" : "",
13715             nCC(conq), fD, dM);
13716      } else {
13717         // FTOUID
13718         putFReg(fD, unop(Iop_ReinterpI32asF32,
13719                          binop(Iop_F64toI32U, mkexpr(rmode),
13720                                getDReg(dM))),
13721                 condT);
13722         DIP("ftoui%sd%s s%u, d%u\n", bZ ? "z" : "",
13723             nCC(conq), fD, dM);
13724      }
13725      goto decode_success_vfp;
13726   }
13727
13728   /* ----------------------------------------------------------- */
13729   /* -- VFP instructions -- single precision                  -- */
13730   /* ----------------------------------------------------------- */
13731
13732   /* --------------------- fldms, fstms --------------------- */
13733   /*
13734                                 31   27   23   19 15 11   7   0
13735                                         P UDWL
13736      C4-98, C5-26   1  FSTMS    cond 1100 1x00 Rn Fd 1010 offset
13737      C4-98, C5-28   2  FSTMIAS  cond 1100 1x10 Rn Fd 1010 offset
13738      C4-98, C5-30   3  FSTMDBS  cond 1101 0x10 Rn Fd 1010 offset
13739
13740      C4-40, C5-26   1  FLDMS    cond 1100 1x01 Rn Fd 1010 offset
13741      C4-40, C5-26   2  FLDMIAS  cond 1100 1x11 Rn Fd 1010 offset
13742      C4-40, C5-26   3  FLDMDBS  cond 1101 0x11 Rn Fd 1010 offset
13743
13744      Regs transferred: F(Fd:D) .. F(Fd:D + offset - 1)
13745      offset must not imply a reg > 31
13746      IA/DB: Rn is changed by (4 x # regs transferred)
13747
13748      case coding:
13749         1  at-Rn   (access at Rn)
13750         2  ia-Rn   (access at Rn, then Rn += 4n)
13751         3  db-Rn   (Rn -= 4n,     then access at Rn)
13752   */
13753   if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
13754       && INSN(11,8) == BITS4(1,0,1,0)) {
13755      UInt bP      = (insn28 >> 24) & 1;
13756      UInt bU      = (insn28 >> 23) & 1;
13757      UInt bW      = (insn28 >> 21) & 1;
13758      UInt bL      = (insn28 >> 20) & 1;
13759      UInt bD      = (insn28 >> 22) & 1;
13760      UInt offset  = (insn28 >> 0) & 0xFF;
13761      UInt rN      = INSN(19,16);
13762      UInt fD      = (INSN(15,12) << 1) | bD;
13763      UInt nRegs   = offset;
13764      UInt summary = 0;
13765      Int  i;
13766
13767      /**/ if (bP == 0 && bU == 1 && bW == 0) {
13768         summary = 1;
13769      }
13770      else if (bP == 0 && bU == 1 && bW == 1) {
13771         summary = 2;
13772      }
13773      else if (bP == 1 && bU == 0 && bW == 1) {
13774         summary = 3;
13775      }
13776      else goto after_vfp_fldms_fstms;
13777
13778      /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
13779      if (rN == 15 && (summary == 2 || summary == 3 || isT))
13780         goto after_vfp_fldms_fstms;
13781
13782      /* offset must specify at least one register */
13783      if (offset < 1)
13784         goto after_vfp_fldms_fstms;
13785
13786      /* can't transfer regs after S31 */
13787      if (fD + nRegs - 1 >= 32)
13788         goto after_vfp_fldms_fstms;
13789
13790      /* Now, we can't do a conditional load or store, since that very
13791         likely will generate an exception.  So we have to take a side
13792         exit at this point if the condition is false. */
13793      if (condT != IRTemp_INVALID) {
13794         if (isT)
13795            mk_skip_over_T32_if_cond_is_false( condT );
13796         else
13797            mk_skip_over_A32_if_cond_is_false( condT );
13798         condT = IRTemp_INVALID;
13799      }
13800      /* Ok, now we're unconditional.  Do the load or store. */
13801
13802      /* get the old Rn value */
13803      IRTemp rnT = newTemp(Ity_I32);
13804      assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
13805                           rN == 15));
13806
13807      /* make a new value for Rn, post-insn */
13808      IRTemp rnTnew = IRTemp_INVALID;
13809      if (summary == 2 || summary == 3) {
13810         rnTnew = newTemp(Ity_I32);
13811         assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
13812                              mkexpr(rnT),
13813                              mkU32(4 * nRegs)));
13814      }
13815
13816      /* decide on the base transfer address */
13817      IRTemp taT = newTemp(Ity_I32);
13818      assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
13819
13820      /* update Rn if necessary -- in case 3, we're moving it down, so
13821         update before any memory reference, in order to keep Memcheck
13822         and V's stack-extending logic (on linux) happy */
13823      if (summary == 3) {
13824         if (isT)
13825            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
13826         else
13827            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
13828      }
13829
13830      /* generate the transfers */
13831      for (i = 0; i < nRegs; i++) {
13832         IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(4*i));
13833         if (bL) {
13834            putFReg(fD + i, loadLE(Ity_F32, addr), IRTemp_INVALID);
13835         } else {
13836            storeLE(addr, getFReg(fD + i));
13837         }
13838      }
13839
13840      /* update Rn if necessary -- in case 2, we're moving it up, so
13841         update after any memory reference, in order to keep Memcheck
13842         and V's stack-extending logic (on linux) happy */
13843      if (summary == 2) {
13844         if (isT)
13845            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
13846         else
13847            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
13848      }
13849
13850      const HChar* nm = bL==1 ? "ld" : "st";
13851      switch (summary) {
13852         case 1:  DIP("f%sms%s r%u, {s%u-s%u}\n",
13853                      nm, nCC(conq), rN, fD, fD + nRegs - 1);
13854                  break;
13855         case 2:  DIP("f%smias%s r%u!, {s%u-s%u}\n",
13856                      nm, nCC(conq), rN, fD, fD + nRegs - 1);
13857                  break;
13858         case 3:  DIP("f%smdbs%s r%u!, {s%u-s%u}\n",
13859                      nm, nCC(conq), rN, fD, fD + nRegs - 1);
13860                  break;
13861         default: vassert(0);
13862      }
13863
13864      goto decode_success_vfp;
13865      /* FIXME alignment constraints? */
13866   }
13867
13868  after_vfp_fldms_fstms:
13869
13870   /* --------------------- fmsr, fmrs --------------------- */
13871   if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
13872       && BITS4(1,0,1,0) == INSN(11,8)
13873       && BITS4(0,0,0,0) == INSN(3,0)
13874       && BITS4(0,0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
13875      UInt rD  = INSN(15,12);
13876      UInt b7  = (insn28 >> 7) & 1;
13877      UInt fN  = (INSN(19,16) << 1) | b7;
13878      UInt b20 = (insn28 >> 20) & 1;
13879      if (rD == 15) {
13880         /* fall through */
13881         /* Let's assume that no sane person would want to do
13882            floating-point transfers to or from the program counter,
13883            and simply decline to decode the instruction.  The ARM ARM
13884            doesn't seem to explicitly disallow this case, though. */
13885      } else {
13886         if (b20) {
13887            IRExpr* res = unop(Iop_ReinterpF32asI32, getFReg(fN));
13888            if (isT)
13889               putIRegT(rD, res, condT);
13890            else
13891               putIRegA(rD, res, condT, Ijk_Boring);
13892            DIP("fmrs%s r%u, s%u\n", nCC(conq), rD, fN);
13893         } else {
13894            putFReg(fN, unop(Iop_ReinterpI32asF32,
13895                             isT ? getIRegT(rD) : getIRegA(rD)),
13896                        condT);
13897            DIP("fmsr%s s%u, r%u\n", nCC(conq), fN, rD);
13898         }
13899         goto decode_success_vfp;
13900      }
13901      /* fall through */
13902   }
13903
13904   /* --------------------- f{ld,st}s --------------------- */
13905   // FLDS, FSTS
13906   if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
13907       && BITS4(1,0,1,0) == INSN(11,8)) {
13908      UInt bD     = (insn28 >> 22) & 1;
13909      UInt fD     = (INSN(15,12) << 1) | bD;
13910      UInt rN     = INSN(19,16);
13911      UInt offset = (insn28 & 0xFF) << 2;
13912      UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
13913      UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
13914      /* make unconditional */
13915      if (condT != IRTemp_INVALID) {
13916         if (isT)
13917            mk_skip_over_T32_if_cond_is_false( condT );
13918         else
13919            mk_skip_over_A32_if_cond_is_false( condT );
13920         condT = IRTemp_INVALID;
13921      }
13922      IRTemp ea = newTemp(Ity_I32);
13923      assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
13924                       align4if(isT ? getIRegT(rN) : getIRegA(rN),
13925                                rN == 15),
13926                       mkU32(offset)));
13927      if (bL) {
13928         putFReg(fD, loadLE(Ity_F32,mkexpr(ea)), IRTemp_INVALID);
13929      } else {
13930         storeLE(mkexpr(ea), getFReg(fD));
13931      }
13932      DIP("f%ss%s s%u, [r%u, %c#%u]\n",
13933          bL ? "ld" : "st", nCC(conq), fD, rN,
13934          bU ? '+' : '-', offset);
13935      goto decode_success_vfp;
13936   }
13937
13938   /* --------------------- dp insns (F) --------------------- */
13939   if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
13940       && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
13941       && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
13942      UInt    bM  = (insn28 >> 5) & 1;
13943      UInt    bD  = (insn28 >> 22) & 1;
13944      UInt    bN  = (insn28 >> 7) & 1;
13945      UInt    fM  = (INSN(3,0) << 1) | bM;   /* argR */
13946      UInt    fD  = (INSN(15,12) << 1) | bD; /* dst/acc */
13947      UInt    fN  = (INSN(19,16) << 1) | bN; /* argL */
13948      UInt    bP  = (insn28 >> 23) & 1;
13949      UInt    bQ  = (insn28 >> 21) & 1;
13950      UInt    bR  = (insn28 >> 20) & 1;
13951      UInt    bS  = (insn28 >> 6) & 1;
13952      UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
13953      IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
13954      switch (opc) {
13955         case BITS4(0,0,0,0): /* MAC: d + n * m */
13956            putFReg(fD, triop(Iop_AddF32, rm,
13957                              getFReg(fD),
13958                              triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
13959                        condT);
13960            DIP("fmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
13961            goto decode_success_vfp;
13962         case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
13963            putFReg(fD, triop(Iop_AddF32, rm,
13964                              getFReg(fD),
13965                              unop(Iop_NegF32,
13966                                   triop(Iop_MulF32, rm, getFReg(fN),
13967                                                         getFReg(fM)))),
13968                        condT);
13969            DIP("fnmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
13970            goto decode_success_vfp;
13971         case BITS4(0,0,1,0): /* MSC: - d + n * m */
13972            putFReg(fD, triop(Iop_AddF32, rm,
13973                              unop(Iop_NegF32, getFReg(fD)),
13974                              triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
13975                        condT);
13976            DIP("fmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
13977            goto decode_success_vfp;
13978         case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
13979            putFReg(fD, triop(Iop_AddF32, rm,
13980                              unop(Iop_NegF32, getFReg(fD)),
13981                              unop(Iop_NegF32,
13982                                   triop(Iop_MulF32, rm,
13983                                                     getFReg(fN),
13984                                                    getFReg(fM)))),
13985                        condT);
13986            DIP("fnmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
13987            goto decode_success_vfp;
13988         case BITS4(0,1,0,0): /* MUL: n * m */
13989            putFReg(fD, triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM)),
13990                        condT);
13991            DIP("fmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
13992            goto decode_success_vfp;
13993         case BITS4(0,1,0,1): /* NMUL: - n * m */
13994            putFReg(fD, unop(Iop_NegF32,
13995                             triop(Iop_MulF32, rm, getFReg(fN),
13996                                                   getFReg(fM))),
13997                    condT);
13998            DIP("fnmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
13999            goto decode_success_vfp;
14000         case BITS4(0,1,1,0): /* ADD: n + m */
14001            putFReg(fD, triop(Iop_AddF32, rm, getFReg(fN), getFReg(fM)),
14002                        condT);
14003            DIP("fadds%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
14004            goto decode_success_vfp;
14005         case BITS4(0,1,1,1): /* SUB: n - m */
14006            putFReg(fD, triop(Iop_SubF32, rm, getFReg(fN), getFReg(fM)),
14007                        condT);
14008            DIP("fsubs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
14009            goto decode_success_vfp;
14010         case BITS4(1,0,0,0): /* DIV: n / m */
14011            putFReg(fD, triop(Iop_DivF32, rm, getFReg(fN), getFReg(fM)),
14012                        condT);
14013            DIP("fdivs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
14014            goto decode_success_vfp;
14015         case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
14016            /* XXXROUNDINGFIXME look up ARM reference for fused
14017               multiply-add rounding */
14018            putFReg(fD, triop(Iop_AddF32, rm,
14019                              getFReg(fD),
14020                              triop(Iop_MulF32, rm, getFReg(fN),
14021                                                    getFReg(fM))),
14022                        condT);
14023            DIP("vfmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
14024            goto decode_success_vfp;
14025         case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
14026            /* XXXROUNDINGFIXME look up ARM reference for fused
14027               multiply-add rounding */
14028            putFReg(fD, triop(Iop_AddF32, rm,
14029                              getFReg(fD),
14030                              triop(Iop_MulF32, rm,
14031                                    unop(Iop_NegF32, getFReg(fN)),
14032                                    getFReg(fM))),
14033                        condT);
14034            DIP("vfmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
14035            goto decode_success_vfp;
14036         default:
14037            break;
14038      }
14039   }
14040
14041   /* --------------------- compares (S) --------------------- */
14042   /*          31   27   23   19   15 11   7    3
14043                 28   24   20   16 12    8    4    0
14044      FCMPS    cond 1110 1D11 0100 Fd 1010 01M0 Fm
14045      FCMPES   cond 1110 1D11 0100 Fd 1010 11M0 Fm
14046      FCMPZS   cond 1110 1D11 0101 Fd 1010 0100 0000
14047      FCMPZES  cond 1110 1D11 0101 Fd 1010 1100 0000
14048                                 Z         N
14049
14050      Z=0 Compare Fd:D vs Fm:M     and set FPSCR 31:28 accordingly
14051      Z=1 Compare Fd:D vs zero
14052
14053      N=1 generates Invalid Operation exn if either arg is any kind of NaN
14054      N=0 generates Invalid Operation exn if either arg is a signalling NaN
14055      (Not that we pay any attention to N here)
14056   */
14057   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14058       && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
14059       && BITS4(1,0,1,0) == INSN(11,8)
14060       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
14061      UInt bZ = (insn28 >> 16) & 1;
14062      UInt bN = (insn28 >> 7) & 1;
14063      UInt bD = (insn28 >> 22) & 1;
14064      UInt bM = (insn28 >> 5) & 1;
14065      UInt fD = (INSN(15,12) << 1) | bD;
14066      UInt fM = (INSN(3,0) << 1) | bM;
14067      if (bZ && (INSN(3,0) != 0 || (INSN(7,4) & 3) != 0)) {
14068         /* does not decode; fall through */
14069      } else {
14070         IRTemp argL = newTemp(Ity_F64);
14071         IRTemp argR = newTemp(Ity_F64);
14072         IRTemp irRes = newTemp(Ity_I32);
14073
14074         assign(argL, unop(Iop_F32toF64, getFReg(fD)));
14075         assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0))
14076                         : unop(Iop_F32toF64, getFReg(fM)));
14077         assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
14078
14079         IRTemp nzcv     = IRTemp_INVALID;
14080         IRTemp oldFPSCR = newTemp(Ity_I32);
14081         IRTemp newFPSCR = newTemp(Ity_I32);
14082
14083         /* This is where the fun starts.  We have to convert 'irRes'
14084            from an IR-convention return result (IRCmpF64Result) to an
14085            ARM-encoded (N,Z,C,V) group.  The final result is in the
14086            bottom 4 bits of 'nzcv'. */
14087         /* Map compare result from IR to ARM(nzcv) */
14088         /*
14089            FP cmp result | IR   | ARM(nzcv)
14090            --------------------------------
14091            UN              0x45   0011
14092            LT              0x01   1000
14093            GT              0x00   0010
14094            EQ              0x40   0110
14095         */
14096         nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
14097
14098         /* And update FPSCR accordingly */
14099         assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
14100         assign(newFPSCR,
14101                binop(Iop_Or32,
14102                      binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
14103                      binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
14104
14105         putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
14106
14107         if (bZ) {
14108            DIP("fcmpz%ss%s s%u\n", bN ? "e" : "", nCC(conq), fD);
14109         } else {
14110            DIP("fcmp%ss%s s%u, s%u\n", bN ? "e" : "",
14111                nCC(conq), fD, fM);
14112         }
14113         goto decode_success_vfp;
14114      }
14115      /* fall through */
14116   }
14117
14118   /* --------------------- unary (S) --------------------- */
14119   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14120       && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
14121       && BITS4(1,0,1,0) == INSN(11,8)
14122       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
14123      UInt bD = (insn28 >> 22) & 1;
14124      UInt bM = (insn28 >> 5) & 1;
14125      UInt fD  = (INSN(15,12) << 1) | bD;
14126      UInt fM  = (INSN(3,0) << 1) | bM;
14127      UInt b16 = (insn28 >> 16) & 1;
14128      UInt b7  = (insn28 >> 7) & 1;
14129      /**/ if (b16 == 0 && b7 == 0) {
14130         // FCPYS
14131         putFReg(fD, getFReg(fM), condT);
14132         DIP("fcpys%s s%u, s%u\n", nCC(conq), fD, fM);
14133         goto decode_success_vfp;
14134      }
14135      else if (b16 == 0 && b7 == 1) {
14136         // FABSS
14137         putFReg(fD, unop(Iop_AbsF32, getFReg(fM)), condT);
14138         DIP("fabss%s s%u, s%u\n", nCC(conq), fD, fM);
14139         goto decode_success_vfp;
14140      }
14141      else if (b16 == 1 && b7 == 0) {
14142         // FNEGS
14143         putFReg(fD, unop(Iop_NegF32, getFReg(fM)), condT);
14144         DIP("fnegs%s s%u, s%u\n", nCC(conq), fD, fM);
14145         goto decode_success_vfp;
14146      }
14147      else if (b16 == 1 && b7 == 1) {
14148         // FSQRTS
14149         IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
14150         putFReg(fD, binop(Iop_SqrtF32, rm, getFReg(fM)), condT);
14151         DIP("fsqrts%s s%u, s%u\n", nCC(conq), fD, fM);
14152         goto decode_success_vfp;
14153      }
14154      else
14155         vassert(0);
14156
14157      /* fall through */
14158   }
14159
14160   /* ----------------- I <-> S conversions ----------------- */
14161
14162   // F{S,U}ITOS fD, fM
14163   /* These are more complex than FSITOD/FUITOD.  In the D cases, a 32
14164      bit int will always fit within the 53 bit mantissa, so there's
14165      no possibility of a loss of precision, but that's obviously not
14166      the case here.  Hence this case possibly requires rounding, and
14167      so it drags in the current rounding mode. */
14168   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14169       && BITS4(1,0,0,0) == INSN(19,16)
14170       && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
14171       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
14172      UInt bM    = (insn28 >> 5) & 1;
14173      UInt bD    = (insn28 >> 22) & 1;
14174      UInt fM    = (INSN(3,0) << 1) | bM;
14175      UInt fD    = (INSN(15,12) << 1) | bD;
14176      UInt syned = (insn28 >> 7) & 1;
14177      IRTemp rmode = newTemp(Ity_I32);
14178      assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
14179      if (syned) {
14180         // FSITOS
14181         putFReg(fD, binop(Iop_F64toF32,
14182                           mkexpr(rmode),
14183                           unop(Iop_I32StoF64,
14184                                unop(Iop_ReinterpF32asI32, getFReg(fM)))),
14185                 condT);
14186         DIP("fsitos%s s%u, s%u\n", nCC(conq), fD, fM);
14187      } else {
14188         // FUITOS
14189         putFReg(fD, binop(Iop_F64toF32,
14190                           mkexpr(rmode),
14191                           unop(Iop_I32UtoF64,
14192                                unop(Iop_ReinterpF32asI32, getFReg(fM)))),
14193                 condT);
14194         DIP("fuitos%s s%u, s%u\n", nCC(conq), fD, fM);
14195      }
14196      goto decode_success_vfp;
14197   }
14198
14199   // FTO{S,U}IS fD, fM
14200   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14201       && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
14202       && BITS4(1,0,1,0) == INSN(11,8)
14203       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
14204      UInt   bM    = (insn28 >> 5) & 1;
14205      UInt   bD    = (insn28 >> 22) & 1;
14206      UInt   fD    = (INSN(15,12) << 1) | bD;
14207      UInt   fM    = (INSN(3,0) << 1) | bM;
14208      UInt   bZ    = (insn28 >> 7) & 1;
14209      UInt   syned = (insn28 >> 16) & 1;
14210      IRTemp rmode = newTemp(Ity_I32);
14211      assign(rmode, bZ ? mkU32(Irrm_ZERO)
14212                       : mkexpr(mk_get_IR_rounding_mode()));
14213      if (syned) {
14214         // FTOSIS
14215         putFReg(fD, unop(Iop_ReinterpI32asF32,
14216                          binop(Iop_F64toI32S, mkexpr(rmode),
14217                                unop(Iop_F32toF64, getFReg(fM)))),
14218                 condT);
14219         DIP("ftosi%ss%s s%u, d%u\n", bZ ? "z" : "",
14220             nCC(conq), fD, fM);
14221         goto decode_success_vfp;
14222      } else {
14223         // FTOUIS
14224         putFReg(fD, unop(Iop_ReinterpI32asF32,
14225                          binop(Iop_F64toI32U, mkexpr(rmode),
14226                                unop(Iop_F32toF64, getFReg(fM)))),
14227                 condT);
14228         DIP("ftoui%ss%s s%u, d%u\n", bZ ? "z" : "",
14229             nCC(conq), fD, fM);
14230         goto decode_success_vfp;
14231      }
14232   }
14233
14234   /* ----------------- S <-> D conversions ----------------- */
14235
14236   // FCVTDS
14237   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14238       && BITS4(0,1,1,1) == INSN(19,16)
14239       && BITS4(1,0,1,0) == INSN(11,8)
14240       && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
14241      UInt dD = INSN(15,12) | (INSN(22,22) << 4);
14242      UInt bM = (insn28 >> 5) & 1;
14243      UInt fM = (INSN(3,0) << 1) | bM;
14244      putDReg(dD, unop(Iop_F32toF64, getFReg(fM)), condT);
14245      DIP("fcvtds%s d%u, s%u\n", nCC(conq), dD, fM);
14246      goto decode_success_vfp;
14247   }
14248
14249   // FCVTSD
14250   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14251       && BITS4(0,1,1,1) == INSN(19,16)
14252       && BITS4(1,0,1,1) == INSN(11,8)
14253       && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
14254      UInt   bD    = (insn28 >> 22) & 1;
14255      UInt   fD    = (INSN(15,12) << 1) | bD;
14256      UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
14257      IRTemp rmode = newTemp(Ity_I32);
14258      assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
14259      putFReg(fD, binop(Iop_F64toF32, mkexpr(rmode), getDReg(dM)),
14260                  condT);
14261      DIP("fcvtsd%s s%u, d%u\n", nCC(conq), fD, dM);
14262      goto decode_success_vfp;
14263   }
14264
14265   /* --------------- VCVT fixed<->floating, VFP --------------- */
14266   /*          31   27   23   19   15 11   7    3
14267                 28   24   20   16 12    8    4    0
14268
14269               cond 1110 1D11 1p1U Vd 101f x1i0 imm4
14270
14271      VCVT<c>.<Td>.F64 <Dd>, <Dd>, #fbits
14272      VCVT<c>.<Td>.F32 <Sd>, <Sd>, #fbits
14273      VCVT<c>.F64.<Td> <Dd>, <Dd>, #fbits
14274      VCVT<c>.F32.<Td> <Sd>, <Sd>, #fbits
14275      are of this form.  We only handle a subset of the cases though.
14276   */
14277   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14278       && BITS4(1,0,1,0) == (INSN(19,16) & BITS4(1,0,1,0))
14279       && BITS3(1,0,1) == INSN(11,9)
14280       && BITS3(1,0,0) == (INSN(6,4) & BITS3(1,0,1))) {
14281      UInt bD        = INSN(22,22);
14282      UInt bOP       = INSN(18,18);
14283      UInt bU        = INSN(16,16);
14284      UInt Vd        = INSN(15,12);
14285      UInt bSF       = INSN(8,8);
14286      UInt bSX       = INSN(7,7);
14287      UInt bI        = INSN(5,5);
14288      UInt imm4      = INSN(3,0);
14289      Bool to_fixed  = bOP == 1;
14290      Bool dp_op     = bSF == 1;
14291      Bool unsyned   = bU == 1;
14292      UInt size      = bSX == 0 ? 16 : 32;
14293      Int  frac_bits = size - ((imm4 << 1) | bI);
14294      UInt d         = dp_op  ? ((bD << 4) | Vd)  : ((Vd << 1) | bD);
14295      if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && !dp_op
14296                                            && size == 32) {
14297         /* VCVT.F32.{S,U}32 S[d], S[d], #frac_bits */
14298         /* This generates really horrible code.  We could potentially
14299            do much better. */
14300         IRTemp rmode = newTemp(Ity_I32);
14301         assign(rmode, mkU32(Irrm_NEAREST)); // per the spec
14302         IRTemp src32 = newTemp(Ity_I32);
14303         assign(src32,  unop(Iop_ReinterpF32asI32, getFReg(d)));
14304         IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
14305                                mkexpr(src32 ) );
14306         IRTemp scale = newTemp(Ity_F64);
14307         assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
14308         IRExpr* rm     = mkU32(Irrm_NEAREST);
14309         IRExpr* resF64 = triop(Iop_DivF64,
14310                                rm, as_F64,
14311                                triop(Iop_AddF64, rm, mkexpr(scale),
14312                                                      mkexpr(scale)));
14313         IRExpr* resF32 = binop(Iop_F64toF32, mkexpr(rmode), resF64);
14314         putFReg(d, resF32, condT);
14315         DIP("vcvt.f32.%c32, s%u, s%u, #%d\n",
14316             unsyned ? 'u' : 's', d, d, frac_bits);
14317         goto decode_success_vfp;
14318      }
14319      if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && dp_op
14320                                            && size == 32) {
14321         /* VCVT.F64.{S,U}32 D[d], D[d], #frac_bits */
14322         /* This generates really horrible code.  We could potentially
14323            do much better. */
14324         IRTemp src32 = newTemp(Ity_I32);
14325         assign(src32, unop(Iop_64to32, getDRegI64(d)));
14326         IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
14327                                mkexpr(src32 ) );
14328         IRTemp scale = newTemp(Ity_F64);
14329         assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
14330         IRExpr* rm     = mkU32(Irrm_NEAREST);
14331         IRExpr* resF64 = triop(Iop_DivF64,
14332                                rm, as_F64,
14333                                triop(Iop_AddF64, rm, mkexpr(scale),
14334                                                      mkexpr(scale)));
14335         putDReg(d, resF64, condT);
14336         DIP("vcvt.f64.%c32, d%u, d%u, #%d\n",
14337             unsyned ? 'u' : 's', d, d, frac_bits);
14338         goto decode_success_vfp;
14339      }
14340      if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && dp_op
14341                                            && size == 32) {
14342         /* VCVT.{S,U}32.F64 D[d], D[d], #frac_bits */
14343         IRTemp srcF64 = newTemp(Ity_F64);
14344         assign(srcF64, getDReg(d));
14345         IRTemp scale = newTemp(Ity_F64);
14346         assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
14347         IRTemp scaledF64 = newTemp(Ity_F64);
14348         IRExpr* rm = mkU32(Irrm_NEAREST);
14349         assign(scaledF64, triop(Iop_MulF64,
14350                                 rm, mkexpr(srcF64),
14351                                 triop(Iop_AddF64, rm, mkexpr(scale),
14352                                                       mkexpr(scale))));
14353         IRTemp rmode = newTemp(Ity_I32);
14354         assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
14355         IRTemp asI32 = newTemp(Ity_I32);
14356         assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
14357                             mkexpr(rmode), mkexpr(scaledF64)));
14358         putDRegI64(d, unop(unsyned ? Iop_32Uto64 : Iop_32Sto64,
14359                            mkexpr(asI32)), condT);
14360         goto decode_success_vfp;
14361      }
14362      /* fall through */
14363   }
14364
14365   /* FAILURE */
14366   return False;
14367
14368  decode_success_vfp:
14369   /* Check that any accepted insn really is a CP10 or CP11 insn, iow,
14370      assert that we aren't accepting, in this fn, insns that actually
14371      should be handled somewhere else. */
14372   vassert(INSN(11,9) == BITS3(1,0,1)); // 11:8 = 1010 or 1011
14373   return True;
14374
14375#  undef INSN
14376}
14377
14378
14379/*------------------------------------------------------------*/
14380/*--- Instructions in NV (never) space                     ---*/
14381/*------------------------------------------------------------*/
14382
14383/* ARM only */
14384/* Translate a NV space instruction.  If successful, returns True and
14385   *dres may or may not be updated.  If failure, returns False and
14386   doesn't change *dres nor create any IR.
14387
14388   Note that all NEON instructions (in ARM mode) are handled through
14389   here, since they are all in NV space.
14390*/
14391static Bool decode_NV_instruction ( /*MOD*/DisResult* dres,
14392                                    VexArchInfo* archinfo,
14393                                    UInt insn )
14394{
14395#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
14396#  define INSN_COND          SLICE_UInt(insn, 31, 28)
14397
14398   HChar dis_buf[128];
14399
14400   // Should only be called for NV instructions
14401   vassert(BITS4(1,1,1,1) == INSN_COND);
14402
14403   /* ------------------------ pld ------------------------ */
14404   if (BITS8(0,1,0,1, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
14405       && BITS4(1,1,1,1) == INSN(15,12)) {
14406      UInt rN    = INSN(19,16);
14407      UInt imm12 = INSN(11,0);
14408      UInt bU    = INSN(23,23);
14409      DIP("pld [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
14410      return True;
14411   }
14412
14413   if (BITS8(0,1,1,1, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
14414       && BITS4(1,1,1,1) == INSN(15,12)
14415       && 0 == INSN(4,4)) {
14416      UInt rN   = INSN(19,16);
14417      UInt rM   = INSN(3,0);
14418      UInt imm5 = INSN(11,7);
14419      UInt sh2  = INSN(6,5);
14420      UInt bU   = INSN(23,23);
14421      if (rM != 15) {
14422         IRExpr* eaE = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
14423                                                       sh2, imm5, dis_buf);
14424         IRTemp eaT = newTemp(Ity_I32);
14425         /* Bind eaE to a temp merely for debugging-vex purposes, so we
14426            can check it's a plausible decoding.  It will get removed
14427            by iropt a little later on. */
14428         vassert(eaE);
14429         assign(eaT, eaE);
14430         DIP("pld %s\n", dis_buf);
14431         return True;
14432      }
14433      /* fall through */
14434   }
14435
14436   /* ------------------------ pli ------------------------ */
14437   if (BITS8(0,1,0,0, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
14438       && BITS4(1,1,1,1) == INSN(15,12)) {
14439      UInt rN    = INSN(19,16);
14440      UInt imm12 = INSN(11,0);
14441      UInt bU    = INSN(23,23);
14442      DIP("pli [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
14443      return True;
14444   }
14445
14446   /* --------------------- Interworking branches --------------------- */
14447
14448   // BLX (1), viz, unconditional branch and link to R15+simm24
14449   // and set CPSR.T = 1, that is, switch to Thumb mode
14450   if (INSN(31,25) == BITS7(1,1,1,1,1,0,1)) {
14451      UInt bitH   = INSN(24,24);
14452      Int  uimm24 = INSN(23,0);
14453      Int  simm24 = (((uimm24 << 8) >> 8) << 2) + (bitH << 1);
14454      /* Now this is a bit tricky.  Since we're decoding an ARM insn,
14455         it is implies that CPSR.T == 0.  Hence the current insn's
14456         address is guaranteed to be of the form X--(30)--X00.  So, no
14457         need to mask any bits off it.  But need to set the lowest bit
14458         to 1 to denote we're in Thumb mode after this, since
14459         guest_R15T has CPSR.T as the lowest bit.  And we can't chase
14460         into the call, so end the block at this point. */
14461      UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
14462      putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
14463                    IRTemp_INVALID/*because AL*/, Ijk_Boring );
14464      llPutIReg(15, mkU32(dst));
14465      dres->jk_StopHere = Ijk_Call;
14466      dres->whatNext    = Dis_StopHere;
14467      DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1);
14468      return True;
14469   }
14470
14471   /* ------------------- v7 barrier insns ------------------- */
14472   switch (insn) {
14473      case 0xF57FF06F: /* ISB */
14474         stmt( IRStmt_MBE(Imbe_Fence) );
14475         DIP("ISB\n");
14476         return True;
14477      case 0xF57FF04F: /* DSB sy */
14478      case 0xF57FF04E: /* DSB st */
14479      case 0xF57FF04B: /* DSB ish */
14480      case 0xF57FF04A: /* DSB ishst */
14481      case 0xF57FF047: /* DSB nsh */
14482      case 0xF57FF046: /* DSB nshst */
14483      case 0xF57FF043: /* DSB osh */
14484      case 0xF57FF042: /* DSB oshst */
14485         stmt( IRStmt_MBE(Imbe_Fence) );
14486         DIP("DSB\n");
14487         return True;
14488      case 0xF57FF05F: /* DMB sy */
14489      case 0xF57FF05E: /* DMB st */
14490      case 0xF57FF05B: /* DMB ish */
14491      case 0xF57FF05A: /* DMB ishst */
14492      case 0xF57FF057: /* DMB nsh */
14493      case 0xF57FF056: /* DMB nshst */
14494      case 0xF57FF053: /* DMB osh */
14495      case 0xF57FF052: /* DMB oshst */
14496         stmt( IRStmt_MBE(Imbe_Fence) );
14497         DIP("DMB\n");
14498         return True;
14499      default:
14500         break;
14501   }
14502
14503   /* ------------------- CLREX ------------------ */
14504   if (insn == 0xF57FF01F) {
14505      /* AFAICS, this simply cancels a (all?) reservations made by a
14506         (any?) preceding LDREX(es).  Arrange to hand it through to
14507         the back end. */
14508      stmt( IRStmt_MBE(Imbe_CancelReservation) );
14509      DIP("clrex\n");
14510      return True;
14511   }
14512
14513   /* ------------------- NEON ------------------- */
14514   if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
14515      Bool ok_neon = decode_NEON_instruction(
14516                        dres, insn, IRTemp_INVALID/*unconditional*/,
14517                        False/*!isT*/
14518                     );
14519      if (ok_neon)
14520         return True;
14521   }
14522
14523   // unrecognised
14524   return False;
14525
14526#  undef INSN_COND
14527#  undef INSN
14528}
14529
14530
14531/*------------------------------------------------------------*/
14532/*--- Disassemble a single ARM instruction                 ---*/
14533/*------------------------------------------------------------*/
14534
14535/* Disassemble a single ARM instruction into IR.  The instruction is
14536   located in host memory at guest_instr, and has (decoded) guest IP
14537   of guest_R15_curr_instr_notENC, which will have been set before the
14538   call here. */
14539
14540static
14541DisResult disInstr_ARM_WRK (
14542             Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
14543             Bool         resteerCisOk,
14544             void*        callback_opaque,
14545             UChar*       guest_instr,
14546             VexArchInfo* archinfo,
14547             VexAbiInfo*  abiinfo,
14548             Bool         sigill_diag
14549          )
14550{
14551   // A macro to fish bits out of 'insn'.
14552#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
14553#  define INSN_COND          SLICE_UInt(insn, 31, 28)
14554
14555   DisResult dres;
14556   UInt      insn;
14557   //Bool      allow_VFP = False;
14558   //UInt      hwcaps = archinfo->hwcaps;
14559   IRTemp    condT; /* :: Ity_I32 */
14560   UInt      summary;
14561   HChar     dis_buf[128];  // big enough to hold LDMIA etc text
14562
14563   /* What insn variants are we supporting today? */
14564   //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
14565   // etc etc
14566
14567   /* Set result defaults. */
14568   dres.whatNext    = Dis_Continue;
14569   dres.len         = 4;
14570   dres.continueAt  = 0;
14571   dres.jk_StopHere = Ijk_INVALID;
14572
14573   /* Set default actions for post-insn handling of writes to r15, if
14574      required. */
14575   r15written = False;
14576   r15guard   = IRTemp_INVALID; /* unconditional */
14577   r15kind    = Ijk_Boring;
14578
14579   /* At least this is simple on ARM: insns are all 4 bytes long, and
14580      4-aligned.  So just fish the whole thing out of memory right now
14581      and have done. */
14582   insn = getUIntLittleEndianly( guest_instr );
14583
14584   if (0) vex_printf("insn: 0x%x\n", insn);
14585
14586   DIP("\t(arm) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
14587
14588   vassert(0 == (guest_R15_curr_instr_notENC & 3));
14589
14590   /* ----------------------------------------------------------- */
14591
14592   /* Spot "Special" instructions (see comment at top of file). */
14593   {
14594      UChar* code = (UChar*)guest_instr;
14595      /* Spot the 16-byte preamble:
14596
14597         e1a0c1ec  mov r12, r12, ROR #3
14598         e1a0c6ec  mov r12, r12, ROR #13
14599         e1a0ceec  mov r12, r12, ROR #29
14600         e1a0c9ec  mov r12, r12, ROR #19
14601      */
14602      UInt word1 = 0xE1A0C1EC;
14603      UInt word2 = 0xE1A0C6EC;
14604      UInt word3 = 0xE1A0CEEC;
14605      UInt word4 = 0xE1A0C9EC;
14606      if (getUIntLittleEndianly(code+ 0) == word1 &&
14607          getUIntLittleEndianly(code+ 4) == word2 &&
14608          getUIntLittleEndianly(code+ 8) == word3 &&
14609          getUIntLittleEndianly(code+12) == word4) {
14610         /* Got a "Special" instruction preamble.  Which one is it? */
14611         if (getUIntLittleEndianly(code+16) == 0xE18AA00A
14612                                               /* orr r10,r10,r10 */) {
14613            /* R3 = client_request ( R4 ) */
14614            DIP("r3 = client_request ( %%r4 )\n");
14615            llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
14616            dres.jk_StopHere = Ijk_ClientReq;
14617            dres.whatNext    = Dis_StopHere;
14618            goto decode_success;
14619         }
14620         else
14621         if (getUIntLittleEndianly(code+16) == 0xE18BB00B
14622                                               /* orr r11,r11,r11 */) {
14623            /* R3 = guest_NRADDR */
14624            DIP("r3 = guest_NRADDR\n");
14625            dres.len = 20;
14626            llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
14627            goto decode_success;
14628         }
14629         else
14630         if (getUIntLittleEndianly(code+16) == 0xE18CC00C
14631                                               /* orr r12,r12,r12 */) {
14632            /*  branch-and-link-to-noredir R4 */
14633            DIP("branch-and-link-to-noredir r4\n");
14634            llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
14635            llPutIReg(15, llGetIReg(4));
14636            dres.jk_StopHere = Ijk_NoRedir;
14637            dres.whatNext    = Dis_StopHere;
14638            goto decode_success;
14639         }
14640         else
14641         if (getUIntLittleEndianly(code+16) == 0xE1899009
14642                                               /* orr r9,r9,r9 */) {
14643            /* IR injection */
14644            DIP("IR injection\n");
14645            vex_inject_ir(irsb, Iend_LE);
14646            // Invalidate the current insn. The reason is that the IRop we're
14647            // injecting here can change. In which case the translation has to
14648            // be redone. For ease of handling, we simply invalidate all the
14649            // time.
14650            stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
14651            stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
14652            llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
14653            dres.whatNext    = Dis_StopHere;
14654            dres.jk_StopHere = Ijk_InvalICache;
14655            goto decode_success;
14656         }
         /* We don't know what it is.  Set insn so decode_failure
            can print the insn following the Special-insn preamble. */
14659         insn = getUIntLittleEndianly(code+16);
14660         goto decode_failure;
14661         /*NOTREACHED*/
14662      }
14663
14664   }
14665
14666   /* ----------------------------------------------------------- */
14667
14668   /* Main ARM instruction decoder starts here. */
14669
14670   /* Deal with the condition.  Strategy is to merely generate a
14671      condition temporary at this point (or IRTemp_INVALID, meaning
14672      unconditional).  We leave it to lower-level instruction decoders
14673      to decide whether they can generate straight-line code, or
14674      whether they must generate a side exit before the instruction.
14675      condT :: Ity_I32 and is always either zero or one. */
14676   condT = IRTemp_INVALID;
14677   switch ( (ARMCondcode)INSN_COND ) {
14678      case ARMCondNV: {
14679         // Illegal instruction prior to v5 (see ARM ARM A3-5), but
14680         // some cases are acceptable
14681         Bool ok = decode_NV_instruction(&dres, archinfo, insn);
14682         if (ok)
14683            goto decode_success;
14684         else
14685            goto decode_failure;
14686      }
14687      case ARMCondAL: // Always executed
14688         break;
14689      case ARMCondEQ: case ARMCondNE: case ARMCondHS: case ARMCondLO:
14690      case ARMCondMI: case ARMCondPL: case ARMCondVS: case ARMCondVC:
14691      case ARMCondHI: case ARMCondLS: case ARMCondGE: case ARMCondLT:
14692      case ARMCondGT: case ARMCondLE:
14693         condT = newTemp(Ity_I32);
14694         assign( condT, mk_armg_calculate_condition( INSN_COND ));
14695         break;
14696   }
14697
14698   /* ----------------------------------------------------------- */
14699   /* -- ARMv5 integer instructions                            -- */
14700   /* ----------------------------------------------------------- */
14701
14702   /* ---------------- Data processing ops ------------------- */
14703
14704   if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0))
14705       && !(INSN(25,25) == 0 && INSN(7,7) == 1 && INSN(4,4) == 1)) {
14706      IRTemp  shop = IRTemp_INVALID; /* shifter operand */
14707      IRTemp  shco = IRTemp_INVALID; /* shifter carry out */
14708      UInt    rD   = (insn >> 12) & 0xF; /* 15:12 */
14709      UInt    rN   = (insn >> 16) & 0xF; /* 19:16 */
14710      UInt    bitS = (insn >> 20) & 1; /* 20:20 */
14711      IRTemp  rNt  = IRTemp_INVALID;
14712      IRTemp  res  = IRTemp_INVALID;
14713      IRTemp  oldV = IRTemp_INVALID;
14714      IRTemp  oldC = IRTemp_INVALID;
14715      const HChar*  name = NULL;
14716      IROp    op   = Iop_INVALID;
14717      Bool    ok;
14718
14719      switch (INSN(24,21)) {
14720
14721         /* --------- ADD, SUB, AND, OR --------- */
14722         case BITS4(0,1,0,0): /* ADD:  Rd = Rn + shifter_operand */
14723            name = "add"; op = Iop_Add32; goto rd_eq_rn_op_SO;
14724         case BITS4(0,0,1,0): /* SUB:  Rd = Rn - shifter_operand */
14725            name = "sub"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
14726         case BITS4(0,0,1,1): /* RSB:  Rd = shifter_operand - Rn */
14727            name = "rsb"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
14728         case BITS4(0,0,0,0): /* AND:  Rd = Rn & shifter_operand */
14729            name = "and"; op = Iop_And32; goto rd_eq_rn_op_SO;
14730         case BITS4(1,1,0,0): /* OR:   Rd = Rn | shifter_operand */
14731            name = "orr"; op = Iop_Or32; goto rd_eq_rn_op_SO;
14732         case BITS4(0,0,0,1): /* EOR:  Rd = Rn ^ shifter_operand */
14733            name = "eor"; op = Iop_Xor32; goto rd_eq_rn_op_SO;
14734         case BITS4(1,1,1,0): /* BIC:  Rd = Rn & ~shifter_operand */
14735            name = "bic"; op = Iop_And32; goto rd_eq_rn_op_SO;
14736         rd_eq_rn_op_SO: {
14737            Bool isRSB = False;
14738            Bool isBIC = False;
14739            switch (INSN(24,21)) {
14740               case BITS4(0,0,1,1):
14741                  vassert(op == Iop_Sub32); isRSB = True; break;
14742               case BITS4(1,1,1,0):
14743                  vassert(op == Iop_And32); isBIC = True; break;
14744               default:
14745                  break;
14746            }
14747            rNt = newTemp(Ity_I32);
14748            assign(rNt, getIRegA(rN));
14749            ok = mk_shifter_operand(
14750                    INSN(25,25), INSN(11,0),
14751                    &shop, bitS ? &shco : NULL, dis_buf
14752                 );
14753            if (!ok)
14754               break;
14755            res = newTemp(Ity_I32);
14756            // compute the main result
14757            if (isRSB) {
14758               // reverse-subtract: shifter_operand - Rn
14759               vassert(op == Iop_Sub32);
14760               assign(res, binop(op, mkexpr(shop), mkexpr(rNt)) );
14761            } else if (isBIC) {
14762               // andn: shifter_operand & ~Rn
14763               vassert(op == Iop_And32);
14764               assign(res, binop(op, mkexpr(rNt),
14765                                     unop(Iop_Not32, mkexpr(shop))) );
14766            } else {
14767               // normal: Rn op shifter_operand
14768               assign(res, binop(op, mkexpr(rNt), mkexpr(shop)) );
14769            }
14770            // but don't commit it until after we've finished
14771            // all necessary reads from the guest state
14772            if (bitS
14773                && (op == Iop_And32 || op == Iop_Or32 || op == Iop_Xor32)) {
14774               oldV = newTemp(Ity_I32);
14775               assign( oldV, mk_armg_calculate_flag_v() );
14776            }
14777            // can't safely read guest state after here
14778            // now safe to put the main result
14779            putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
14780            // XXXX!! not safe to read any guest state after
14781            // this point (I think the code below doesn't do that).
14782            if (!bitS)
14783               vassert(shco == IRTemp_INVALID);
14784            /* Update the flags thunk if necessary */
14785            if (bitS) {
14786               vassert(shco != IRTemp_INVALID);
14787               switch (op) {
14788                  case Iop_Add32:
14789                     setFlags_D1_D2( ARMG_CC_OP_ADD, rNt, shop, condT );
14790                     break;
14791                  case Iop_Sub32:
14792                     if (isRSB) {
14793                        setFlags_D1_D2( ARMG_CC_OP_SUB, shop, rNt, condT );
14794                     } else {
14795                        setFlags_D1_D2( ARMG_CC_OP_SUB, rNt, shop, condT );
14796                     }
14797                     break;
14798                  case Iop_And32: /* BIC and AND set the flags the same */
14799                  case Iop_Or32:
14800                  case Iop_Xor32:
14801                     // oldV has been read just above
14802                     setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
14803                                        res, shco, oldV, condT );
14804                     break;
14805                  default:
14806                     vassert(0);
14807               }
14808            }
14809            DIP("%s%s%s r%u, r%u, %s\n",
14810                name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
14811            goto decode_success;
14812         }
14813
14814         /* --------- MOV, MVN --------- */
14815         case BITS4(1,1,0,1):   /* MOV: Rd = shifter_operand */
14816         case BITS4(1,1,1,1): { /* MVN: Rd = not(shifter_operand) */
14817            Bool isMVN = INSN(24,21) == BITS4(1,1,1,1);
14818            IRTemp jk = Ijk_Boring;
14819            if (rN != 0)
14820               break; /* rN must be zero */
14821            ok = mk_shifter_operand(
14822                    INSN(25,25), INSN(11,0),
14823                    &shop, bitS ? &shco : NULL, dis_buf
14824                 );
14825            if (!ok)
14826               break;
14827            res = newTemp(Ity_I32);
14828            assign( res, isMVN ? unop(Iop_Not32, mkexpr(shop))
14829                               : mkexpr(shop) );
14830            if (bitS) {
14831               vassert(shco != IRTemp_INVALID);
14832               oldV = newTemp(Ity_I32);
14833               assign( oldV, mk_armg_calculate_flag_v() );
14834            } else {
14835               vassert(shco == IRTemp_INVALID);
14836            }
14837            /* According to the Cortex A8 TRM Sec. 5.2.1, MOV PC, r14 is a
14838                return for purposes of branch prediction. */
14839            if (!isMVN && INSN(11,0) == 14) {
14840              jk = Ijk_Ret;
14841            }
14842            // can't safely read guest state after here
14843            putIRegA( rD, mkexpr(res), condT, jk );
14844            /* Update the flags thunk if necessary */
14845            if (bitS) {
14846               setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
14847                                  res, shco, oldV, condT );
14848            }
14849            DIP("%s%s%s r%u, %s\n",
14850                isMVN ? "mvn" : "mov",
14851                nCC(INSN_COND), bitS ? "s" : "", rD, dis_buf );
14852            goto decode_success;
14853         }
14854
14855         /* --------- CMP --------- */
14856         case BITS4(1,0,1,0):   /* CMP:  (void) Rn - shifter_operand */
14857         case BITS4(1,0,1,1): { /* CMN:  (void) Rn + shifter_operand */
14858            Bool isCMN = INSN(24,21) == BITS4(1,0,1,1);
14859            if (rD != 0)
14860               break; /* rD must be zero */
14861            if (bitS == 0)
14862               break; /* if S (bit 20) is not set, it's not CMP/CMN */
14863            rNt = newTemp(Ity_I32);
14864            assign(rNt, getIRegA(rN));
14865            ok = mk_shifter_operand(
14866                    INSN(25,25), INSN(11,0),
14867                    &shop, NULL, dis_buf
14868                 );
14869            if (!ok)
14870               break;
14871            // can't safely read guest state after here
14872            /* Update the flags thunk. */
14873            setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
14874                            rNt, shop, condT );
14875            DIP("%s%s r%u, %s\n",
14876                isCMN ? "cmn" : "cmp",
14877                nCC(INSN_COND), rN, dis_buf );
14878            goto decode_success;
14879         }
14880
14881         /* --------- TST --------- */
14882         case BITS4(1,0,0,0):   /* TST:  (void) Rn & shifter_operand */
14883         case BITS4(1,0,0,1): { /* TEQ:  (void) Rn ^ shifter_operand */
14884            Bool isTEQ = INSN(24,21) == BITS4(1,0,0,1);
14885            if (rD != 0)
14886               break; /* rD must be zero */
14887            if (bitS == 0)
14888               break; /* if S (bit 20) is not set, it's not TST/TEQ */
14889            rNt = newTemp(Ity_I32);
14890            assign(rNt, getIRegA(rN));
14891            ok = mk_shifter_operand(
14892                    INSN(25,25), INSN(11,0),
14893                    &shop, &shco, dis_buf
14894                 );
14895            if (!ok)
14896               break;
14897            /* Update the flags thunk. */
14898            res = newTemp(Ity_I32);
14899            assign( res, binop(isTEQ ? Iop_Xor32 : Iop_And32,
14900                               mkexpr(rNt), mkexpr(shop)) );
14901            oldV = newTemp(Ity_I32);
14902            assign( oldV, mk_armg_calculate_flag_v() );
14903            // can't safely read guest state after here
14904            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
14905                               res, shco, oldV, condT );
14906            DIP("%s%s r%u, %s\n",
14907                isTEQ ? "teq" : "tst",
14908                nCC(INSN_COND), rN, dis_buf );
14909            goto decode_success;
14910         }
14911
14912         /* --------- ADC, SBC, RSC --------- */
14913         case BITS4(0,1,0,1): /* ADC:  Rd = Rn + shifter_operand + oldC */
14914            name = "adc"; goto rd_eq_rn_op_SO_op_oldC;
14915         case BITS4(0,1,1,0): /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
14916            name = "sbc"; goto rd_eq_rn_op_SO_op_oldC;
14917         case BITS4(0,1,1,1): /* RSC:  Rd = shifter_operand - Rn - (oldC ^ 1) */
14918            name = "rsc"; goto rd_eq_rn_op_SO_op_oldC;
14919         rd_eq_rn_op_SO_op_oldC: {
14920            // FIXME: shco isn't used for anything.  Get rid of it.
14921            rNt = newTemp(Ity_I32);
14922            assign(rNt, getIRegA(rN));
14923            ok = mk_shifter_operand(
14924                    INSN(25,25), INSN(11,0),
14925                    &shop, bitS ? &shco : NULL, dis_buf
14926                 );
14927            if (!ok)
14928               break;
14929            oldC = newTemp(Ity_I32);
14930            assign( oldC, mk_armg_calculate_flag_c() );
14931            res = newTemp(Ity_I32);
14932            // compute the main result
14933            switch (INSN(24,21)) {
14934               case BITS4(0,1,0,1): /* ADC */
14935                  assign(res,
14936                         binop(Iop_Add32,
14937                               binop(Iop_Add32, mkexpr(rNt), mkexpr(shop)),
14938                               mkexpr(oldC) ));
14939                  break;
14940               case BITS4(0,1,1,0): /* SBC */
14941                  assign(res,
14942                         binop(Iop_Sub32,
14943                               binop(Iop_Sub32, mkexpr(rNt), mkexpr(shop)),
14944                               binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
14945                  break;
14946               case BITS4(0,1,1,1): /* RSC */
14947                  assign(res,
14948                         binop(Iop_Sub32,
14949                               binop(Iop_Sub32, mkexpr(shop), mkexpr(rNt)),
14950                               binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
14951                  break;
14952               default:
14953                  vassert(0);
14954            }
14955            // but don't commit it until after we've finished
14956            // all necessary reads from the guest state
14957            // now safe to put the main result
14958            putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
14959            // XXXX!! not safe to read any guest state after
14960            // this point (I think the code below doesn't do that).
14961            if (!bitS)
14962               vassert(shco == IRTemp_INVALID);
14963            /* Update the flags thunk if necessary */
14964            if (bitS) {
14965               vassert(shco != IRTemp_INVALID);
14966               switch (INSN(24,21)) {
14967                  case BITS4(0,1,0,1): /* ADC */
14968                     setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
14969                                        rNt, shop, oldC, condT );
14970                     break;
14971                  case BITS4(0,1,1,0): /* SBC */
14972                     setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
14973                                        rNt, shop, oldC, condT );
14974                     break;
14975                  case BITS4(0,1,1,1): /* RSC */
14976                     setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
14977                                        shop, rNt, oldC, condT );
14978                     break;
14979                  default:
14980                     vassert(0);
14981               }
14982            }
14983            DIP("%s%s%s r%u, r%u, %s\n",
14984                name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
14985            goto decode_success;
14986         }
14987
14988         default:
14989            vassert(0);
14990      }
14991   } /* if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0)) */
14992
14993   /* --------------------- Load/store (ubyte & word) -------- */
14994   // LDR STR LDRB STRB
14995   /*                 31   27   23   19 15 11    6   4 3  # highest bit
14996                        28   24   20 16 12
14997      A5-20   1 | 16  cond 0101 UB0L Rn Rd imm12
14998      A5-22   1 | 32  cond 0111 UBOL Rn Rd imm5  sh2 0 Rm
14999      A5-24   2 | 16  cond 0101 UB1L Rn Rd imm12
15000      A5-26   2 | 32  cond 0111 UB1L Rn Rd imm5  sh2 0 Rm
15001      A5-28   3 | 16  cond 0100 UB0L Rn Rd imm12
15002      A5-32   3 | 32  cond 0110 UB0L Rn Rd imm5  sh2 0 Rm
15003   */
15004   /* case coding:
15005             1   at-ea               (access at ea)
15006             2   at-ea-then-upd      (access at ea, then Rn = ea)
15007             3   at-Rn-then-upd      (access at Rn, then Rn = ea)
15008      ea coding
15009             16  Rn +/- imm12
15010             32  Rn +/- Rm sh2 imm5
15011   */
15012   /* Quickly skip over all of this for hopefully most instructions */
15013   if ((INSN(27,24) & BITS4(1,1,0,0)) != BITS4(0,1,0,0))
15014      goto after_load_store_ubyte_or_word;
15015
15016   summary = 0;
15017
15018   /**/ if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 0) {
15019      summary = 1 | 16;
15020   }
15021   else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 0
15022                                          && INSN(4,4) == 0) {
15023      summary = 1 | 32;
15024   }
15025   else if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 1) {
15026      summary = 2 | 16;
15027   }
15028   else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 1
15029                                          && INSN(4,4) == 0) {
15030      summary = 2 | 32;
15031   }
15032   else if (INSN(27,24) == BITS4(0,1,0,0) && INSN(21,21) == 0) {
15033      summary = 3 | 16;
15034   }
15035   else if (INSN(27,24) == BITS4(0,1,1,0) && INSN(21,21) == 0
15036                                          && INSN(4,4) == 0) {
15037      summary = 3 | 32;
15038   }
15039   else goto after_load_store_ubyte_or_word;
15040
15041   { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
15042     UInt rD = (insn >> 12) & 0xF; /* 15:12 */
15043     UInt rM = (insn >> 0)  & 0xF; /*  3:0  */
15044     UInt bU = (insn >> 23) & 1;      /* 23 */
15045     UInt bB = (insn >> 22) & 1;      /* 22 */
15046     UInt bL = (insn >> 20) & 1;      /* 20 */
15047     UInt imm12 = (insn >> 0) & 0xFFF; /* 11:0 */
15048     UInt imm5  = (insn >> 7) & 0x1F;  /* 11:7 */
15049     UInt sh2   = (insn >> 5) & 3;     /* 6:5 */
15050
15051     /* Skip some invalid cases, which would lead to two competing
15052        updates to the same register, or which are otherwise
15053        disallowed by the spec. */
15054     switch (summary) {
15055        case 1 | 16:
15056           break;
15057        case 1 | 32:
15058           if (rM == 15) goto after_load_store_ubyte_or_word;
15059           break;
15060        case 2 | 16: case 3 | 16:
15061           if (rN == 15) goto after_load_store_ubyte_or_word;
15062           if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
15063           break;
15064        case 2 | 32: case 3 | 32:
15065           if (rM == 15) goto after_load_store_ubyte_or_word;
15066           if (rN == 15) goto after_load_store_ubyte_or_word;
15067           if (rN == rM) goto after_load_store_ubyte_or_word;
15068           if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
15069           break;
15070        default:
15071           vassert(0);
15072     }
15073
15074     /* compute the effective address.  Bind it to a tmp since we
15075        may need to use it twice. */
15076     IRExpr* eaE = NULL;
15077     switch (summary & 0xF0) {
15078        case 16:
15079           eaE = mk_EA_reg_plusminus_imm12( rN, bU, imm12, dis_buf );
15080           break;
15081        case 32:
15082           eaE = mk_EA_reg_plusminus_shifted_reg( rN, bU, rM, sh2, imm5,
15083                                                  dis_buf );
15084           break;
15085     }
15086     vassert(eaE);
15087     IRTemp eaT = newTemp(Ity_I32);
15088     assign(eaT, eaE);
15089
15090     /* get the old Rn value */
15091     IRTemp rnT = newTemp(Ity_I32);
15092     assign(rnT, getIRegA(rN));
15093
15094     /* decide on the transfer address */
15095     IRTemp taT = IRTemp_INVALID;
15096     switch (summary & 0x0F) {
15097        case 1: case 2: taT = eaT; break;
15098        case 3:         taT = rnT; break;
15099     }
15100     vassert(taT != IRTemp_INVALID);
15101
15102     if (bL == 0) {
15103       /* Store.  If necessary, update the base register before the
15104          store itself, so that the common idiom of "str rX, [sp,
15105          #-4]!" (store rX at sp-4, then do new sp = sp-4, a.k.a "push
15106          rX") doesn't cause Memcheck to complain that the access is
15107          below the stack pointer.  Also, not updating sp before the
15108          store confuses Valgrind's dynamic stack-extending logic.  So
15109          do it before the store.  Hence we need to snarf the store
15110          data before doing the basereg update. */
15111
15112        /* get hold of the data to be stored */
15113        IRTemp rDt = newTemp(Ity_I32);
15114        assign(rDt, getIRegA(rD));
15115
15116        /* Update Rn if necessary. */
15117        switch (summary & 0x0F) {
15118           case 2: case 3:
15119              putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
15120              break;
15121        }
15122
15123        /* generate the transfer */
15124        if (bB == 0) { // word store
15125           storeGuardedLE( mkexpr(taT), mkexpr(rDt), condT );
15126        } else { // byte store
15127           vassert(bB == 1);
15128           storeGuardedLE( mkexpr(taT), unop(Iop_32to8, mkexpr(rDt)), condT );
15129        }
15130
15131     } else {
15132        /* Load */
15133        vassert(bL == 1);
15134
15135        /* generate the transfer */
15136        if (bB == 0) { // word load
15137           IRTemp jk = Ijk_Boring;
15138           /* According to the Cortex A8 TRM Sec. 5.2.1, LDR(1) with r13 as the
15139               base register and PC as the destination register is a return for
15140               purposes of branch prediction.
15141              The ARM ARM Sec. C9.10.1 further specifies that it must use a
15142               post-increment by immediate addressing mode to be counted in
15143               event 0x0E (Procedure return).*/
15144           if (rN == 13 && summary == (3 | 16) && bB == 0) {
15145              jk = Ijk_Ret;
15146           }
15147           IRTemp tD = newTemp(Ity_I32);
15148           loadGuardedLE( tD, ILGop_Ident32,
15149                          mkexpr(taT), llGetIReg(rD), condT );
15150           /* "rD == 15 ? condT : IRTemp_INVALID": simply
15151              IRTemp_INVALID would be correct in all cases here, and
15152              for the non-r15 case it generates better code, by
15153              avoiding two tests of the cond (since it is already
15154              tested by loadGuardedLE).  However, the logic at the end
15155              of this function, that deals with writes to r15, has an
15156              optimisation which depends on seeing whether or not the
15157              write is conditional.  Hence in this particular case we
15158              let it "see" the guard condition. */
15159           putIRegA( rD, mkexpr(tD),
15160                     rD == 15 ? condT : IRTemp_INVALID, jk );
15161        } else { // byte load
15162           vassert(bB == 1);
15163           IRTemp tD = newTemp(Ity_I32);
15164           loadGuardedLE( tD, ILGop_8Uto32, mkexpr(taT), llGetIReg(rD), condT );
15165           /* No point in similar 3rd arg complexity here, since we
15166              can't sanely write anything to r15 like this. */
15167           putIRegA( rD, mkexpr(tD), IRTemp_INVALID, Ijk_Boring );
15168        }
15169
15170        /* Update Rn if necessary. */
15171        switch (summary & 0x0F) {
15172           case 2: case 3:
15173              // should be assured by logic above:
15174              if (bL == 1)
15175                 vassert(rD != rN); /* since we just wrote rD */
15176              putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
15177              break;
15178        }
15179     }
15180
15181     switch (summary & 0x0F) {
15182        case 1:  DIP("%sr%s%s r%u, %s\n",
15183                     bL == 0 ? "st" : "ld",
15184                     bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
15185                 break;
15186        case 2:  DIP("%sr%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
15187                     bL == 0 ? "st" : "ld",
15188                     bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
15189                 break;
15190        case 3:  DIP("%sr%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
15191                     bL == 0 ? "st" : "ld",
15192                     bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
15193                 break;
15194        default: vassert(0);
15195     }
15196
15197     /* XXX deal with alignment constraints */
15198
15199     goto decode_success;
15200
15201     /* Complications:
15202
15203        For all loads: if the Amode specifies base register
15204        writeback, and the same register is specified for Rd and Rn,
15205        the results are UNPREDICTABLE.
15206
15207        For all loads and stores: if R15 is written, branch to
15208        that address afterwards.
15209
15210        STRB: straightforward
15211        LDRB: loaded data is zero extended
15212        STR:  lowest 2 bits of address are ignored
15213        LDR:  if the lowest 2 bits of the address are nonzero
15214              then the loaded value is rotated right by 8 * the lowest 2 bits
15215     */
15216   }
15217
15218  after_load_store_ubyte_or_word:
15219
15220   /* --------------------- Load/store (sbyte & hword) -------- */
15221   // LDRH LDRSH STRH LDRSB
15222   /*                 31   27   23   19 15 11   7    3     # highest bit
15223                        28   24   20 16 12    8    4    0
15224      A5-36   1 | 16  cond 0001 U10L Rn Rd im4h 1SH1 im4l
15225      A5-38   1 | 32  cond 0001 U00L Rn Rd 0000 1SH1 Rm
15226      A5-40   2 | 16  cond 0001 U11L Rn Rd im4h 1SH1 im4l
15227      A5-42   2 | 32  cond 0001 U01L Rn Rd 0000 1SH1 Rm
15228      A5-44   3 | 16  cond 0000 U10L Rn Rd im4h 1SH1 im4l
15229      A5-46   3 | 32  cond 0000 U00L Rn Rd 0000 1SH1 Rm
15230   */
15231   /* case coding:
15232             1   at-ea               (access at ea)
15233             2   at-ea-then-upd      (access at ea, then Rn = ea)
15234             3   at-Rn-then-upd      (access at Rn, then Rn = ea)
15235      ea coding
15236             16  Rn +/- imm8
15237             32  Rn +/- Rm
15238   */
15239   /* Quickly skip over all of this for hopefully most instructions */
15240   if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
15241      goto after_load_store_sbyte_or_hword;
15242
15243   /* Check the "1SH1" thing. */
15244   if ((INSN(7,4) & BITS4(1,0,0,1)) != BITS4(1,0,0,1))
15245      goto after_load_store_sbyte_or_hword;
15246
15247   summary = 0;
15248
15249   /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,0)) {
15250      summary = 1 | 16;
15251   }
15252   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,0)) {
15253      summary = 1 | 32;
15254   }
15255   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,1)) {
15256      summary = 2 | 16;
15257   }
15258   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,1)) {
15259      summary = 2 | 32;
15260   }
15261   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(1,0)) {
15262      summary = 3 | 16;
15263   }
15264   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(0,0)) {
15265      summary = 3 | 32;
15266   }
15267   else goto after_load_store_sbyte_or_hword;
15268
15269   { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
15270     UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
15271     UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
15272     UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
15273     UInt bL   = (insn >> 20) & 1;   /* 20 L=1 load, L=0 store */
15274     UInt bH   = (insn >> 5) & 1;    /* H=1 halfword, H=0 byte */
15275     UInt bS   = (insn >> 6) & 1;    /* S=1 signed, S=0 unsigned */
15276     UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
15277
15278     /* Skip combinations that are either meaningless or already
15279        handled by main word-or-unsigned-byte load-store
15280        instructions. */
15281     if (bS == 0 && bH == 0) /* "unsigned byte" */
15282        goto after_load_store_sbyte_or_hword;
15283     if (bS == 1 && bL == 0) /* "signed store" */
15284        goto after_load_store_sbyte_or_hword;
15285
15286     /* Require 11:8 == 0 for Rn +/- Rm cases */
15287     if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
15288        goto after_load_store_sbyte_or_hword;
15289
15290     /* Skip some invalid cases, which would lead to two competing
15291        updates to the same register, or which are otherwise
15292        disallowed by the spec. */
15293     switch (summary) {
15294        case 1 | 16:
15295           break;
15296        case 1 | 32:
15297           if (rM == 15) goto after_load_store_sbyte_or_hword;
15298           break;
15299        case 2 | 16: case 3 | 16:
15300           if (rN == 15) goto after_load_store_sbyte_or_hword;
15301           if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
15302           break;
15303        case 2 | 32: case 3 | 32:
15304           if (rM == 15) goto after_load_store_sbyte_or_hword;
15305           if (rN == 15) goto after_load_store_sbyte_or_hword;
15306           if (rN == rM) goto after_load_store_sbyte_or_hword;
15307           if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
15308           break;
15309        default:
15310           vassert(0);
15311     }
15312
15313     /* If this is a branch, make it unconditional at this point.
15314        Doing conditional branches in-line is too complex (for now).
15315        Note that you'd have to be insane to use any of these loads to
15316        do a branch, since they only load 16 bits at most, but we
15317        handle it just in case. */
15318     if (bL == 1 && rD == 15 && condT != IRTemp_INVALID) {
15319        // go uncond
15320        mk_skip_over_A32_if_cond_is_false( condT );
15321        condT = IRTemp_INVALID;
15322        // now uncond
15323     }
15324
15325     /* compute the effective address.  Bind it to a tmp since we
15326        may need to use it twice. */
15327     IRExpr* eaE = NULL;
15328     switch (summary & 0xF0) {
15329        case 16:
15330           eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
15331           break;
15332        case 32:
15333           eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
15334           break;
15335     }
15336     vassert(eaE);
15337     IRTemp eaT = newTemp(Ity_I32);
15338     assign(eaT, eaE);
15339
15340     /* get the old Rn value */
15341     IRTemp rnT = newTemp(Ity_I32);
15342     assign(rnT, getIRegA(rN));
15343
15344     /* decide on the transfer address */
15345     IRTemp taT = IRTemp_INVALID;
15346     switch (summary & 0x0F) {
15347        case 1: case 2: taT = eaT; break;
15348        case 3:         taT = rnT; break;
15349     }
15350     vassert(taT != IRTemp_INVALID);
15351
15352     /* ll previous value of rD, for dealing with conditional loads */
15353     IRTemp llOldRd = newTemp(Ity_I32);
15354     assign(llOldRd, llGetIReg(rD));
15355
15356     /* halfword store  H 1  L 0  S 0
15357        uhalf load      H 1  L 1  S 0
15358        shalf load      H 1  L 1  S 1
15359        sbyte load      H 0  L 1  S 1
15360     */
15361     const HChar* name = NULL;
15362     /* generate the transfer */
15363     /**/ if (bH == 1 && bL == 0 && bS == 0) { // halfword store
15364        storeGuardedLE( mkexpr(taT),
15365                        unop(Iop_32to16, getIRegA(rD)), condT );
15366        name = "strh";
15367     }
15368     else if (bH == 1 && bL == 1 && bS == 0) { // uhalf load
15369        IRTemp newRd = newTemp(Ity_I32);
15370        loadGuardedLE( newRd, ILGop_16Uto32,
15371                       mkexpr(taT), mkexpr(llOldRd), condT );
15372        putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
15373        name = "ldrh";
15374     }
15375     else if (bH == 1 && bL == 1 && bS == 1) { // shalf load
15376        IRTemp newRd = newTemp(Ity_I32);
15377        loadGuardedLE( newRd, ILGop_16Sto32,
15378                       mkexpr(taT), mkexpr(llOldRd), condT );
15379        putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
15380        name = "ldrsh";
15381     }
15382     else if (bH == 0 && bL == 1 && bS == 1) { // sbyte load
15383        IRTemp newRd = newTemp(Ity_I32);
15384        loadGuardedLE( newRd, ILGop_8Sto32,
15385                       mkexpr(taT), mkexpr(llOldRd), condT );
15386        putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
15387        name = "ldrsb";
15388     }
15389     else
15390        vassert(0); // should be assured by logic above
15391
15392     /* Update Rn if necessary. */
15393     switch (summary & 0x0F) {
15394        case 2: case 3:
15395           // should be assured by logic above:
15396           if (bL == 1)
15397              vassert(rD != rN); /* since we just wrote rD */
15398           putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
15399           break;
15400     }
15401
15402     switch (summary & 0x0F) {
15403        case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
15404                 break;
15405        case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
15406                     name, nCC(INSN_COND), rD, dis_buf);
15407                 break;
15408        case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
15409                     name, nCC(INSN_COND), rD, dis_buf);
15410                 break;
15411        default: vassert(0);
15412     }
15413
15414     /* XXX deal with alignment constraints */
15415
15416     goto decode_success;
15417
15418     /* Complications:
15419
15420        For all loads: if the Amode specifies base register
15421        writeback, and the same register is specified for Rd and Rn,
15422        the results are UNPREDICTABLE.
15423
15424        For all loads and stores: if R15 is written, branch to
15425        that address afterwards.
15426
15427        Misaligned halfword stores => Unpredictable
15428        Misaligned halfword loads  => Unpredictable
15429     */
15430   }
15431
15432  after_load_store_sbyte_or_hword:
15433
15434   /* --------------------- Load/store multiple -------------- */
15435   // LD/STMIA LD/STMIB LD/STMDA LD/STMDB
15436   // Remarkably complex and difficult to get right
15437   // match 27:20 as 100XX0WL
15438   if (BITS8(1,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,1,0,0))) {
15439      // A5-50 LD/STMIA  cond 1000 10WL Rn RegList
15440      // A5-51 LD/STMIB  cond 1001 10WL Rn RegList
15441      // A5-53 LD/STMDA  cond 1000 00WL Rn RegList
15442      // A5-53 LD/STMDB  cond 1001 00WL Rn RegList
15443      //                   28   24   20 16       0
15444
15445      UInt bINC    = (insn >> 23) & 1;
15446      UInt bBEFORE = (insn >> 24) & 1;
15447
15448      UInt bL      = (insn >> 20) & 1;  /* load=1, store=0 */
15449      UInt bW      = (insn >> 21) & 1;  /* Rn wback=1, no wback=0 */
15450      UInt rN      = (insn >> 16) & 0xF;
15451      UInt regList = insn & 0xFFFF;
15452      /* Skip some invalid cases, which would lead to two competing
15453         updates to the same register, or which are otherwise
15454         disallowed by the spec.  Note the test above has required
15455         that S == 0, since that looks like a kernel-mode only thing.
15456         Done by forcing the real pattern, viz 100XXSWL to actually be
15457         100XX0WL. */
15458      if (rN == 15) goto after_load_store_multiple;
15459      // reglist can't be empty
15460      if (regList == 0) goto after_load_store_multiple;
15461      // if requested to writeback Rn, and this is a load instruction,
15462      // then Rn can't appear in RegList, since we'd have two competing
15463      // new values for Rn.  We do however accept this case for store
15464      // instructions.
15465      if (bW == 1 && bL == 1 && ((1 << rN) & regList) > 0)
15466         goto after_load_store_multiple;
15467
15468      /* Now, we can't do a conditional load or store, since that very
15469         likely will generate an exception.  So we have to take a side
15470         exit at this point if the condition is false. */
15471      if (condT != IRTemp_INVALID) {
15472         mk_skip_over_A32_if_cond_is_false( condT );
15473         condT = IRTemp_INVALID;
15474      }
15475
15476      /* Ok, now we're unconditional.  Generate the IR. */
15477      mk_ldm_stm( True/*arm*/, rN, bINC, bBEFORE, bW, bL, regList );
15478
15479      DIP("%sm%c%c%s r%u%s, {0x%04x}\n",
15480          bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
15481          nCC(INSN_COND),
15482          rN, bW ? "!" : "", regList);
15483
15484      goto decode_success;
15485   }
15486
15487  after_load_store_multiple:
15488
15489   /* --------------------- Control flow --------------------- */
15490   // B, BL (Branch, or Branch-and-Link, to immediate offset)
15491   //
15492   if (BITS8(1,0,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))) {
15493      UInt link   = (insn >> 24) & 1;
15494      UInt uimm24 = insn & ((1<<24)-1);
15495      Int  simm24 = (Int)uimm24;
15496      UInt dst    = guest_R15_curr_instr_notENC + 8
15497                    + (((simm24 << 8) >> 8) << 2);
15498      IRJumpKind jk = link ? Ijk_Call : Ijk_Boring;
15499      if (link) {
15500         putIRegA(14, mkU32(guest_R15_curr_instr_notENC + 4),
15501                      condT, Ijk_Boring);
15502      }
15503      if (condT == IRTemp_INVALID) {
15504         /* unconditional transfer to 'dst'.  See if we can simply
15505            continue tracing at the destination. */
15506         if (resteerOkFn( callback_opaque, (Addr64)dst )) {
15507            /* yes */
15508            dres.whatNext   = Dis_ResteerU;
15509            dres.continueAt = (Addr64)dst;
15510         } else {
15511            /* no; terminate the SB at this point. */
15512            llPutIReg(15, mkU32(dst));
15513            dres.jk_StopHere = jk;
15514            dres.whatNext    = Dis_StopHere;
15515         }
15516         DIP("b%s 0x%x\n", link ? "l" : "", dst);
15517      } else {
15518         /* conditional transfer to 'dst' */
15519         const HChar* comment = "";
15520
15521         /* First see if we can do some speculative chasing into one
15522            arm or the other.  Be conservative and only chase if
15523            !link, that is, this is a normal conditional branch to a
15524            known destination. */
15525         if (!link
15526             && resteerCisOk
15527             && vex_control.guest_chase_cond
15528             && dst < guest_R15_curr_instr_notENC
15529             && resteerOkFn( callback_opaque, (Addr64)(Addr32)dst) ) {
15530            /* Speculation: assume this backward branch is taken.  So
15531               we need to emit a side-exit to the insn following this
15532               one, on the negation of the condition, and continue at
15533               the branch target address (dst). */
15534            stmt( IRStmt_Exit( unop(Iop_Not1,
15535                                    unop(Iop_32to1, mkexpr(condT))),
15536                               Ijk_Boring,
15537                               IRConst_U32(guest_R15_curr_instr_notENC+4),
15538                               OFFB_R15T ));
15539            dres.whatNext   = Dis_ResteerC;
15540            dres.continueAt = (Addr64)(Addr32)dst;
15541            comment = "(assumed taken)";
15542         }
15543         else
15544         if (!link
15545             && resteerCisOk
15546             && vex_control.guest_chase_cond
15547             && dst >= guest_R15_curr_instr_notENC
15548             && resteerOkFn( callback_opaque,
15549                             (Addr64)(Addr32)
15550                                     (guest_R15_curr_instr_notENC+4)) ) {
15551            /* Speculation: assume this forward branch is not taken.
15552               So we need to emit a side-exit to dst (the dest) and
15553               continue disassembling at the insn immediately
15554               following this one. */
15555            stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
15556                               Ijk_Boring,
15557                               IRConst_U32(dst),
15558                               OFFB_R15T ));
15559            dres.whatNext   = Dis_ResteerC;
15560            dres.continueAt = (Addr64)(Addr32)
15561                                      (guest_R15_curr_instr_notENC+4);
15562            comment = "(assumed not taken)";
15563         }
15564         else {
15565            /* Conservative default translation - end the block at
15566               this point. */
15567            stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
15568                               jk, IRConst_U32(dst), OFFB_R15T ));
15569            llPutIReg(15, mkU32(guest_R15_curr_instr_notENC + 4));
15570            dres.jk_StopHere = Ijk_Boring;
15571            dres.whatNext    = Dis_StopHere;
15572         }
15573         DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND),
15574             dst, comment);
15575      }
15576      goto decode_success;
15577   }
15578
   // BX, BLX (Branch-and-Exchange, optionally with Link, to a register)
15580   // NB: interworking branch
15581   if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
15582       && INSN(19,12) == BITS8(1,1,1,1,1,1,1,1)
15583       && (INSN(11,4) == BITS8(1,1,1,1,0,0,1,1)
15584           || INSN(11,4) == BITS8(1,1,1,1,0,0,0,1))) {
15585      IRTemp  dst = newTemp(Ity_I32);
15586      UInt    link = (INSN(11,4) >> 1) & 1;
15587      UInt    rM   = INSN(3,0);
15588      // we don't decode the case (link && rM == 15), as that's
15589      // Unpredictable.
15590      if (!(link && rM == 15)) {
15591         if (condT != IRTemp_INVALID) {
15592            mk_skip_over_A32_if_cond_is_false( condT );
15593         }
15594         // rM contains an interworking address exactly as we require
15595         // (with continuation CPSR.T in bit 0), so we can use it
15596         // as-is, with no masking.
15597         assign( dst, getIRegA(rM) );
15598         if (link) {
15599            putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
15600                      IRTemp_INVALID/*because AL*/, Ijk_Boring );
15601         }
15602         llPutIReg(15, mkexpr(dst));
15603         dres.jk_StopHere = link ? Ijk_Call
15604                                 : (rM == 14 ? Ijk_Ret : Ijk_Boring);
15605         dres.whatNext    = Dis_StopHere;
15606         if (condT == IRTemp_INVALID) {
15607            DIP("b%sx r%u\n", link ? "l" : "", rM);
15608         } else {
15609            DIP("b%sx%s r%u\n", link ? "l" : "", nCC(INSN_COND), rM);
15610         }
15611         goto decode_success;
15612      }
15613      /* else: (link && rM == 15): just fall through */
15614   }
15615
15616   /* --- NB: ARM interworking branches are in NV space, hence
15617      are handled elsewhere by decode_NV_instruction.
15618      ---
15619   */
15620
15621   /* --------------------- Clz --------------------- */
15622   // CLZ
15623   if (INSN(27,20) == BITS8(0,0,0,1,0,1,1,0)
15624       && INSN(19,16) == BITS4(1,1,1,1)
15625       && INSN(11,4) == BITS8(1,1,1,1,0,0,0,1)) {
15626      UInt rD = INSN(15,12);
15627      UInt rM = INSN(3,0);
15628      IRTemp arg = newTemp(Ity_I32);
15629      IRTemp res = newTemp(Ity_I32);
15630      assign(arg, getIRegA(rM));
15631      assign(res, IRExpr_ITE(
15632                     binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
15633                     mkU32(32),
15634                     unop(Iop_Clz32, mkexpr(arg))
15635            ));
15636      putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
15637      DIP("clz%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
15638      goto decode_success;
15639   }
15640
15641   /* --------------------- Mul etc --------------------- */
15642   // MUL
15643   if (BITS8(0,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
15644       && INSN(15,12) == BITS4(0,0,0,0)
15645       && INSN(7,4) == BITS4(1,0,0,1)) {
15646      UInt bitS = (insn >> 20) & 1; /* 20:20 */
15647      UInt rD = INSN(19,16);
15648      UInt rS = INSN(11,8);
15649      UInt rM = INSN(3,0);
15650      if (rD == 15 || rM == 15 || rS == 15) {
15651         /* Unpredictable; don't decode; fall through */
15652      } else {
15653         IRTemp argL = newTemp(Ity_I32);
15654         IRTemp argR = newTemp(Ity_I32);
15655         IRTemp res  = newTemp(Ity_I32);
15656         IRTemp oldC = IRTemp_INVALID;
15657         IRTemp oldV = IRTemp_INVALID;
15658         assign( argL, getIRegA(rM));
15659         assign( argR, getIRegA(rS));
15660         assign( res, binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) );
15661         if (bitS) {
15662            oldC = newTemp(Ity_I32);
15663            assign(oldC, mk_armg_calculate_flag_c());
15664            oldV = newTemp(Ity_I32);
15665            assign(oldV, mk_armg_calculate_flag_v());
15666         }
15667         // now update guest state
15668         putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
15669         if (bitS) {
15670            IRTemp pair = newTemp(Ity_I32);
15671            assign( pair, binop(Iop_Or32,
15672                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
15673                                mkexpr(oldV)) );
15674            setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
15675         }
15676         DIP("mul%c%s r%u, r%u, r%u\n",
15677             bitS ? 's' : ' ', nCC(INSN_COND), rD, rM, rS);
15678         goto decode_success;
15679      }
15680      /* fall through */
15681   }
15682
15683   /* --------------------- Integer Divides --------------------- */
15684   // SDIV
15685   if (BITS8(0,1,1,1,0,0,0,1) == INSN(27,20)
15686       && INSN(15,12) == BITS4(1,1,1,1)
15687       && INSN(7,4) == BITS4(0,0,0,1)) {
15688      UInt rD = INSN(19,16);
15689      UInt rM = INSN(11,8);
15690      UInt rN = INSN(3,0);
15691      if (rD == 15 || rM == 15 || rN == 15) {
15692         /* Unpredictable; don't decode; fall through */
15693      } else {
15694         IRTemp res  = newTemp(Ity_I32);
15695         IRTemp argL = newTemp(Ity_I32);
15696         IRTemp argR = newTemp(Ity_I32);
15697         assign(argL, getIRegA(rN));
15698         assign(argR, getIRegA(rM));
15699         assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
15700         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
15701         DIP("sdiv r%u, r%u, r%u\n", rD, rN, rM);
15702         goto decode_success;
15703      }
15704    }
15705
15706   // UDIV
15707   if (BITS8(0,1,1,1,0,0,1,1) == INSN(27,20)
15708       && INSN(15,12) == BITS4(1,1,1,1)
15709       && INSN(7,4) == BITS4(0,0,0,1)) {
15710      UInt rD = INSN(19,16);
15711      UInt rM = INSN(11,8);
15712      UInt rN = INSN(3,0);
15713      if (rD == 15 || rM == 15 || rN == 15) {
15714         /* Unpredictable; don't decode; fall through */
15715      } else {
15716         IRTemp res  = newTemp(Ity_I32);
15717         IRTemp argL = newTemp(Ity_I32);
15718         IRTemp argR = newTemp(Ity_I32);
15719         assign(argL, getIRegA(rN));
15720         assign(argR, getIRegA(rM));
15721         assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
15722         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
15723         DIP("udiv r%u, r%u, r%u\n", rD, rN, rM);
15724         goto decode_success;
15725      }
15726   }
15727
15728   // MLA, MLS
15729   if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
15730       && INSN(7,4) == BITS4(1,0,0,1)) {
15731      UInt bitS  = (insn >> 20) & 1; /* 20:20 */
15732      UInt isMLS = (insn >> 22) & 1; /* 22:22 */
15733      UInt rD = INSN(19,16);
15734      UInt rN = INSN(15,12);
15735      UInt rS = INSN(11,8);
15736      UInt rM = INSN(3,0);
15737      if (bitS == 1 && isMLS == 1) {
15738         /* This isn't allowed (MLS that sets flags).  don't decode;
15739            fall through */
15740      }
15741      else
15742      if (rD == 15 || rM == 15 || rS == 15 || rN == 15) {
15743         /* Unpredictable; don't decode; fall through */
15744      } else {
15745         IRTemp argL = newTemp(Ity_I32);
15746         IRTemp argR = newTemp(Ity_I32);
15747         IRTemp argP = newTemp(Ity_I32);
15748         IRTemp res  = newTemp(Ity_I32);
15749         IRTemp oldC = IRTemp_INVALID;
15750         IRTemp oldV = IRTemp_INVALID;
15751         assign( argL, getIRegA(rM));
15752         assign( argR, getIRegA(rS));
15753         assign( argP, getIRegA(rN));
15754         assign( res, binop(isMLS ? Iop_Sub32 : Iop_Add32,
15755                            mkexpr(argP),
15756                            binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) ));
15757         if (bitS) {
15758            vassert(!isMLS); // guaranteed above
15759            oldC = newTemp(Ity_I32);
15760            assign(oldC, mk_armg_calculate_flag_c());
15761            oldV = newTemp(Ity_I32);
15762            assign(oldV, mk_armg_calculate_flag_v());
15763         }
15764         // now update guest state
15765         putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
15766         if (bitS) {
15767            IRTemp pair = newTemp(Ity_I32);
15768            assign( pair, binop(Iop_Or32,
15769                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
15770                                mkexpr(oldV)) );
15771            setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
15772         }
15773         DIP("ml%c%c%s r%u, r%u, r%u, r%u\n",
15774             isMLS ? 's' : 'a', bitS ? 's' : ' ',
15775             nCC(INSN_COND), rD, rM, rS, rN);
15776         goto decode_success;
15777      }
15778      /* fall through */
15779   }
15780
15781   // SMULL, UMULL
15782   if (BITS8(0,0,0,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
15783       && INSN(7,4) == BITS4(1,0,0,1)) {
15784      UInt bitS = (insn >> 20) & 1; /* 20:20 */
15785      UInt rDhi = INSN(19,16);
15786      UInt rDlo = INSN(15,12);
15787      UInt rS   = INSN(11,8);
15788      UInt rM   = INSN(3,0);
15789      UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
15790      if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
15791         /* Unpredictable; don't decode; fall through */
15792      } else {
15793         IRTemp argL  = newTemp(Ity_I32);
15794         IRTemp argR  = newTemp(Ity_I32);
15795         IRTemp res   = newTemp(Ity_I64);
15796         IRTemp resHi = newTemp(Ity_I32);
15797         IRTemp resLo = newTemp(Ity_I32);
15798         IRTemp oldC  = IRTemp_INVALID;
15799         IRTemp oldV  = IRTemp_INVALID;
15800         IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
15801         assign( argL, getIRegA(rM));
15802         assign( argR, getIRegA(rS));
15803         assign( res, binop(mulOp, mkexpr(argL), mkexpr(argR)) );
15804         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
15805         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
15806         if (bitS) {
15807            oldC = newTemp(Ity_I32);
15808            assign(oldC, mk_armg_calculate_flag_c());
15809            oldV = newTemp(Ity_I32);
15810            assign(oldV, mk_armg_calculate_flag_v());
15811         }
15812         // now update guest state
15813         putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
15814         putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
15815         if (bitS) {
15816            IRTemp pair = newTemp(Ity_I32);
15817            assign( pair, binop(Iop_Or32,
15818                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
15819                                mkexpr(oldV)) );
15820            setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
15821         }
15822         DIP("%cmull%c%s r%u, r%u, r%u, r%u\n",
15823             isS ? 's' : 'u', bitS ? 's' : ' ',
15824             nCC(INSN_COND), rDlo, rDhi, rM, rS);
15825         goto decode_success;
15826      }
15827      /* fall through */
15828   }
15829
15830   // SMLAL, UMLAL
15831   if (BITS8(0,0,0,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
15832       && INSN(7,4) == BITS4(1,0,0,1)) {
15833      UInt bitS = (insn >> 20) & 1; /* 20:20 */
15834      UInt rDhi = INSN(19,16);
15835      UInt rDlo = INSN(15,12);
15836      UInt rS   = INSN(11,8);
15837      UInt rM   = INSN(3,0);
15838      UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
15839      if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
15840         /* Unpredictable; don't decode; fall through */
15841      } else {
15842         IRTemp argL  = newTemp(Ity_I32);
15843         IRTemp argR  = newTemp(Ity_I32);
15844         IRTemp old   = newTemp(Ity_I64);
15845         IRTemp res   = newTemp(Ity_I64);
15846         IRTemp resHi = newTemp(Ity_I32);
15847         IRTemp resLo = newTemp(Ity_I32);
15848         IRTemp oldC  = IRTemp_INVALID;
15849         IRTemp oldV  = IRTemp_INVALID;
15850         IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
15851         assign( argL, getIRegA(rM));
15852         assign( argR, getIRegA(rS));
15853         assign( old, binop(Iop_32HLto64, getIRegA(rDhi), getIRegA(rDlo)) );
15854         assign( res, binop(Iop_Add64,
15855                            mkexpr(old),
15856                            binop(mulOp, mkexpr(argL), mkexpr(argR))) );
15857         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
15858         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
15859         if (bitS) {
15860            oldC = newTemp(Ity_I32);
15861            assign(oldC, mk_armg_calculate_flag_c());
15862            oldV = newTemp(Ity_I32);
15863            assign(oldV, mk_armg_calculate_flag_v());
15864         }
15865         // now update guest state
15866         putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
15867         putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
15868         if (bitS) {
15869            IRTemp pair = newTemp(Ity_I32);
15870            assign( pair, binop(Iop_Or32,
15871                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
15872                                mkexpr(oldV)) );
15873            setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
15874         }
15875         DIP("%cmlal%c%s r%u, r%u, r%u, r%u\n",
15876             isS ? 's' : 'u', bitS ? 's' : ' ', nCC(INSN_COND),
15877             rDlo, rDhi, rM, rS);
15878         goto decode_success;
15879      }
15880      /* fall through */
15881   }
15882
15883   // UMAAL
15884   if (BITS8(0,0,0,0,0,1,0,0) == INSN(27,20) && INSN(7,4) == BITS4(1,0,0,1)) {
15885      UInt rDhi = INSN(19,16);
15886      UInt rDlo = INSN(15,12);
15887      UInt rM   = INSN(11,8);
15888      UInt rN   = INSN(3,0);
15889      if (rDlo == 15 || rDhi == 15 || rN == 15 || rM == 15 || rDhi == rDlo)  {
15890         /* Unpredictable; don't decode; fall through */
15891      } else {
15892         IRTemp argN   = newTemp(Ity_I32);
15893         IRTemp argM   = newTemp(Ity_I32);
15894         IRTemp argDhi = newTemp(Ity_I32);
15895         IRTemp argDlo = newTemp(Ity_I32);
15896         IRTemp res    = newTemp(Ity_I64);
15897         IRTemp resHi  = newTemp(Ity_I32);
15898         IRTemp resLo  = newTemp(Ity_I32);
15899         assign( argN,   getIRegA(rN) );
15900         assign( argM,   getIRegA(rM) );
15901         assign( argDhi, getIRegA(rDhi) );
15902         assign( argDlo, getIRegA(rDlo) );
15903         assign( res,
15904                 binop(Iop_Add64,
15905                       binop(Iop_Add64,
15906                             binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
15907                             unop(Iop_32Uto64, mkexpr(argDhi))),
15908                       unop(Iop_32Uto64, mkexpr(argDlo))) );
15909         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
15910         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
15911         // now update guest state
15912         putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
15913         putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
15914         DIP("umaal %s r%u, r%u, r%u, r%u\n",
15915             nCC(INSN_COND), rDlo, rDhi, rN, rM);
15916         goto decode_success;
15917      }
15918      /* fall through */
15919   }
15920
15921   /* --------------------- Msr etc --------------------- */
15922
15923   // MSR apsr, #imm
15924   if (INSN(27,20) == BITS8(0,0,1,1,0,0,1,0)
15925       && INSN(17,12) == BITS6(0,0,1,1,1,1)) {
15926      UInt write_ge    = INSN(18,18);
15927      UInt write_nzcvq = INSN(19,19);
15928      if (write_nzcvq || write_ge) {
15929         UInt   imm = (INSN(11,0) >> 0) & 0xFF;
15930         UInt   rot = 2 * ((INSN(11,0) >> 8) & 0xF);
15931         IRTemp immT = newTemp(Ity_I32);
15932         vassert(rot <= 30);
15933         imm = ROR32(imm, rot);
15934         assign(immT, mkU32(imm));
15935         desynthesise_APSR( write_nzcvq, write_ge, immT, condT );
15936         DIP("msr%s cpsr%s%sf, #0x%08x\n", nCC(INSN_COND),
15937             write_nzcvq ? "f" : "", write_ge ? "g" : "", imm);
15938         goto decode_success;
15939      }
15940      /* fall through */
15941   }
15942
15943   // MSR apsr, reg
15944   if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
15945       && INSN(17,12) == BITS6(0,0,1,1,1,1)
15946       && INSN(11,4) == BITS8(0,0,0,0,0,0,0,0)) {
15947      UInt rN          = INSN(3,0);
15948      UInt write_ge    = INSN(18,18);
15949      UInt write_nzcvq = INSN(19,19);
15950      if (rN != 15 && (write_nzcvq || write_ge)) {
15951         IRTemp rNt = newTemp(Ity_I32);
15952         assign(rNt, getIRegA(rN));
15953         desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
15954         DIP("msr%s cpsr_%s%s, r%u\n", nCC(INSN_COND),
15955             write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
15956         goto decode_success;
15957      }
15958      /* fall through */
15959   }
15960
15961   // MRS rD, cpsr
15962   if ((insn & 0x0FFF0FFF) == 0x010F0000) {
15963      UInt rD   = INSN(15,12);
15964      if (rD != 15) {
15965         IRTemp apsr = synthesise_APSR();
15966         putIRegA( rD, mkexpr(apsr), condT, Ijk_Boring );
15967         DIP("mrs%s r%u, cpsr\n", nCC(INSN_COND), rD);
15968         goto decode_success;
15969      }
15970      /* fall through */
15971   }
15972
15973   /* --------------------- Svc --------------------- */
15974   if (BITS8(1,1,1,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))) {
15975      UInt imm24 = (insn >> 0) & 0xFFFFFF;
15976      if (imm24 == 0) {
15977         /* A syscall.  We can't do this conditionally, hence: */
15978         if (condT != IRTemp_INVALID) {
15979            mk_skip_over_A32_if_cond_is_false( condT );
15980         }
15981         // AL after here
15982         llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 4 ));
15983         dres.jk_StopHere = Ijk_Sys_syscall;
15984         dres.whatNext    = Dis_StopHere;
15985         DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
15986         goto decode_success;
15987      }
15988      /* fall through */
15989   }
15990
15991   /* ------------------------ swp ------------------------ */
15992
15993   // SWP, SWPB
15994   if (BITS8(0,0,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15995       && BITS4(0,0,0,0) == INSN(11,8)
15996       && BITS4(1,0,0,1) == INSN(7,4)) {
15997      UInt   rN   = INSN(19,16);
15998      UInt   rD   = INSN(15,12);
15999      UInt   rM   = INSN(3,0);
16000      IRTemp tRn  = newTemp(Ity_I32);
16001      IRTemp tNew = newTemp(Ity_I32);
16002      IRTemp tOld = IRTemp_INVALID;
16003      IRTemp tSC1 = newTemp(Ity_I1);
16004      UInt   isB  = (insn >> 22) & 1;
16005
16006      if (rD == 15 || rN == 15 || rM == 15 || rN == rM || rN == rD) {
16007         /* undecodable; fall through */
16008      } else {
16009         /* make unconditional */
16010         if (condT != IRTemp_INVALID) {
16011            mk_skip_over_A32_if_cond_is_false( condT );
16012            condT = IRTemp_INVALID;
16013         }
16014         /* Ok, now we're unconditional.  Generate a LL-SC loop. */
16015         assign(tRn, getIRegA(rN));
16016         assign(tNew, getIRegA(rM));
16017         if (isB) {
16018            /* swpb */
16019            tOld = newTemp(Ity_I8);
16020            stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
16021                              NULL/*=>isLL*/) );
16022            stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
16023                              unop(Iop_32to8, mkexpr(tNew))) );
16024         } else {
16025            /* swp */
16026            tOld = newTemp(Ity_I32);
16027            stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
16028                              NULL/*=>isLL*/) );
16029            stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
16030                              mkexpr(tNew)) );
16031         }
16032         stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
16033                           /*Ijk_NoRedir*/Ijk_Boring,
16034                           IRConst_U32(guest_R15_curr_instr_notENC),
16035                           OFFB_R15T ));
16036         putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
16037                      IRTemp_INVALID, Ijk_Boring);
16038         DIP("swp%s%s r%u, r%u, [r%u]\n",
16039             isB ? "b" : "", nCC(INSN_COND), rD, rM, rN);
16040         goto decode_success;
16041      }
16042      /* fall through */
16043   }
16044
16045   /* ----------------------------------------------------------- */
16046   /* -- ARMv6 instructions                                    -- */
16047   /* ----------------------------------------------------------- */
16048
16049   /* ------------------- {ldr,str}ex{,b,h,d} ------------------- */
16050
16051   // LDREXD, LDREX, LDREXH, LDREXB
16052   if (0x01900F9F == (insn & 0x0F900FFF)) {
16053      UInt   rT    = INSN(15,12);
16054      UInt   rN    = INSN(19,16);
16055      IRType ty    = Ity_INVALID;
16056      IROp   widen = Iop_INVALID;
16057      const HChar* nm = NULL;
16058      Bool   valid = True;
16059      switch (INSN(22,21)) {
16060         case 0: nm = "";  ty = Ity_I32; break;
16061         case 1: nm = "d"; ty = Ity_I64; break;
16062         case 2: nm = "b"; ty = Ity_I8;  widen = Iop_8Uto32; break;
16063         case 3: nm = "h"; ty = Ity_I16; widen = Iop_16Uto32; break;
16064         default: vassert(0);
16065      }
16066      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
16067         if (rT == 15 || rN == 15)
16068            valid = False;
16069      } else {
16070         vassert(ty == Ity_I64);
16071         if ((rT & 1) == 1 || rT == 14 || rN == 15)
16072            valid = False;
16073      }
16074      if (valid) {
16075         IRTemp res;
16076         /* make unconditional */
16077         if (condT != IRTemp_INVALID) {
16078           mk_skip_over_A32_if_cond_is_false( condT );
16079           condT = IRTemp_INVALID;
16080         }
16081         /* Ok, now we're unconditional.  Do the load. */
16082         res = newTemp(ty);
16083         // FIXME: assumes little-endian guest
16084         stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
16085                           NULL/*this is a load*/) );
16086         if (ty == Ity_I64) {
16087            // FIXME: assumes little-endian guest
16088            putIRegA(rT+0, unop(Iop_64to32, mkexpr(res)),
16089                           IRTemp_INVALID, Ijk_Boring);
16090            putIRegA(rT+1, unop(Iop_64HIto32, mkexpr(res)),
16091                           IRTemp_INVALID, Ijk_Boring);
16092            DIP("ldrex%s%s r%u, r%u, [r%u]\n",
16093                nm, nCC(INSN_COND), rT+0, rT+1, rN);
16094         } else {
16095            putIRegA(rT, widen == Iop_INVALID
16096                            ? mkexpr(res) : unop(widen, mkexpr(res)),
16097                     IRTemp_INVALID, Ijk_Boring);
16098            DIP("ldrex%s%s r%u, [r%u]\n", nm, nCC(INSN_COND), rT, rN);
16099         }
16100         goto decode_success;
16101      }
16102      /* undecodable; fall through */
16103   }
16104
16105   // STREXD, STREX, STREXH, STREXB
16106   if (0x01800F90 == (insn & 0x0F900FF0)) {
16107      UInt   rT     = INSN(3,0);
16108      UInt   rN     = INSN(19,16);
16109      UInt   rD     = INSN(15,12);
16110      IRType ty     = Ity_INVALID;
16111      IROp   narrow = Iop_INVALID;
16112      const HChar* nm = NULL;
16113      Bool   valid  = True;
16114      switch (INSN(22,21)) {
16115         case 0: nm = "";  ty = Ity_I32; break;
16116         case 1: nm = "d"; ty = Ity_I64; break;
16117         case 2: nm = "b"; ty = Ity_I8;  narrow = Iop_32to8; break;
16118         case 3: nm = "h"; ty = Ity_I16; narrow = Iop_32to16; break;
16119         default: vassert(0);
16120      }
16121      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
16122         if (rD == 15 || rN == 15 || rT == 15
16123             || rD == rN || rD == rT)
16124            valid = False;
16125      } else {
16126         vassert(ty == Ity_I64);
16127         if (rD == 15 || (rT & 1) == 1 || rT == 14 || rN == 15
16128             || rD == rN || rD == rT || rD == rT+1)
16129            valid = False;
16130      }
16131      if (valid) {
16132         IRTemp resSC1, resSC32, data;
16133         /* make unconditional */
16134         if (condT != IRTemp_INVALID) {
16135            mk_skip_over_A32_if_cond_is_false( condT );
16136            condT = IRTemp_INVALID;
16137         }
16138         /* Ok, now we're unconditional.  Do the store. */
16139         data = newTemp(ty);
16140         assign(data,
16141                ty == Ity_I64
16142                   // FIXME: assumes little-endian guest
16143                   ? binop(Iop_32HLto64, getIRegA(rT+1), getIRegA(rT+0))
16144                   : narrow == Iop_INVALID
16145                      ? getIRegA(rT)
16146                      : unop(narrow, getIRegA(rT)));
16147         resSC1 = newTemp(Ity_I1);
16148         // FIXME: assumes little-endian guest
16149         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
16150
16151         /* Set rD to 1 on failure, 0 on success.  Currently we have
16152            resSC1 == 0 on failure, 1 on success. */
16153         resSC32 = newTemp(Ity_I32);
16154         assign(resSC32,
16155                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
16156
16157         putIRegA(rD, mkexpr(resSC32),
16158                      IRTemp_INVALID, Ijk_Boring);
16159         if (ty == Ity_I64) {
16160            DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
16161                nm, nCC(INSN_COND), rD, rT, rT+1, rN);
16162         } else {
16163            DIP("strex%s%s r%u, r%u, [r%u]\n",
16164                nm, nCC(INSN_COND), rD, rT, rN);
16165         }
16166         goto decode_success;
16167      }
16168      /* fall through */
16169   }
16170
16171   /* --------------------- movw, movt --------------------- */
16172   if (0x03000000 == (insn & 0x0FF00000)
16173       || 0x03400000 == (insn & 0x0FF00000)) /* pray for CSE */ {
16174      UInt rD    = INSN(15,12);
16175      UInt imm16 = (insn & 0xFFF) | ((insn >> 4) & 0x0000F000);
16176      UInt isT   = (insn >> 22) & 1;
16177      if (rD == 15) {
16178         /* forget it */
16179      } else {
16180         if (isT) {
16181            putIRegA(rD,
16182                     binop(Iop_Or32,
16183                           binop(Iop_And32, getIRegA(rD), mkU32(0xFFFF)),
16184                           mkU32(imm16 << 16)),
16185                     condT, Ijk_Boring);
16186            DIP("movt%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
16187            goto decode_success;
16188         } else {
16189            putIRegA(rD, mkU32(imm16), condT, Ijk_Boring);
16190            DIP("movw%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
16191            goto decode_success;
16192         }
16193      }
16194      /* fall through */
16195   }
16196
16197   /* ----------- uxtb, sxtb, uxth, sxth, uxtb16, sxtb16 ----------- */
16198   /* FIXME: this is an exact duplicate of the Thumb version.  They
16199      should be commoned up. */
16200   if (BITS8(0,1,1,0,1, 0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,0))
16201       && BITS4(1,1,1,1) == INSN(19,16)
16202       && BITS4(0,1,1,1) == INSN(7,4)
16203       && BITS4(0,0, 0,0) == (INSN(11,8) & BITS4(0,0,1,1))) {
16204      UInt subopc = INSN(27,20) & BITS8(0,0,0,0,0, 1,1,1);
16205      if (subopc != BITS4(0,0,0,1) && subopc != BITS4(0,1,0,1)) {
16206         Int    rot  = (INSN(11,8) >> 2) & 3;
16207         UInt   rM   = INSN(3,0);
16208         UInt   rD   = INSN(15,12);
16209         IRTemp srcT = newTemp(Ity_I32);
16210         IRTemp rotT = newTemp(Ity_I32);
16211         IRTemp dstT = newTemp(Ity_I32);
16212         const HChar* nm = "???";
16213         assign(srcT, getIRegA(rM));
16214         assign(rotT, genROR32(srcT, 8 * rot)); /* 0, 8, 16 or 24 only */
16215         switch (subopc) {
16216            case BITS4(0,1,1,0): // UXTB
16217               assign(dstT, unop(Iop_8Uto32, unop(Iop_32to8, mkexpr(rotT))));
16218               nm = "uxtb";
16219               break;
16220            case BITS4(0,0,1,0): // SXTB
16221               assign(dstT, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rotT))));
16222               nm = "sxtb";
16223               break;
16224            case BITS4(0,1,1,1): // UXTH
16225               assign(dstT, unop(Iop_16Uto32, unop(Iop_32to16, mkexpr(rotT))));
16226               nm = "uxth";
16227               break;
16228            case BITS4(0,0,1,1): // SXTH
16229               assign(dstT, unop(Iop_16Sto32, unop(Iop_32to16, mkexpr(rotT))));
16230               nm = "sxth";
16231               break;
16232            case BITS4(0,1,0,0): // UXTB16
16233               assign(dstT, binop(Iop_And32, mkexpr(rotT), mkU32(0x00FF00FF)));
16234               nm = "uxtb16";
16235               break;
16236            case BITS4(0,0,0,0): { // SXTB16
16237               IRTemp lo32 = newTemp(Ity_I32);
16238               IRTemp hi32 = newTemp(Ity_I32);
16239               assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
16240               assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
16241               assign(
16242                  dstT,
16243                  binop(Iop_Or32,
16244                        binop(Iop_And32,
16245                              unop(Iop_8Sto32,
16246                                   unop(Iop_32to8, mkexpr(lo32))),
16247                              mkU32(0xFFFF)),
16248                        binop(Iop_Shl32,
16249                              unop(Iop_8Sto32,
16250                                   unop(Iop_32to8, mkexpr(hi32))),
16251                              mkU8(16))
16252               ));
16253               nm = "sxtb16";
16254               break;
16255            }
16256            default:
16257               vassert(0); // guarded by "if" above
16258         }
16259         putIRegA(rD, mkexpr(dstT), condT, Ijk_Boring);
16260         DIP("%s%s r%u, r%u, ROR #%u\n", nm, nCC(INSN_COND), rD, rM, rot);
16261         goto decode_success;
16262      }
16263      /* fall through */
16264   }
16265
16266   /* ------------------- bfi, bfc ------------------- */
16267   if (BITS8(0,1,1,1,1,1,0, 0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
16268       && BITS4(0, 0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
16269      UInt rD  = INSN(15,12);
16270      UInt rN  = INSN(3,0);
16271      UInt msb = (insn >> 16) & 0x1F; /* 20:16 */
16272      UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
16273      if (rD == 15 || msb < lsb) {
16274         /* undecodable; fall through */
16275      } else {
16276         IRTemp src    = newTemp(Ity_I32);
16277         IRTemp olddst = newTemp(Ity_I32);
16278         IRTemp newdst = newTemp(Ity_I32);
16279         UInt   mask = 1 << (msb - lsb);
16280         mask = (mask - 1) + mask;
16281         vassert(mask != 0); // guaranteed by "msb < lsb" check above
16282         mask <<= lsb;
16283
16284         assign(src, rN == 15 ? mkU32(0) : getIRegA(rN));
16285         assign(olddst, getIRegA(rD));
16286         assign(newdst,
16287                binop(Iop_Or32,
16288                   binop(Iop_And32,
16289                         binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
16290                         mkU32(mask)),
16291                   binop(Iop_And32,
16292                         mkexpr(olddst),
16293                         mkU32(~mask)))
16294               );
16295
16296         putIRegA(rD, mkexpr(newdst), condT, Ijk_Boring);
16297
16298         if (rN == 15) {
16299            DIP("bfc%s r%u, #%u, #%u\n",
16300                nCC(INSN_COND), rD, lsb, msb-lsb+1);
16301         } else {
16302            DIP("bfi%s r%u, r%u, #%u, #%u\n",
16303                nCC(INSN_COND), rD, rN, lsb, msb-lsb+1);
16304         }
16305         goto decode_success;
16306      }
16307      /* fall through */
16308   }
16309
16310   /* ------------------- {u,s}bfx ------------------- */
16311   if (BITS8(0,1,1,1,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
16312       && BITS4(0,1,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
16313      UInt rD  = INSN(15,12);
16314      UInt rN  = INSN(3,0);
16315      UInt wm1 = (insn >> 16) & 0x1F; /* 20:16 */
16316      UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
16317      UInt msb = lsb + wm1;
16318      UInt isU = (insn >> 22) & 1;    /* 22:22 */
16319      if (rD == 15 || rN == 15 || msb >= 32) {
16320         /* undecodable; fall through */
16321      } else {
16322         IRTemp src  = newTemp(Ity_I32);
16323         IRTemp tmp  = newTemp(Ity_I32);
16324         IRTemp res  = newTemp(Ity_I32);
16325         UInt   mask = ((1 << wm1) - 1) + (1 << wm1);
16326         vassert(msb >= 0 && msb <= 31);
16327         vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
16328
16329         assign(src, getIRegA(rN));
16330         assign(tmp, binop(Iop_And32,
16331                           binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
16332                           mkU32(mask)));
16333         assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
16334                           binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
16335                           mkU8(31-wm1)));
16336
16337         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
16338
16339         DIP("%s%s r%u, r%u, #%u, #%u\n",
16340             isU ? "ubfx" : "sbfx",
16341             nCC(INSN_COND), rD, rN, lsb, wm1 + 1);
16342         goto decode_success;
16343      }
16344      /* fall through */
16345   }
16346
16347   /* --------------------- Load/store doubleword ------------- */
16348   // LDRD STRD
16349   /*                 31   27   23   19 15 11   7    3     # highest bit
16350                        28   24   20 16 12    8    4    0
16351      A5-36   1 | 16  cond 0001 U100 Rn Rd im4h 11S1 im4l
16352      A5-38   1 | 32  cond 0001 U000 Rn Rd 0000 11S1 Rm
16353      A5-40   2 | 16  cond 0001 U110 Rn Rd im4h 11S1 im4l
16354      A5-42   2 | 32  cond 0001 U010 Rn Rd 0000 11S1 Rm
16355      A5-44   3 | 16  cond 0000 U100 Rn Rd im4h 11S1 im4l
16356      A5-46   3 | 32  cond 0000 U000 Rn Rd 0000 11S1 Rm
16357   */
16358   /* case coding:
16359             1   at-ea               (access at ea)
16360             2   at-ea-then-upd      (access at ea, then Rn = ea)
16361             3   at-Rn-then-upd      (access at Rn, then Rn = ea)
16362      ea coding
16363             16  Rn +/- imm8
16364             32  Rn +/- Rm
16365   */
16366   /* Quickly skip over all of this for hopefully most instructions */
16367   if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
16368      goto after_load_store_doubleword;
16369
16370   /* Check the "11S1" thing. */
16371   if ((INSN(7,4) & BITS4(1,1,0,1)) != BITS4(1,1,0,1))
16372      goto after_load_store_doubleword;
16373
16374   summary = 0;
16375
16376   /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,0,0)) {
16377      summary = 1 | 16;
16378   }
16379   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,0,0)) {
16380      summary = 1 | 32;
16381   }
16382   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,1,0)) {
16383      summary = 2 | 16;
16384   }
16385   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,1,0)) {
16386      summary = 2 | 32;
16387   }
16388   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(1,0,0)) {
16389      summary = 3 | 16;
16390   }
16391   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(0,0,0)) {
16392      summary = 3 | 32;
16393   }
16394   else goto after_load_store_doubleword;
16395
   /* Main LDRD/STRD translation.  On entry, 'summary' holds the
      addressing case in its low nibble (1 = access at ea, 2 = access
      at ea then Rn = ea, 3 = access at Rn then Rn = ea) and the
      effective-address form in the 16 / 32 bits (16 = Rn +/- imm8,
      32 = Rn +/- Rm).  Encodings rejected here jump to
      after_load_store_doubleword so later cases can try them. */
   { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
     UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
     UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
     UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
     UInt bS   = (insn >> 5) & 1;    /* S=1 store, S=0 load */
     UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */

     /* Require rD to be an even numbered register */
     if ((rD & 1) != 0)
        goto after_load_store_doubleword;

     /* Require 11:8 == 0 for Rn +/- Rm cases */
     if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
        goto after_load_store_doubleword;

     /* Skip some invalid cases, which would lead to two competing
        updates to the same register, or which are otherwise
        disallowed by the spec. */
     switch (summary) {
        case 1 | 16:
           break;
        case 1 | 32:
           if (rM == 15) goto after_load_store_doubleword;
           break;
        case 2 | 16: case 3 | 16:
           if (rN == 15) goto after_load_store_doubleword;
           /* a load must not also target the written-back base */
           if (bS == 0 && (rN == rD || rN == rD+1))
              goto after_load_store_doubleword;
           break;
        case 2 | 32: case 3 | 32:
           if (rM == 15) goto after_load_store_doubleword;
           if (rN == 15) goto after_load_store_doubleword;
           if (rN == rM) goto after_load_store_doubleword;
           /* a load must not also target the written-back base */
           if (bS == 0 && (rN == rD || rN == rD+1))
              goto after_load_store_doubleword;
           break;
        default:
           vassert(0);
     }

     /* If this is a branch, make it unconditional at this point.
        Doing conditional branches in-line is too complex (for
        now). */
     /* "Branch" here means a load whose second destination register,
        rD+1, is r15. */
     vassert((rD & 1) == 0); /* from tests above */
     if (bS == 0 && rD+1 == 15 && condT != IRTemp_INVALID) {
        // go uncond
        mk_skip_over_A32_if_cond_is_false( condT );
        condT = IRTemp_INVALID;
        // now uncond
     }

     /* compute the effective address.  Bind it to a tmp since we
        may need to use it twice. */
     IRExpr* eaE = NULL;
     switch (summary & 0xF0) {
        case 16:
           eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
           break;
        case 32:
           eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
           break;
     }
     vassert(eaE);
     IRTemp eaT = newTemp(Ity_I32);
     assign(eaT, eaE);

     /* get the old Rn value */
     IRTemp rnT = newTemp(Ity_I32);
     assign(rnT, getIRegA(rN));

     /* decide on the transfer address */
     /* cases 1 and 2 access memory at ea; case 3 accesses it at the
        original Rn value */
     IRTemp taT = IRTemp_INVALID;
     switch (summary & 0x0F) {
        case 1: case 2: taT = eaT; break;
        case 3:         taT = rnT; break;
     }
     vassert(taT != IRTemp_INVALID);

     /* XXX deal with alignment constraints */
     /* XXX: but the A8 doesn't seem to trap for misaligned loads, so,
        ignore alignment issues for the time being. */

     /* For almost all cases, we do the writeback after the transfers.
        However, that leaves the stack "uncovered" in this case:
           strd    rD, [sp, #-8]
        In which case, do the writeback to SP now, instead of later.
        This is bad in that it makes the insn non-restartable if the
        accesses fault, but at least keeps Memcheck happy. */
     Bool writeback_already_done = False;
     if (bS == 1 /*store*/ && summary == (2 | 16)
         && rN == 13 && rN != rD && rN != rD+1
         && bU == 0/*minus*/ && imm8 == 8) {
        putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
        writeback_already_done = True;
     }

     /* doubleword store  S 1
        doubleword load   S 0
     */
     const HChar* name = NULL;
     /* generate the transfers */
     if (bS == 1) { // doubleword store
        /* rD goes to the transfer address, rD+1 to that address + 4;
           both stores are guarded by condT */
        storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(0)),
                        getIRegA(rD+0), condT );
        storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(4)),
                        getIRegA(rD+1), condT );
        name = "strd";
     } else { // doubleword load
        /* The current register values are handed to loadGuardedLE,
           presumably as the result to use when the guard is false
           (TODO confirm against loadGuardedLE), which would let the
           register writes below be unconditional. */
        IRTemp oldRd0 = newTemp(Ity_I32);
        IRTemp oldRd1 = newTemp(Ity_I32);
        assign(oldRd0, llGetIReg(rD+0));
        assign(oldRd1, llGetIReg(rD+1));
        IRTemp newRd0 = newTemp(Ity_I32);
        IRTemp newRd1 = newTemp(Ity_I32);
        loadGuardedLE( newRd0, ILGop_Ident32,
                       binop(Iop_Add32, mkexpr(taT), mkU32(0)),
                       mkexpr(oldRd0), condT );
        putIRegA( rD+0, mkexpr(newRd0), IRTemp_INVALID, Ijk_Boring );
        loadGuardedLE( newRd1, ILGop_Ident32,
                       binop(Iop_Add32, mkexpr(taT), mkU32(4)),
                       mkexpr(oldRd1), condT );
        putIRegA( rD+1, mkexpr(newRd1), IRTemp_INVALID, Ijk_Boring );
        name = "ldrd";
     }

     /* Update Rn if necessary. */
     switch (summary & 0x0F) {
        case 2: case 3:
           // should be assured by logic above:
           vassert(rN != 15); /* from checks above */
           if (bS == 0) {
              vassert(rD+0 != rN); /* since we just wrote rD+0 */
              vassert(rD+1 != rN); /* since we just wrote rD+1 */
           }
           /* skipped when the early SP writeback above already did it */
           if (!writeback_already_done)
              putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
           break;
     }

     /* Trace output; dis_buf was filled in by the mk_EA_* helper
        called above. */
     switch (summary & 0x0F) {
        case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
                 break;
        case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
                     name, nCC(INSN_COND), rD, dis_buf);
                 break;
        case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
                     name, nCC(INSN_COND), rD, dis_buf);
                 break;
        default: vassert(0);
     }

     goto decode_success;
   }
16549
16550  after_load_store_doubleword:
16551
16552   /* ------------------- {s,u}xtab ------------- */
16553   if (BITS8(0,1,1,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
16554       && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
16555       && BITS4(0,1,1,1) == INSN(7,4)) {
16556      UInt rN  = INSN(19,16);
16557      UInt rD  = INSN(15,12);
16558      UInt rM  = INSN(3,0);
16559      UInt rot = (insn >> 10) & 3;
16560      UInt isU = INSN(22,22);
16561      if (rN == 15/*it's {S,U}XTB*/ || rD == 15 || rM == 15) {
16562         /* undecodable; fall through */
16563      } else {
16564         IRTemp srcL = newTemp(Ity_I32);
16565         IRTemp srcR = newTemp(Ity_I32);
16566         IRTemp res  = newTemp(Ity_I32);
16567         assign(srcR, getIRegA(rM));
16568         assign(srcL, getIRegA(rN));
16569         assign(res,  binop(Iop_Add32,
16570                            mkexpr(srcL),
16571                            unop(isU ? Iop_8Uto32 : Iop_8Sto32,
16572                                 unop(Iop_32to8,
16573                                      genROR32(srcR, 8 * rot)))));
16574         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
16575         DIP("%cxtab%s r%u, r%u, r%u, ror #%u\n",
16576             isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
16577         goto decode_success;
16578      }
16579      /* fall through */
16580   }
16581
16582   /* ------------------- {s,u}xtah ------------- */
16583   if (BITS8(0,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
16584       && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
16585       && BITS4(0,1,1,1) == INSN(7,4)) {
16586      UInt rN  = INSN(19,16);
16587      UInt rD  = INSN(15,12);
16588      UInt rM  = INSN(3,0);
16589      UInt rot = (insn >> 10) & 3;
16590      UInt isU = INSN(22,22);
16591      if (rN == 15/*it's {S,U}XTH*/ || rD == 15 || rM == 15) {
16592         /* undecodable; fall through */
16593      } else {
16594         IRTemp srcL = newTemp(Ity_I32);
16595         IRTemp srcR = newTemp(Ity_I32);
16596         IRTemp res  = newTemp(Ity_I32);
16597         assign(srcR, getIRegA(rM));
16598         assign(srcL, getIRegA(rN));
16599         assign(res,  binop(Iop_Add32,
16600                            mkexpr(srcL),
16601                            unop(isU ? Iop_16Uto32 : Iop_16Sto32,
16602                                 unop(Iop_32to16,
16603                                      genROR32(srcR, 8 * rot)))));
16604         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
16605
16606         DIP("%cxtah%s r%u, r%u, r%u, ror #%u\n",
16607             isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
16608         goto decode_success;
16609      }
16610      /* fall through */
16611   }
16612
16613   /* ------------------- rev16, rev ------------------ */
16614   if (INSN(27,16) == 0x6BF
16615       && (INSN(11,4) == 0xFB/*rev16*/ || INSN(11,4) == 0xF3/*rev*/)) {
16616      Bool isREV = INSN(11,4) == 0xF3;
16617      UInt rM    = INSN(3,0);
16618      UInt rD    = INSN(15,12);
16619      if (rM != 15 && rD != 15) {
16620         IRTemp rMt = newTemp(Ity_I32);
16621         assign(rMt, getIRegA(rM));
16622         IRTemp res = isREV ? gen_REV(rMt) : gen_REV16(rMt);
16623         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
16624         DIP("rev%s%s r%u, r%u\n", isREV ? "" : "16",
16625             nCC(INSN_COND), rD, rM);
16626         goto decode_success;
16627      }
16628   }
16629
16630   /* ------------------- revsh ----------------------- */
16631   if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xFB) {
16632      UInt rM = INSN(3,0);
16633      UInt rD = INSN(15,12);
16634      if (rM != 15 && rD != 15) {
16635         IRTemp irt_rM  = newTemp(Ity_I32);
16636         IRTemp irt_hi  = newTemp(Ity_I32);
16637         IRTemp irt_low = newTemp(Ity_I32);
16638         IRTemp irt_res = newTemp(Ity_I32);
16639         assign(irt_rM, getIRegA(rM));
16640         assign(irt_hi,
16641                binop(Iop_Sar32,
16642                      binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
16643                      mkU8(16)
16644                )
16645         );
16646         assign(irt_low,
16647                binop(Iop_And32,
16648                      binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
16649                      mkU32(0xFF)
16650                )
16651         );
16652         assign(irt_res,
16653                binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
16654         );
16655         putIRegA(rD, mkexpr(irt_res), condT, Ijk_Boring);
16656         DIP("revsh%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
16657         goto decode_success;
16658      }
16659   }
16660
16661   /* ------------------- rbit ------------------ */
16662   if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xF3) {
16663      UInt rD = INSN(15,12);
16664      UInt rM = INSN(3,0);
16665      if (rD != 15 && rM != 15) {
16666         IRTemp arg = newTemp(Ity_I32);
16667         assign(arg, getIRegA(rM));
16668         IRTemp res = gen_BITREV(arg);
16669         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
16670         DIP("rbit r%u, r%u\n", rD, rM);
16671         goto decode_success;
16672      }
16673   }
16674
16675   /* ------------------- smmul ------------------ */
16676   if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
16677       && INSN(15,12) == BITS4(1,1,1,1)
16678       && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
16679      UInt bitR = INSN(5,5);
16680      UInt rD = INSN(19,16);
16681      UInt rM = INSN(11,8);
16682      UInt rN = INSN(3,0);
16683      if (rD != 15 && rM != 15 && rN != 15) {
16684         IRExpr* res
16685         = unop(Iop_64HIto32,
16686                binop(Iop_Add64,
16687                      binop(Iop_MullS32, getIRegA(rN), getIRegA(rM)),
16688                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
16689         putIRegA(rD, res, condT, Ijk_Boring);
16690         DIP("smmul%s%s r%u, r%u, r%u\n",
16691             nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM);
16692         goto decode_success;
16693      }
16694   }
16695
16696   /* ------------------- smmla ------------------ */
16697   if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
16698       && INSN(15,12) != BITS4(1,1,1,1)
16699       && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
16700      UInt bitR = INSN(5,5);
16701      UInt rD = INSN(19,16);
16702      UInt rA = INSN(15,12);
16703      UInt rM = INSN(11,8);
16704      UInt rN = INSN(3,0);
16705      if (rD != 15 && rM != 15 && rN != 15) {
16706         IRExpr* res
16707         = unop(Iop_64HIto32,
16708                binop(Iop_Add64,
16709                      binop(Iop_Add64,
16710                            binop(Iop_32HLto64, getIRegA(rA), mkU32(0)),
16711                            binop(Iop_MullS32, getIRegA(rN), getIRegA(rM))),
16712                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
16713         putIRegA(rD, res, condT, Ijk_Boring);
16714         DIP("smmla%s%s r%u, r%u, r%u, r%u\n",
16715             nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM, rA);
16716         goto decode_success;
16717      }
16718   }
16719
16720   /* ------------------- NOP ------------------ */
16721   if (0x0320F000 == (insn & 0x0FFFFFFF)) {
16722      DIP("nop%s\n", nCC(INSN_COND));
16723      goto decode_success;
16724   }
16725
16726   /* -------------- (A1) LDRT reg+/-#imm12 -------------- */
16727   /* Load Register Unprivileged:
16728      ldrt<c> Rt, [Rn] {, #+/-imm12}
16729   */
16730   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,1) ) {
16731      UInt rT     = INSN(15,12);
16732      UInt rN     = INSN(19,16);
16733      UInt imm12  = INSN(11,0);
16734      UInt bU     = INSN(23,23);
16735      Bool valid  = True;
16736      if (rT == 15 || rN == 15 || rN == rT) valid = False;
16737      if (valid) {
16738         IRTemp newRt = newTemp(Ity_I32);
16739         loadGuardedLE( newRt,
16740                        ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
16741         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16742         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16743                             getIRegA(rN), mkU32(imm12));
16744         putIRegA(rN, erN, condT, Ijk_Boring);
16745         DIP("ldrt%s r%u, [r%u], #%c%u\n",
16746             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
16747         goto decode_success;
16748      }
16749   }
16750
16751   /* -------------- (A2) LDRT reg+/-reg with shift -------------- */
16752   /* Load Register Unprivileged:
16753      ldrt<c> Rt, [Rn], +/-Rm{, shift}
16754   */
16755   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,1)
16756        && INSN(4,4) == 0 ) {
16757      UInt rT     = INSN(15,12);
16758      UInt rN     = INSN(19,16);
16759      UInt rM     = INSN(3,0);
16760      UInt imm5   = INSN(11,7);
16761      UInt bU     = INSN(23,23);
16762      UInt type   = INSN(6,5);
16763      Bool valid  = True;
16764      if (rT == 15 || rN == 15 || rN == rT || rM == 15
16765          /* || (ArchVersion() < 6 && rM == rN) */)
16766         valid = False;
16767      if (valid) {
16768         IRTemp newRt = newTemp(Ity_I32);
16769         loadGuardedLE( newRt,
16770                        ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
16771         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16772         // dis_buf generated is slightly bogus, in fact.
16773         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
16774                                                       type, imm5, dis_buf);
16775         putIRegA(rN, erN, condT, Ijk_Boring);
16776         DIP("ldrt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
16777         goto decode_success;
16778      }
16779   }
16780
16781   /* -------------- (A1) LDRBT reg+/-#imm12 -------------- */
16782   /* Load Register Byte Unprivileged:
16783      ldrbt<c> Rt, [Rn], #+/-imm12
16784   */
16785   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,1) ) {
16786      UInt rT     = INSN(15,12);
16787      UInt rN     = INSN(19,16);
16788      UInt imm12  = INSN(11,0);
16789      UInt bU     = INSN(23,23);
16790      Bool valid  = True;
16791      if (rT == 15 || rN == 15 || rN == rT) valid = False;
16792      if (valid) {
16793         IRTemp newRt = newTemp(Ity_I32);
16794         loadGuardedLE( newRt,
16795                        ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
16796         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16797         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16798                             getIRegA(rN), mkU32(imm12));
16799         putIRegA(rN, erN, condT, Ijk_Boring);
16800         DIP("ldrbt%s r%u, [r%u], #%c%u\n",
16801             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
16802         goto decode_success;
16803      }
16804   }
16805
16806   /* -------------- (A2) LDRBT reg+/-reg with shift -------------- */
16807   /* Load Register Byte Unprivileged:
16808      ldrbt<c> Rt, [Rn], +/-Rm{, shift}
16809   */
16810   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,1)
16811        && INSN(4,4) == 0 ) {
16812      UInt rT     = INSN(15,12);
16813      UInt rN     = INSN(19,16);
16814      UInt rM     = INSN(3,0);
16815      UInt imm5   = INSN(11,7);
16816      UInt bU     = INSN(23,23);
16817      UInt type   = INSN(6,5);
16818      Bool valid  = True;
16819      if (rT == 15 || rN == 15 || rN == rT || rM == 15
16820          /* || (ArchVersion() < 6 && rM == rN) */)
16821         valid = False;
16822      if (valid) {
16823         IRTemp newRt = newTemp(Ity_I32);
16824         loadGuardedLE( newRt,
16825                        ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
16826         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16827         // dis_buf generated is slightly bogus, in fact.
16828         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
16829                                                       type, imm5, dis_buf);
16830         putIRegA(rN, erN, condT, Ijk_Boring);
16831         DIP("ldrbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
16832         goto decode_success;
16833      }
16834   }
16835
16836   /* -------------- (A1) LDRHT reg+#imm8 -------------- */
16837   /* Load Register Halfword Unprivileged:
16838      ldrht<c> Rt, [Rn] {, #+/-imm8}
16839   */
16840   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
16841       && INSN(7,4) == BITS4(1,0,1,1) ) {
16842      UInt rT    = INSN(15,12);
16843      UInt rN    = INSN(19,16);
16844      UInt bU    = INSN(23,23);
16845      UInt imm4H = INSN(11,8);
16846      UInt imm4L = INSN(3,0);
16847      UInt imm8  = (imm4H << 4) | imm4L;
16848      Bool valid = True;
16849      if (rT == 15 || rN == 15 || rN == rT)
16850         valid = False;
16851      if (valid) {
16852         IRTemp newRt = newTemp(Ity_I32);
16853         loadGuardedLE( newRt,
16854                        ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
16855         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16856         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16857                             getIRegA(rN), mkU32(imm8));
16858         putIRegA(rN, erN, condT, Ijk_Boring);
16859         DIP("ldrht%s r%u, [r%u], #%c%u\n",
16860             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
16861         goto decode_success;
16862      }
16863   }
16864
16865   /* -------------- (A2) LDRHT reg+/-reg -------------- */
16866   /* Load Register Halfword Unprivileged:
16867      ldrht<c> Rt, [Rn], +/-Rm
16868   */
16869   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
16870       && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
16871      UInt rT    = INSN(15,12);
16872      UInt rN    = INSN(19,16);
16873      UInt rM    = INSN(3,0);
16874      UInt bU    = INSN(23,23);
16875      Bool valid = True;
16876      if (rT == 15 || rN == 15 || rN == rT || rM == 15)
16877         valid = False;
16878      if (valid) {
16879         IRTemp newRt = newTemp(Ity_I32);
16880         loadGuardedLE( newRt,
16881                        ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
16882         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16883         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16884                             getIRegA(rN), getIRegA(rM));
16885         putIRegA(rN, erN, condT, Ijk_Boring);
16886         DIP("ldrht%s r%u, [r%u], %cr%u\n",
16887             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
16888         goto decode_success;
16889      }
16890   }
16891
16892   /* -------------- (A1) LDRSHT reg+#imm8 -------------- */
16893   /* Load Register Signed Halfword Unprivileged:
16894      ldrsht<c> Rt, [Rn] {, #+/-imm8}
16895   */
16896   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
16897       && INSN(7,4) == BITS4(1,1,1,1)) {
16898      UInt rT    = INSN(15,12);
16899      UInt rN    = INSN(19,16);
16900      UInt bU    = INSN(23,23);
16901      UInt imm4H = INSN(11,8);
16902      UInt imm4L = INSN(3,0);
16903      UInt imm8  = (imm4H << 4) | imm4L;
16904      Bool valid = True;
16905      if (rN == 15 || rT == 15 || rN == rT)
16906         valid = False;
16907      if (valid) {
16908         IRTemp newRt = newTemp(Ity_I32);
16909         loadGuardedLE( newRt,
16910                        ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
16911         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16912         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16913                             getIRegA(rN), mkU32(imm8));
16914         putIRegA(rN, erN, condT, Ijk_Boring);
16915         DIP("ldrsht%s r%u, [r%u], #%c%u\n",
16916             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
16917         goto decode_success;
16918      }
16919   }
16920
16921   /* -------------- (A2) LDRSHT reg+/-reg -------------- */
16922   /* Load Register Signed Halfword Unprivileged:
16923      ldrsht<c> Rt, [Rn], +/-Rm
16924   */
16925   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
16926       && INSN(11,4) == BITS8(0,0,0,0,1,1,1,1)) {
16927      UInt rT    = INSN(15,12);
16928      UInt rN    = INSN(19,16);
16929      UInt rM    = INSN(3,0);
16930      UInt bU    = INSN(23,23);
16931      Bool valid = True;
16932      if (rN == 15 || rT == 15 || rN == rT || rM == 15)
16933         valid = False;
16934      if (valid) {
16935         IRTemp newRt = newTemp(Ity_I32);
16936         loadGuardedLE( newRt,
16937                        ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
16938         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16939         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16940                             getIRegA(rN), getIRegA(rM));
16941         putIRegA(rN, erN, condT, Ijk_Boring);
16942         DIP("ldrsht%s r%u, [r%u], %cr%u\n",
16943             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
16944         goto decode_success;
16945      }
16946   }
16947
16948   /* -------------- (A1) LDRSBT reg+#imm8 -------------- */
16949   /* Load Register Signed Byte Unprivileged:
16950      ldrsbt<c> Rt, [Rn] {, #+/-imm8}
16951   */
16952   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
16953       && INSN(7,4) == BITS4(1,1,0,1)) {
16954      UInt rT    = INSN(15,12);
16955      UInt rN    = INSN(19,16);
16956      UInt bU    = INSN(23,23);
16957      UInt imm4H = INSN(11,8);
16958      UInt imm4L = INSN(3,0);
16959      UInt imm8  = (imm4H << 4) | imm4L;
16960      Bool valid = True;
16961      if (rT == 15 || rN == 15 || rN == rT)
16962         valid = False;
16963      if (valid) {
16964         IRTemp newRt = newTemp(Ity_I32);
16965         loadGuardedLE( newRt,
16966                        ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
16967         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16968         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16969                             getIRegA(rN), mkU32(imm8));
16970         putIRegA(rN, erN, condT, Ijk_Boring);
16971         DIP("ldrsbt%s r%u, [r%u], #%c%u\n",
16972             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
16973         goto decode_success;
16974      }
16975   }
16976
16977   /* -------------- (A2) LDRSBT reg+/-reg -------------- */
16978   /* Load Register Signed Byte Unprivileged:
16979      ldrsbt<c> Rt, [Rn], +/-Rm
16980   */
16981   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
16982       && INSN(11,4) == BITS8(0,0,0,0,1,1,0,1)) {
16983      UInt rT    = INSN(15,12);
16984      UInt rN    = INSN(19,16);
16985      UInt bU    = INSN(23,23);
16986      UInt rM    = INSN(3,0);
16987      Bool valid = True;
16988      if (rT == 15 || rN == 15 || rN == rT || rM == 15)
16989         valid = False;
16990      if (valid) {
16991         IRTemp newRt = newTemp(Ity_I32);
16992         loadGuardedLE( newRt,
16993                        ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
16994         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16995         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16996                             getIRegA(rN), getIRegA(rM));
16997         putIRegA(rN, erN, condT, Ijk_Boring);
16998         DIP("ldrsbt%s r%u, [r%u], %cr%u\n",
16999             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
17000         goto decode_success;
17001      }
17002   }
17003
17004   /* -------------- (A1) STRBT reg+#imm12 -------------- */
17005   /* Store Register Byte Unprivileged:
17006      strbt<c> Rt, [Rn], #+/-imm12
17007   */
17008   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,0) ) {
17009      UInt rT     = INSN(15,12);
17010      UInt rN     = INSN(19,16);
17011      UInt imm12  = INSN(11,0);
17012      UInt bU     = INSN(23,23);
17013      Bool valid = True;
17014      if (rT == 15 || rN == 15 || rN == rT) valid = False;
17015      if (valid) {
17016         IRExpr* address = getIRegA(rN);
17017         IRExpr* data = unop(Iop_32to8, getIRegA(rT));
17018         storeGuardedLE( address, data, condT);
17019         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
17020                               getIRegA(rN), mkU32(imm12));
17021         putIRegA(rN, newRn, condT, Ijk_Boring);
17022         DIP("strbt%s r%u, [r%u], #%c%u\n",
17023             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
17024         goto decode_success;
17025      }
17026   }
17027
17028   /* -------------- (A2) STRBT reg+/-reg -------------- */
17029   /* Store Register Byte Unprivileged:
17030      strbt<c> Rt, [Rn], +/-Rm{, shift}
17031   */
17032   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,0)
17033       && INSN(4,4) == 0) {
17034      UInt rT     = INSN(15,12);
17035      UInt rN     = INSN(19,16);
17036      UInt imm5   = INSN(11,7);
17037      UInt type   = INSN(6,5);
17038      UInt rM     = INSN(3,0);
17039      UInt bU     = INSN(23,23);
17040      Bool valid  = True;
17041      if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
17042      if (valid) {
17043         IRExpr* address = getIRegA(rN);
17044         IRExpr* data = unop(Iop_32to8, getIRegA(rT));
17045         storeGuardedLE( address, data, condT);
17046         // dis_buf generated is slightly bogus, in fact.
17047         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
17048                                                       type, imm5, dis_buf);
17049         putIRegA(rN, erN, condT, Ijk_Boring);
17050         DIP("strbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
17051         goto decode_success;
17052      }
17053   }
17054
17055   /* -------------- (A1) STRHT reg+#imm8 -------------- */
17056   /* Store Register Halfword Unprivileged:
17057      strht<c> Rt, [Rn], #+/-imm8
17058   */
17059   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,0)
17060       && INSN(7,4) == BITS4(1,0,1,1) ) {
17061      UInt rT    = INSN(15,12);
17062      UInt rN    = INSN(19,16);
17063      UInt imm4H = INSN(11,8);
17064      UInt imm4L = INSN(3,0);
17065      UInt imm8  = (imm4H << 4) | imm4L;
17066      UInt bU    = INSN(23,23);
17067      Bool valid = True;
17068      if (rT == 15 || rN == 15 || rN == rT) valid = False;
17069      if (valid) {
17070         IRExpr* address = getIRegA(rN);
17071         IRExpr* data = unop(Iop_32to16, getIRegA(rT));
17072         storeGuardedLE( address, data, condT);
17073         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
17074                               getIRegA(rN), mkU32(imm8));
17075         putIRegA(rN, newRn, condT, Ijk_Boring);
17076         DIP("strht%s r%u, [r%u], #%c%u\n",
17077             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
17078         goto decode_success;
17079      }
17080   }
17081
17082   /* -------------- (A2) STRHT reg+reg -------------- */
17083   /* Store Register Halfword Unprivileged:
17084      strht<c> Rt, [Rn], +/-Rm
17085   */
17086   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,0)
17087       && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
17088      UInt rT    = INSN(15,12);
17089      UInt rN    = INSN(19,16);
17090      UInt rM    = INSN(3,0);
17091      UInt bU    = INSN(23,23);
17092      Bool valid = True;
17093      if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
17094      if (valid) {
17095         IRExpr* address = getIRegA(rN);
17096         IRExpr* data = unop(Iop_32to16, getIRegA(rT));
17097         storeGuardedLE( address, data, condT);
17098         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
17099                               getIRegA(rN), getIRegA(rM));
17100         putIRegA(rN, newRn, condT, Ijk_Boring);
17101         DIP("strht%s r%u, [r%u], %cr%u\n",
17102             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
17103         goto decode_success;
17104      }
17105   }
17106
17107   /* -------------- (A1) STRT reg+imm12 -------------- */
17108   /* Store Register Unprivileged:
17109      strt<c> Rt, [Rn], #+/-imm12
17110   */
17111   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,0) ) {
17112      UInt rT    = INSN(15,12);
17113      UInt rN    = INSN(19,16);
17114      UInt imm12 = INSN(11,0);
17115      UInt bU    = INSN(23,23);
17116      Bool valid = True;
17117      if (rN == 15 || rN == rT) valid = False;
17118      if (valid) {
17119         IRExpr* address = getIRegA(rN);
17120         storeGuardedLE( address, getIRegA(rT), condT);
17121         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
17122                               getIRegA(rN), mkU32(imm12));
17123         putIRegA(rN, newRn, condT, Ijk_Boring);
17124         DIP("strt%s r%u, [r%u], %c%u\n",
17125             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
17126         goto decode_success;
17127      }
17128   }
17129
17130   /* -------------- (A2) STRT reg+reg -------------- */
17131   /* Store Register Unprivileged:
17132      strt<c> Rt, [Rn], +/-Rm{, shift}
17133   */
17134   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,0)
17135       && INSN(4,4) == 0 ) {
17136      UInt rT    = INSN(15,12);
17137      UInt rN    = INSN(19,16);
17138      UInt rM    = INSN(3,0);
17139      UInt type  = INSN(6,5);
17140      UInt imm5  = INSN(11,7);
17141      UInt bU    = INSN(23,23);
17142      Bool valid = True;
17143      if (rN == 15 || rN == rT || rM == 15) valid = False;
17144      /* FIXME We didn't do:
17145         if ArchVersion() < 6 && rM == rN then UNPREDICTABLE */
17146      if (valid) {
17147         storeGuardedLE( getIRegA(rN), getIRegA(rT), condT);
17148         // dis_buf generated is slightly bogus, in fact.
17149         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
17150                                                       type, imm5, dis_buf);
17151         putIRegA(rN, erN, condT, Ijk_Boring);
17152         DIP("strt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
17153         goto decode_success;
17154      }
17155   }
17156
17157   /* ----------------------------------------------------------- */
17158   /* -- ARMv7 instructions                                    -- */
17159   /* ----------------------------------------------------------- */
17160
17161   /* -------------- read CP15 TPIDRURO register ------------- */
17162   /* mrc     p15, 0, r0, c13, c0, 3  up to
17163      mrc     p15, 0, r14, c13, c0, 3
17164   */
17165   /* I don't know whether this is really v7-only.  But anyway, we
17166      have to support it since arm-linux uses TPIDRURO as a thread
17167      state register. */
17168   if (0x0E1D0F70 == (insn & 0x0FFF0FFF)) {
17169      UInt rD = INSN(15,12);
17170      if (rD <= 14) {
17171         /* skip r15, that's too stupid to handle */
17172         putIRegA(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32),
17173                      condT, Ijk_Boring);
17174         DIP("mrc%s p15,0, r%u, c13, c0, 3\n", nCC(INSN_COND), rD);
17175         goto decode_success;
17176      }
17177      /* fall through */
17178   }
17179
17180   /* Handle various kinds of barriers.  This is rather indiscriminate
17181      in the sense that they are all turned into an IR Fence, which
17182      means we don't know which they are, so the back end has to
17183      re-emit them all when it comes across an IR Fence.
17184   */
17185   /* v6 */ /* mcr 15, 0, rT, c7, c10, 5 */
17186   if (0xEE070FBA == (insn & 0xFFFF0FFF)) {
17187      UInt rT = INSN(15,12);
17188      if (rT <= 14) {
17189         /* mcr 15, 0, rT, c7, c10, 5 (v6) equiv to DMB (v7).  Data
17190            Memory Barrier -- ensures ordering of memory accesses. */
17191         stmt( IRStmt_MBE(Imbe_Fence) );
17192         DIP("mcr 15, 0, r%u, c7, c10, 5 (data memory barrier)\n", rT);
17193         goto decode_success;
17194      }
17195      /* fall through */
17196   }
17197   /* other flavours of barrier */
17198   switch (insn) {
17199      case 0xEE070F9A: /* v6 */
17200         /* mcr 15, 0, r0, c7, c10, 4 (v6) equiv to DSB (v7).  Data
17201            Synch Barrier -- ensures completion of memory accesses. */
17202         stmt( IRStmt_MBE(Imbe_Fence) );
17203         DIP("mcr 15, 0, r0, c7, c10, 4 (data synch barrier)\n");
17204         goto decode_success;
17205      case 0xEE070F95: /* v6 */
17206         /* mcr 15, 0, r0, c7, c5, 4 (v6) equiv to ISB (v7).
17207            Instruction Synchronisation Barrier (or Flush Prefetch
17208            Buffer) -- a pipe flush, I think.  I suspect we could
17209            ignore those, but to be on the safe side emit a fence
17210            anyway. */
17211         stmt( IRStmt_MBE(Imbe_Fence) );
17212         DIP("mcr 15, 0, r0, c7, c5, 4 (insn synch barrier)\n");
17213         goto decode_success;
17214      default:
17215         break;
17216   }
17217
17218   /* ----------------------------------------------------------- */
17219   /* -- VFP (CP 10, CP 11) instructions (in ARM mode)         -- */
17220   /* ----------------------------------------------------------- */
17221
17222   if (INSN_COND != ARMCondNV) {
17223      Bool ok_vfp = decode_CP10_CP11_instruction (
17224                       &dres, INSN(27,0), condT, INSN_COND,
17225                       False/*!isT*/
17226                    );
17227      if (ok_vfp)
17228         goto decode_success;
17229   }
17230
17231   /* ----------------------------------------------------------- */
17232   /* -- NEON instructions (in ARM mode)                       -- */
17233   /* ----------------------------------------------------------- */
17234
17235   /* These are all in NV space, and so are taken care of (far) above,
17236      by a call from this function to decode_NV_instruction(). */
17237
17238   /* ----------------------------------------------------------- */
17239   /* -- v6 media instructions (in ARM mode)                   -- */
17240   /* ----------------------------------------------------------- */
17241
17242   { Bool ok_v6m = decode_V6MEDIA_instruction(
17243                       &dres, INSN(27,0), condT, INSN_COND,
17244                       False/*!isT*/
17245                   );
17246     if (ok_v6m)
17247        goto decode_success;
17248   }
17249
17250   /* ----------------------------------------------------------- */
17251   /* -- Undecodable                                           -- */
17252   /* ----------------------------------------------------------- */
17253
17254   goto decode_failure;
17255   /*NOTREACHED*/
17256
17257  decode_failure:
17258   /* All decode failures end up here. */
17259   if (sigill_diag) {
17260      vex_printf("disInstr(arm): unhandled instruction: "
17261                 "0x%x\n", insn);
17262      vex_printf("                 cond=%d(0x%x) 27:20=%u(0x%02x) "
17263                                   "4:4=%d "
17264                                   "3:0=%u(0x%x)\n",
17265                 (Int)INSN_COND, (UInt)INSN_COND,
17266                 (Int)INSN(27,20), (UInt)INSN(27,20),
17267                 (Int)INSN(4,4),
17268                 (Int)INSN(3,0), (UInt)INSN(3,0) );
17269   }
17270
17271   /* Tell the dispatcher that this insn cannot be decoded, and so has
17272      not been executed, and (is currently) the next to be executed.
17273      R15 should be up-to-date since it is made so at the start of each
17274      insn, but nevertheless be paranoid and update it again right
17275      now. */
17276   vassert(0 == (guest_R15_curr_instr_notENC & 3));
17277   llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
17278   dres.whatNext    = Dis_StopHere;
17279   dres.jk_StopHere = Ijk_NoDecode;
17280   dres.len         = 0;
17281   return dres;
17282
17283  decode_success:
17284   /* All decode successes end up here. */
17285   DIP("\n");
17286
17287   vassert(dres.len == 4 || dres.len == 20);
17288
17289   /* Now then.  Do we have an implicit jump to r15 to deal with? */
17290   if (r15written) {
17291      /* If we have a jump to deal with, we assume that there's been no
17292         other competing branch stuff previously generated for this
17293         insn.  That's reasonable, in the sense that the ARM insn set
17294         appears to declare as "Unpredictable" any instruction which
17295         generates more than one possible new value for r15.  Hence
17296         just assert.  The decoders themselves should check against
17297         all such instructions which are thusly Unpredictable, and
17298         decline to decode them.  Hence we should never get here if we
17299         have competing new values for r15, and hence it is safe to
17300         assert here. */
17301      vassert(dres.whatNext == Dis_Continue);
17302      vassert(irsb->next == NULL);
17303      vassert(irsb->jumpkind == Ijk_Boring);
17304      /* If r15 is unconditionally written, terminate the block by
17305         jumping to it.  If it's conditionally written, still
17306         terminate the block (a shame, but we can't do side exits to
17307         arbitrary destinations), but first jump to the next
17308         instruction if the condition doesn't hold. */
17309      /* We can't use getIReg(15) to get the destination, since that
17310         will produce r15+8, which isn't what we want.  Must use
17311         llGetIReg(15) instead. */
17312      if (r15guard == IRTemp_INVALID) {
17313         /* unconditional */
17314      } else {
17315         /* conditional */
17316         stmt( IRStmt_Exit(
17317                  unop(Iop_32to1,
17318                       binop(Iop_Xor32,
17319                             mkexpr(r15guard), mkU32(1))),
17320                  r15kind,
17321                  IRConst_U32(guest_R15_curr_instr_notENC + 4),
17322                  OFFB_R15T
17323         ));
17324      }
17325      /* This seems crazy, but we're required to finish the insn with
17326         a write to the guest PC.  As usual we rely on ir_opt to tidy
17327         up later. */
17328      llPutIReg(15, llGetIReg(15));
17329      dres.whatNext    = Dis_StopHere;
17330      dres.jk_StopHere = r15kind;
17331   } else {
17332      /* Set up the end-state in the normal way. */
17333      switch (dres.whatNext) {
17334         case Dis_Continue:
17335            llPutIReg(15, mkU32(dres.len + guest_R15_curr_instr_notENC));
17336            break;
17337         case Dis_ResteerU:
17338         case Dis_ResteerC:
17339            llPutIReg(15, mkU32(dres.continueAt));
17340            break;
17341         case Dis_StopHere:
17342            break;
17343         default:
17344            vassert(0);
17345      }
17346   }
17347
17348   return dres;
17349
17350#  undef INSN_COND
17351#  undef INSN
17352}
17353
17354
17355/*------------------------------------------------------------*/
17356/*--- Disassemble a single Thumb2 instruction              ---*/
17357/*------------------------------------------------------------*/
17358
/* Forward declaration only; the table's contents are not given at
   this point.  The ITxxx optimisation analysis in disInstr_THUMB_WRK
   indexes it with the low 8 bits of a halfword that might be an 'it'
   instruction (0xBFxy) and treats the entry as the number of
   instructions guarded by that 'it'; the analysis asserts that the
   value is in the range 1 .. 4. */
17359static const UChar it_length_table[256]; /* fwds */
17360
17361/* NB: in Thumb mode we do fetches of regs with getIRegT, which
17362   automagically adds 4 to fetches of r15.  However, writes to regs
17363   are done with putIRegT, which disallows writes to r15.  Hence any
17364   r15 writes and associated jumps have to be done "by hand". */
17365
17366/* Disassemble a single Thumb instruction into IR.  The instruction is
17367   located in host memory at guest_instr, and has (decoded) guest IP
17368   of guest_R15_curr_instr_notENC, which will have been set before the
17369   call here. */
17370
17371static
17372DisResult disInstr_THUMB_WRK (
17373             Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
17374             Bool         resteerCisOk,
17375             void*        callback_opaque,
17376             UChar*       guest_instr,
17377             VexArchInfo* archinfo,
17378             VexAbiInfo*  abiinfo,
17379             Bool         sigill_diag
17380          )
17381{
17382   /* A macro to fish bits out of insn0.  There's also INSN1, to fish
17383      bits out of insn1, but that's defined only after the end of the
17384      16-bit insn decoder, so as to stop it mistakenly being used
17385      therein. */
17386#  define INSN0(_bMax,_bMin)  SLICE_UInt(((UInt)insn0), (_bMax), (_bMin))
17387
17388   DisResult dres;
17389   UShort    insn0; /*  first 16 bits of the insn */
17390   UShort    insn1; /* second 16 bits of the insn */
17391   //Bool      allow_VFP = False;
17392   //UInt      hwcaps = archinfo->hwcaps;
17393   HChar     dis_buf[128];  // big enough to hold LDMIA etc text
17394
17395   /* Summary result of the ITxxx backwards analysis: False == safe
17396      but suboptimal. */
17397   Bool guaranteedUnconditional = False;
17398
17399   /* What insn variants are we supporting today? */
17400   //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
17401   // etc etc
17402
17403   /* Set result defaults. */
17404   dres.whatNext    = Dis_Continue;
17405   dres.len         = 2;
17406   dres.continueAt  = 0;
17407   dres.jk_StopHere = Ijk_INVALID;
17408
17409   /* Set default actions for post-insn handling of writes to r15, if
17410      required. */
17411   r15written = False;
17412   r15guard   = IRTemp_INVALID; /* unconditional */
17413   r15kind    = Ijk_Boring;
17414
17415   /* Insns could be 2 or 4 bytes long.  Just get the first 16 bits at
17416      this point.  If we need the second 16, get them later.  We can't
17417      get them both out immediately because it risks a fault (very
17418      unlikely, but ..) if the second 16 bits aren't actually
17419      necessary. */
17420   insn0 = getUShortLittleEndianly( guest_instr );
17421   insn1 = 0; /* We'll get it later, once we know we need it. */
17422
17423   /* Similarly, will set this later. */
17424   IRTemp old_itstate = IRTemp_INVALID;
17425
17426   if (0) vex_printf("insn: 0x%x\n", insn0);
17427
17428   DIP("\t(thumb) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
17429
17430   vassert(0 == (guest_R15_curr_instr_notENC & 1));
17431
17432   /* ----------------------------------------------------------- */
17433   /* Spot "Special" instructions (see comment at top of file). */
17434   {
17435      UChar* code = (UChar*)guest_instr;
17436      /* Spot the 16-byte preamble:
17437
17438         ea4f 0cfc  mov.w   ip, ip, ror #3
17439         ea4f 3c7c  mov.w   ip, ip, ror #13
17440         ea4f 7c7c  mov.w   ip, ip, ror #29
17441         ea4f 4cfc  mov.w   ip, ip, ror #19
17442      */
17443      UInt word1 = 0x0CFCEA4F;
17444      UInt word2 = 0x3C7CEA4F;
17445      UInt word3 = 0x7C7CEA4F;
17446      UInt word4 = 0x4CFCEA4F;
17447      if (getUIntLittleEndianly(code+ 0) == word1 &&
17448          getUIntLittleEndianly(code+ 4) == word2 &&
17449          getUIntLittleEndianly(code+ 8) == word3 &&
17450          getUIntLittleEndianly(code+12) == word4) {
17451         /* Got a "Special" instruction preamble.  Which one is it? */
17452         // 0x 0A 0A EA 4A
17453         if (getUIntLittleEndianly(code+16) == 0x0A0AEA4A
17454                                               /* orr.w r10,r10,r10 */) {
17455            /* R3 = client_request ( R4 ) */
17456            DIP("r3 = client_request ( %%r4 )\n");
17457            llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
17458            dres.jk_StopHere = Ijk_ClientReq;
17459            dres.whatNext    = Dis_StopHere;
17460            goto decode_success;
17461         }
17462         else
17463         // 0x 0B 0B EA 4B
17464         if (getUIntLittleEndianly(code+16) == 0x0B0BEA4B
17465                                               /* orr r11,r11,r11 */) {
17466            /* R3 = guest_NRADDR */
17467            DIP("r3 = guest_NRADDR\n");
17468            dres.len = 20;
17469            llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
17470            goto decode_success;
17471         }
17472         else
17473         // 0x 0C 0C EA 4C
17474         if (getUIntLittleEndianly(code+16) == 0x0C0CEA4C
17475                                               /* orr r12,r12,r12 */) {
17476            /*  branch-and-link-to-noredir R4 */
17477            DIP("branch-and-link-to-noredir r4\n");
17478            llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
17479            llPutIReg(15, getIRegT(4));
17480            dres.jk_StopHere = Ijk_NoRedir;
17481            dres.whatNext    = Dis_StopHere;
17482            goto decode_success;
17483         }
17484         else
17485         // 0x 09 09 EA 49
17486         if (getUIntLittleEndianly(code+16) == 0x0909EA49
17487                                               /* orr r9,r9,r9 */) {
17488            /* IR injection */
17489            DIP("IR injection\n");
17490            vex_inject_ir(irsb, Iend_LE);
17491            // Invalidate the current insn. The reason is that the IRop we're
17492            // injecting here can change. In which case the translation has to
17493            // be redone. For ease of handling, we simply invalidate all the
17494            // time.
17495            stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
17496            stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
17497            llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
17498            dres.whatNext    = Dis_StopHere;
17499            dres.jk_StopHere = Ijk_InvalICache;
17500            goto decode_success;
17501         }
17502         /* We don't know what it is.  Set insn0 so decode_failure
17503            can print the insn following the Special-insn preamble. */
17504         insn0 = getUShortLittleEndianly(code+16);
17505         goto decode_failure;
17506         /*NOTREACHED*/
17507      }
17508
17509   }
17510
17511   /* ----------------------------------------------------------- */
17512
17513   /* Main Thumb instruction decoder starts here.  It's a series of
17514      switches which examine ever longer bit sequences at the MSB of
17515      the instruction word, first for 16-bit insns, then for 32-bit
17516      insns. */
17517
17518   /* --- BEGIN ITxxx optimisation analysis --- */
17519   /* This is a crucial optimisation for the ITState boilerplate that
17520      follows.  Examine the 9 halfwords preceding this instruction,
17521      and if we are absolutely sure that none of them constitute an
17522      'it' instruction, then we can be sure that this instruction is
17523      not under the control of any 'it' instruction, and so
17524      guest_ITSTATE must be zero.  So write zero into ITSTATE right
17525      now, so that iropt can fold out almost all of the resulting
17526      junk.
17527
17528      If we aren't sure, we can always safely skip this step.  So be a
17529      bit conservative about it: only poke around in the same page as
17530      this instruction, lest we get a fault from the previous page
17531      that would not otherwise have happened.  The saving grace is
17532      that such skipping is pretty rare -- it only happens,
17533      statistically, 18/4096ths of the time, so is judged unlikely to
17534      be a performance problem.
17535
17536      FIXME: do better.  Take into account the number of insns covered
17537      by any IT insns we find, to rule out cases where an IT clearly
17538      cannot cover this instruction.  This would improve behaviour for
17539      branch targets immediately following an IT-guarded group that is
17540      not of full length.  Eg, (and completely ignoring issues of 16-
17541      vs 32-bit insn length):
17542
17543             ite cond
17544             insn1
17545             insn2
17546      label: insn3
17547             insn4
17548
17549      The 'it' only conditionalises insn1 and insn2.  However, the
17550      current analysis is conservative and considers insn3 and insn4
17551      also possibly guarded.  Hence if 'label:' is the start of a hot
17552      loop we will get a big performance hit.
17553   */
17554   {
17555      /* Summary result of this analysis: False == safe but
17556         suboptimal. */
17557      vassert(guaranteedUnconditional == False);
17558
17559      UInt pc = guest_R15_curr_instr_notENC;
17560      vassert(0 == (pc & 1));
17561
17562      UInt pageoff = pc & 0xFFF;
17563      if (pageoff >= 18) {
17564         /* It's safe to poke about in the 9 halfwords preceding this
17565            insn.  So, have a look at them. */
17566         guaranteedUnconditional = True; /* assume no 'it' insn found,
17567                                            till we do */
17568         UShort* hwp = (UShort*)(HWord)pc;
17569         Int i;
17570         for (i = -1; i >= -9; i--) {
17571            /* We're in the same page.  (True, but commented out due
17572               to expense.) */
17573            /*
17574            vassert( ( ((UInt)(&hwp[i])) & 0xFFFFF000 )
17575                      == ( pc & 0xFFFFF000 ) );
17576            */
17577            /* All valid IT instructions must have the form 0xBFxy,
17578               where x can be anything, but y must be nonzero.  Find
17579               the number of insns covered by it (1 .. 4) and check to
17580               see if it can possibly reach up to the instruction in
17581               question.  Some (x,y) combinations mean UNPREDICTABLE,
17582               and the table is constructed to be conservative by
17583               returning 4 for those cases, so the analysis is safe
17584               even if the code uses unpredictable IT instructions (in
17585               which case its authors are nuts, but hey.)  */
17586            UShort hwp_i = hwp[i];
17587            if (UNLIKELY((hwp_i & 0xFF00) == 0xBF00 && (hwp_i & 0xF) != 0)) {
17588               /* might be an 'it' insn. */
17589               /* # guarded insns */
17590               Int n_guarded = (Int)it_length_table[hwp_i & 0xFF];
17591               vassert(n_guarded >= 1 && n_guarded <= 4);
17592               if (n_guarded * 2 /* # guarded HWs, worst case */
17593                   > (-(i+1)))   /* -(i+1): # remaining HWs after the IT */
17594                   /* -(i+0) also seems to work, even though I think
17595                      it's wrong.  I don't understand that. */
17596                  guaranteedUnconditional = False;
17597               break;
17598            }
17599         }
17600      }
17601   }
17602   /* --- END ITxxx optimisation analysis --- */
17603
17604   /* Generate the guarding condition for this insn, by examining
17605      ITSTATE.  Assign it to condT.  Also, generate new
17606      values for ITSTATE ready for stuffing back into the
17607      guest state, but don't actually do the Put yet, since it will
17608      need to be stuffed back in only after the instruction gets to a
17609      point where it is sure to complete.  Mostly we let the code at
17610      decode_success handle this, but in cases where the insn contains
17611      a side exit, we have to update them before the exit. */
17612
17613   /* If the ITxxx optimisation analysis above could not prove that
17614      this instruction is guaranteed unconditional, we insert a
17615      lengthy IR preamble to compute the guarding condition at
17616      runtime.  If it can prove it (which obviously we hope is the
17617      normal case) then we insert a minimal preamble, which is
17618      equivalent to setting guest_ITSTATE to zero and then folding
17619      that through the full preamble (which completely disappears). */
17620
17621   IRTemp condT              = IRTemp_INVALID;
17622   IRTemp cond_AND_notInIT_T = IRTemp_INVALID;
17623
17624   IRTemp new_itstate        = IRTemp_INVALID;
17625   vassert(old_itstate == IRTemp_INVALID);
17626
17627   if (guaranteedUnconditional) {
17628      /* BEGIN "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
17629
17630      // ITSTATE = 0 :: I32
17631      IRTemp z32 = newTemp(Ity_I32);
17632      assign(z32, mkU32(0));
17633      put_ITSTATE(z32);
17634
17635      // old_itstate = 0 :: I32
17636      //
17637      // old_itstate = get_ITSTATE();
17638      old_itstate = z32; /* 0 :: I32 */
17639
17640      // new_itstate = old_itstate >> 8
17641      //             = 0 >> 8
17642      //             = 0 :: I32
17643      //
17644      // new_itstate = newTemp(Ity_I32);
17645      // assign(new_itstate,
17646      //        binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
17647      new_itstate = z32;
17648
17649      // ITSTATE = 0 :: I32(again)
17650      //
17651      // put_ITSTATE(new_itstate);
17652
17653      // condT1 = calc_cond_dyn( xor(and(old_itstate,0xF0), 0xE0) )
17654      //        = calc_cond_dyn( xor(0,0xE0) )
17655      //        = calc_cond_dyn ( 0xE0 )
17656      //        = 1 :: I32
17657      // Not that this matters, since the computed value is not used:
17658      // see condT folding below
17659      //
17660      // IRTemp condT1 = newTemp(Ity_I32);
17661      // assign(condT1,
17662      //        mk_armg_calculate_condition_dyn(
17663      //           binop(Iop_Xor32,
17664      //                 binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
17665      //                 mkU32(0xE0))
17666      //       )
17667      // );
17668
17669      // condT = 32to8(and32(old_itstate,0xF0)) == 0  ? 1  : condT1
17670      //       = 32to8(and32(0,0xF0)) == 0  ? 1  : condT1
17671      //       = 32to8(0) == 0  ? 1  : condT1
17672      //       = 0 == 0  ? 1  : condT1
17673      //       = 1
17674      //
17675      // condT = newTemp(Ity_I32);
17676      // assign(condT, IRExpr_ITE(
17677      //                  unop(Iop_32to8, binop(Iop_And32,
17678      //                                        mkexpr(old_itstate),
17679      //                                        mkU32(0xF0))),
17680      //                  mkexpr(condT1),
17681      //                  mkU32(1))
17682      //       ));
17683      condT = newTemp(Ity_I32);
17684      assign(condT, mkU32(1));
17685
17686      // notInITt = xor32(and32(old_itstate, 1), 1)
17687      //          = xor32(and32(0, 1), 1)
17688      //          = xor32(0, 1)
17689      //          = 1 :: I32
17690      //
17691      // IRTemp notInITt = newTemp(Ity_I32);
17692      // assign(notInITt,
17693      //        binop(Iop_Xor32,
17694      //              binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
17695      //              mkU32(1)));
17696
17697      // cond_AND_notInIT_T = and32(notInITt, condT)
17698      //                    = and32(1, 1)
17699      //                    = 1
17700      //
17701      // cond_AND_notInIT_T = newTemp(Ity_I32);
17702      // assign(cond_AND_notInIT_T,
17703      //        binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
17704      cond_AND_notInIT_T = condT; /* 1 :: I32 */
17705
17706      /* END "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
17707   } else {
17708      /* BEGIN { STANDARD PREAMBLE; } */
17709
17710      old_itstate = get_ITSTATE();
17711
17712      new_itstate = newTemp(Ity_I32);
17713      assign(new_itstate,
17714             binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
17715
17716      put_ITSTATE(new_itstate);
17717
17718      /* Same strategy as for ARM insns: generate a condition
17719         temporary at this point (or IRTemp_INVALID, meaning
17720         unconditional).  We leave it to lower-level instruction
17721         decoders to decide whether they can generate straight-line
17722         code, or whether they must generate a side exit before the
17723         instruction.  condT :: Ity_I32 and is always either zero or
17724         one. */
17725      IRTemp condT1 = newTemp(Ity_I32);
17726      assign(condT1,
17727             mk_armg_calculate_condition_dyn(
17728                binop(Iop_Xor32,
17729                      binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
17730                      mkU32(0xE0))
17731            )
17732      );
17733
17734      /* This is a bit complex, but needed to make Memcheck understand
17735         that, if the condition in old_itstate[7:4] denotes AL (that
17736         is, if this instruction is to be executed unconditionally),
17737         then condT does not depend on the results of calling the
17738         helper.
17739
17740         We test explicitly for old_itstate[7:4] == AL ^ 0xE, and in
17741         that case set condT directly to 1.  Else we use the results
17742         of the helper.  Since old_itstate is always defined and
17743         because Memcheck does lazy V-bit propagation through ITE,
17744         this will cause condT to always be a defined 1 if the
17745         condition is 'AL'.  From an execution semantics point of view
17746         this is irrelevant since we're merely duplicating part of the
17747         behaviour of the helper.  But it makes it clear to Memcheck,
17748         in this case, that condT does not in fact depend on the
17749         contents of the condition code thunk.  Without it, we get
17750         quite a lot of false errors.
17751
17752         So, just to clarify: from a straight semantics point of view,
17753         we can simply do "assign(condT, mkexpr(condT1))", and the
17754         simulator still runs fine.  It's just that we get loads of
17755         false errors from Memcheck. */
17756      condT = newTemp(Ity_I32);
17757      assign(condT, IRExpr_ITE(
17758                       binop(Iop_CmpNE32, binop(Iop_And32,
17759                                                mkexpr(old_itstate),
17760                                                mkU32(0xF0)),
17761                                          mkU32(0)),
17762                       mkexpr(condT1),
17763                       mkU32(1)
17764            ));
17765
17766      /* Something we don't have in ARM: generate a 0 or 1 value
17767         indicating whether or not we are in an IT block (NB: 0 = in
17768         IT block, 1 = not in IT block).  This is used to gate
17769         condition code updates in 16-bit Thumb instructions. */
17770      IRTemp notInITt = newTemp(Ity_I32);
17771      assign(notInITt,
17772             binop(Iop_Xor32,
17773                   binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
17774                   mkU32(1)));
17775
17776      /* Compute 'condT && notInITt' -- that is, the instruction is
17777         going to execute, and we're not in an IT block.  This is the
17778         gating condition for updating condition codes in 16-bit Thumb
17779         instructions, except for CMP, CMN and TST. */
17780      cond_AND_notInIT_T = newTemp(Ity_I32);
17781      assign(cond_AND_notInIT_T,
17782             binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
17783      /* END { STANDARD PREAMBLE; } */
17784   }
17785
17786
17787   /* At this point:
17788      * ITSTATE has been updated
17789      * condT holds the guarding condition for this instruction (0 or 1),
17790      * notInITt is 1 if we're in "normal" code, 0 if in an IT block
17791      * cond_AND_notInIT_T is the AND of the above two.
17792
17793      If the instruction proper can't trap, then there's nothing else
17794      to do w.r.t. ITSTATE -- just go and generate IR for the
17795      insn, taking into account the guarding condition.
17796
17797      If, however, the instruction might trap, then we must back up
17798      ITSTATE to the old value, and re-update it after the potentially
17799      trapping IR section.  A trap can happen either via a memory
17800      reference or because we need to throw SIGILL.
17801
17802      If an instruction has a side exit, we need to be sure that any
17803      ITSTATE backup is re-updated before the side exit.
17804   */
17805
17806   /* ----------------------------------------------------------- */
17807   /* --                                                       -- */
17808   /* -- Thumb 16-bit integer instructions                     -- */
17809   /* --                                                       -- */
17810   /* -- IMPORTANT: references to insn1 or INSN1 are           -- */
17811   /* --            not allowed in this section                -- */
17812   /* --                                                       -- */
17813   /* ----------------------------------------------------------- */
17814
17815   /* 16-bit instructions inside an IT block, apart from CMP, CMN and
17816      TST, do not set the condition codes.  Hence we must dynamically
17817      test for this case for every condition code update. */
17818
17819   IROp   anOp   = Iop_INVALID;
17820   const HChar* anOpNm = NULL;
17821
17822   /* ================ 16-bit 15:6 cases ================ */
17823
17824   switch (INSN0(15,6)) {
17825
17826   case 0x10a:   // CMP
17827   case 0x10b: { // CMN
17828      /* ---------------- CMP Rn, Rm ---------------- */
17829      Bool   isCMN = INSN0(15,6) == 0x10b;
17830      UInt   rN    = INSN0(2,0);
17831      UInt   rM    = INSN0(5,3);
17832      IRTemp argL  = newTemp(Ity_I32);
17833      IRTemp argR  = newTemp(Ity_I32);
17834      assign( argL, getIRegT(rN) );
17835      assign( argR, getIRegT(rM) );
17836      /* Update flags regardless of whether in an IT block or not. */
17837      setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
17838                      argL, argR, condT );
17839      DIP("%s r%u, r%u\n", isCMN ? "cmn" : "cmp", rN, rM);
17840      goto decode_success;
17841   }
17842
17843   case 0x108: {
17844      /* ---------------- TST Rn, Rm ---------------- */
17845      UInt   rN   = INSN0(2,0);
17846      UInt   rM   = INSN0(5,3);
17847      IRTemp oldC = newTemp(Ity_I32);
17848      IRTemp oldV = newTemp(Ity_I32);
17849      IRTemp res  = newTemp(Ity_I32);
17850      assign( oldC, mk_armg_calculate_flag_c() );
17851      assign( oldV, mk_armg_calculate_flag_v() );
17852      assign( res,  binop(Iop_And32, getIRegT(rN), getIRegT(rM)) );
17853      /* Update flags regardless of whether in an IT block or not. */
17854      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
17855      DIP("tst r%u, r%u\n", rN, rM);
17856      goto decode_success;
17857   }
17858
17859   case 0x109: {
17860      /* ---------------- NEGS Rd, Rm ---------------- */
17861      /* Rd = -Rm */
17862      UInt   rM   = INSN0(5,3);
17863      UInt   rD   = INSN0(2,0);
17864      IRTemp arg  = newTemp(Ity_I32);
17865      IRTemp zero = newTemp(Ity_I32);
17866      assign(arg, getIRegT(rM));
17867      assign(zero, mkU32(0));
17868      // rD can never be r15
17869      putIRegT(rD, binop(Iop_Sub32, mkexpr(zero), mkexpr(arg)), condT);
17870      setFlags_D1_D2( ARMG_CC_OP_SUB, zero, arg, cond_AND_notInIT_T);
17871      DIP("negs r%u, r%u\n", rD, rM);
17872      goto decode_success;
17873   }
17874
17875   case 0x10F: {
17876      /* ---------------- MVNS Rd, Rm ---------------- */
17877      /* Rd = ~Rm */
17878      UInt   rM   = INSN0(5,3);
17879      UInt   rD   = INSN0(2,0);
17880      IRTemp oldV = newTemp(Ity_I32);
17881      IRTemp oldC = newTemp(Ity_I32);
17882      IRTemp res  = newTemp(Ity_I32);
17883      assign( oldV, mk_armg_calculate_flag_v() );
17884      assign( oldC, mk_armg_calculate_flag_c() );
17885      assign(res, unop(Iop_Not32, getIRegT(rM)));
17886      // rD can never be r15
17887      putIRegT(rD, mkexpr(res), condT);
17888      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
17889                         cond_AND_notInIT_T );
17890      DIP("mvns r%u, r%u\n", rD, rM);
17891      goto decode_success;
17892   }
17893
17894   case 0x10C:
17895      /* ---------------- ORRS Rd, Rm ---------------- */
17896      anOp = Iop_Or32; anOpNm = "orr"; goto and_orr_eor_mul;
17897   case 0x100:
17898      /* ---------------- ANDS Rd, Rm ---------------- */
17899      anOp = Iop_And32; anOpNm = "and"; goto and_orr_eor_mul;
17900   case 0x101:
17901      /* ---------------- EORS Rd, Rm ---------------- */
17902      anOp = Iop_Xor32; anOpNm = "eor"; goto and_orr_eor_mul;
17903   case 0x10d:
17904      /* ---------------- MULS Rd, Rm ---------------- */
17905      anOp = Iop_Mul32; anOpNm = "mul"; goto and_orr_eor_mul;
17906   and_orr_eor_mul: {
17907      /* Rd = Rd `op` Rm */
17908      UInt   rM   = INSN0(5,3);
17909      UInt   rD   = INSN0(2,0);
17910      IRTemp res  = newTemp(Ity_I32);
17911      IRTemp oldV = newTemp(Ity_I32);
17912      IRTemp oldC = newTemp(Ity_I32);
17913      assign( oldV, mk_armg_calculate_flag_v() );
17914      assign( oldC, mk_armg_calculate_flag_c() );
17915      assign( res, binop(anOp, getIRegT(rD), getIRegT(rM) ));
17916      // not safe to read guest state after here
17917      // rD can never be r15
17918      putIRegT(rD, mkexpr(res), condT);
17919      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
17920                         cond_AND_notInIT_T );
17921      DIP("%s r%u, r%u\n", anOpNm, rD, rM);
17922      goto decode_success;
17923   }
17924
17925   case 0x10E: {
17926      /* ---------------- BICS Rd, Rm ---------------- */
17927      /* Rd = Rd & ~Rm */
17928      UInt   rM   = INSN0(5,3);
17929      UInt   rD   = INSN0(2,0);
17930      IRTemp res  = newTemp(Ity_I32);
17931      IRTemp oldV = newTemp(Ity_I32);
17932      IRTemp oldC = newTemp(Ity_I32);
17933      assign( oldV, mk_armg_calculate_flag_v() );
17934      assign( oldC, mk_armg_calculate_flag_c() );
17935      assign( res, binop(Iop_And32, getIRegT(rD),
17936                                    unop(Iop_Not32, getIRegT(rM) )));
17937      // not safe to read guest state after here
17938      // rD can never be r15
17939      putIRegT(rD, mkexpr(res), condT);
17940      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
17941                         cond_AND_notInIT_T );
17942      DIP("bics r%u, r%u\n", rD, rM);
17943      goto decode_success;
17944   }
17945
17946   case 0x105: {
17947      /* ---------------- ADCS Rd, Rm ---------------- */
17948      /* Rd = Rd + Rm + oldC */
17949      UInt   rM   = INSN0(5,3);
17950      UInt   rD   = INSN0(2,0);
17951      IRTemp argL = newTemp(Ity_I32);
17952      IRTemp argR = newTemp(Ity_I32);
17953      IRTemp oldC = newTemp(Ity_I32);
17954      IRTemp res  = newTemp(Ity_I32);
17955      assign(argL, getIRegT(rD));
17956      assign(argR, getIRegT(rM));
17957      assign(oldC, mk_armg_calculate_flag_c());
17958      assign(res, binop(Iop_Add32,
17959                        binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
17960                        mkexpr(oldC)));
17961      // rD can never be r15
17962      putIRegT(rD, mkexpr(res), condT);
17963      setFlags_D1_D2_ND( ARMG_CC_OP_ADC, argL, argR, oldC,
17964                         cond_AND_notInIT_T );
17965      DIP("adcs r%u, r%u\n", rD, rM);
17966      goto decode_success;
17967   }
17968
17969   case 0x106: {
17970      /* ---------------- SBCS Rd, Rm ---------------- */
17971      /* Rd = Rd - Rm - (oldC ^ 1) */
17972      UInt   rM   = INSN0(5,3);
17973      UInt   rD   = INSN0(2,0);
17974      IRTemp argL = newTemp(Ity_I32);
17975      IRTemp argR = newTemp(Ity_I32);
17976      IRTemp oldC = newTemp(Ity_I32);
17977      IRTemp res  = newTemp(Ity_I32);
17978      assign(argL, getIRegT(rD));
17979      assign(argR, getIRegT(rM));
17980      assign(oldC, mk_armg_calculate_flag_c());
17981      assign(res, binop(Iop_Sub32,
17982                        binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
17983                        binop(Iop_Xor32, mkexpr(oldC), mkU32(1))));
17984      // rD can never be r15
17985      putIRegT(rD, mkexpr(res), condT);
17986      setFlags_D1_D2_ND( ARMG_CC_OP_SBB, argL, argR, oldC,
17987                         cond_AND_notInIT_T );
17988      DIP("sbcs r%u, r%u\n", rD, rM);
17989      goto decode_success;
17990   }
17991
17992   case 0x2CB: {
17993      /* ---------------- UXTB Rd, Rm ---------------- */
17994      /* Rd = 8Uto32(Rm) */
17995      UInt rM = INSN0(5,3);
17996      UInt rD = INSN0(2,0);
17997      putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFF)),
17998                   condT);
17999      DIP("uxtb r%u, r%u\n", rD, rM);
18000      goto decode_success;
18001   }
18002
18003   case 0x2C9: {
18004      /* ---------------- SXTB Rd, Rm ---------------- */
18005      /* Rd = 8Sto32(Rm) */
18006      UInt rM = INSN0(5,3);
18007      UInt rD = INSN0(2,0);
18008      putIRegT(rD, binop(Iop_Sar32,
18009                         binop(Iop_Shl32, getIRegT(rM), mkU8(24)),
18010                         mkU8(24)),
18011                   condT);
18012      DIP("sxtb r%u, r%u\n", rD, rM);
18013      goto decode_success;
18014   }
18015
18016   case 0x2CA: {
18017      /* ---------------- UXTH Rd, Rm ---------------- */
18018      /* Rd = 16Uto32(Rm) */
18019      UInt rM = INSN0(5,3);
18020      UInt rD = INSN0(2,0);
18021      putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFFFF)),
18022                   condT);
18023      DIP("uxth r%u, r%u\n", rD, rM);
18024      goto decode_success;
18025   }
18026
18027   case 0x2C8: {
18028      /* ---------------- SXTH Rd, Rm ---------------- */
18029      /* Rd = 16Sto32(Rm) */
18030      UInt rM = INSN0(5,3);
18031      UInt rD = INSN0(2,0);
18032      putIRegT(rD, binop(Iop_Sar32,
18033                         binop(Iop_Shl32, getIRegT(rM), mkU8(16)),
18034                         mkU8(16)),
18035                   condT);
18036      DIP("sxth r%u, r%u\n", rD, rM);
18037      goto decode_success;
18038   }
18039
18040   case 0x102:   // LSLS
18041   case 0x103:   // LSRS
18042   case 0x104:   // ASRS
18043   case 0x107: { // RORS
18044      /* ---------------- LSLS Rs, Rd ---------------- */
18045      /* ---------------- LSRS Rs, Rd ---------------- */
18046      /* ---------------- ASRS Rs, Rd ---------------- */
18047      /* ---------------- RORS Rs, Rd ---------------- */
18048      /* Rd = Rd `op` Rs, and set flags */
18049      UInt   rS   = INSN0(5,3);
18050      UInt   rD   = INSN0(2,0);
18051      IRTemp oldV = newTemp(Ity_I32);
18052      IRTemp rDt  = newTemp(Ity_I32);
18053      IRTemp rSt  = newTemp(Ity_I32);
18054      IRTemp res  = newTemp(Ity_I32);
18055      IRTemp resC = newTemp(Ity_I32);
18056      const HChar* wot  = "???";
18057      assign(rSt, getIRegT(rS));
18058      assign(rDt, getIRegT(rD));
18059      assign(oldV, mk_armg_calculate_flag_v());
18060      /* Does not appear to be the standard 'how' encoding. */
18061      switch (INSN0(15,6)) {
18062         case 0x102:
18063            compute_result_and_C_after_LSL_by_reg(
18064               dis_buf, &res, &resC, rDt, rSt, rD, rS
18065            );
18066            wot = "lsl";
18067            break;
18068         case 0x103:
18069            compute_result_and_C_after_LSR_by_reg(
18070               dis_buf, &res, &resC, rDt, rSt, rD, rS
18071            );
18072            wot = "lsr";
18073            break;
18074         case 0x104:
18075            compute_result_and_C_after_ASR_by_reg(
18076               dis_buf, &res, &resC, rDt, rSt, rD, rS
18077            );
18078            wot = "asr";
18079            break;
18080         case 0x107:
18081            compute_result_and_C_after_ROR_by_reg(
18082               dis_buf, &res, &resC, rDt, rSt, rD, rS
18083            );
18084            wot = "ror";
18085            break;
18086         default:
18087            /*NOTREACHED*/vassert(0);
18088      }
18089      // not safe to read guest state after this point
18090      putIRegT(rD, mkexpr(res), condT);
18091      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
18092                         cond_AND_notInIT_T );
18093      DIP("%ss r%u, r%u\n", wot, rS, rD);
18094      goto decode_success;
18095   }
18096
18097   case 0x2E8:   // REV
18098   case 0x2E9: { // REV16
18099      /* ---------------- REV   Rd, Rm ---------------- */
18100      /* ---------------- REV16 Rd, Rm ---------------- */
18101      UInt rM = INSN0(5,3);
18102      UInt rD = INSN0(2,0);
18103      Bool isREV = INSN0(15,6) == 0x2E8;
18104      IRTemp arg = newTemp(Ity_I32);
18105      assign(arg, getIRegT(rM));
18106      IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
18107      putIRegT(rD, mkexpr(res), condT);
18108      DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM);
18109      goto decode_success;
18110   }
18111
18112   case 0x2EB: { // REVSH
18113      /* ---------------- REVSH Rd, Rn ---------------- */
18114      UInt rM = INSN0(5,3);
18115      UInt rD = INSN0(2,0);
18116      IRTemp irt_rM  = newTemp(Ity_I32);
18117      IRTemp irt_hi  = newTemp(Ity_I32);
18118      IRTemp irt_low = newTemp(Ity_I32);
18119      IRTemp irt_res = newTemp(Ity_I32);
18120      assign(irt_rM, getIRegT(rM));
18121      assign(irt_hi,
18122             binop(Iop_Sar32,
18123                   binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
18124                   mkU8(16)
18125             )
18126      );
18127      assign(irt_low,
18128             binop(Iop_And32,
18129                   binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
18130                   mkU32(0xFF)
18131             )
18132      );
18133      assign(irt_res,
18134             binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
18135      );
18136      putIRegT(rD, mkexpr(irt_res), condT);
18137      DIP("revsh r%u, r%u\n", rD, rM);
18138      goto decode_success;
18139   }
18140
18141   default:
18142      break; /* examine the next shortest prefix */
18143
18144   }
18145
18146
18147   /* ================ 16-bit 15:7 cases ================ */
18148
18149   switch (INSN0(15,7)) {
18150
18151   case BITS9(1,0,1,1,0,0,0,0,0): {
18152      /* ------------ ADD SP, #imm7 * 4 ------------ */
18153      UInt uimm7 = INSN0(6,0);
18154      putIRegT(13, binop(Iop_Add32, getIRegT(13), mkU32(uimm7 * 4)),
18155                   condT);
18156      DIP("add sp, #%u\n", uimm7 * 4);
18157      goto decode_success;
18158   }
18159
18160   case BITS9(1,0,1,1,0,0,0,0,1): {
18161      /* ------------ SUB SP, #imm7 * 4 ------------ */
18162      UInt uimm7 = INSN0(6,0);
18163      putIRegT(13, binop(Iop_Sub32, getIRegT(13), mkU32(uimm7 * 4)),
18164                   condT);
18165      DIP("sub sp, #%u\n", uimm7 * 4);
18166      goto decode_success;
18167   }
18168
18169   case BITS9(0,1,0,0,0,1,1,1,0): {
18170      /* ---------------- BX rM ---------------- */
18171      /* Branch to reg, and optionally switch modes.  Reg contains a
18172         suitably encoded address therefore (w CPSR.T at the bottom).
18173         Have to special-case r15, as usual. */
18174      UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
18175      if (BITS3(0,0,0) == INSN0(2,0)) {
18176         IRTemp dst = newTemp(Ity_I32);
18177         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
18178         mk_skip_over_T16_if_cond_is_false(condT);
18179         condT = IRTemp_INVALID;
18180         // now uncond
18181         if (rM <= 14) {
18182            assign( dst, getIRegT(rM) );
18183         } else {
18184            vassert(rM == 15);
18185            assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
18186         }
18187         llPutIReg(15, mkexpr(dst));
18188         dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
18189         dres.whatNext    = Dis_StopHere;
18190         DIP("bx r%u (possibly switch to ARM mode)\n", rM);
18191         goto decode_success;
18192      }
18193      break;
18194   }
18195
18196   /* ---------------- BLX rM ---------------- */
18197   /* Branch and link to interworking address in rM. */
18198   case BITS9(0,1,0,0,0,1,1,1,1): {
18199      if (BITS3(0,0,0) == INSN0(2,0)) {
18200         UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
18201         IRTemp dst = newTemp(Ity_I32);
18202         if (rM <= 14) {
18203            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
18204            mk_skip_over_T16_if_cond_is_false(condT);
18205            condT = IRTemp_INVALID;
18206            // now uncond
18207            /* We're returning to Thumb code, hence "| 1" */
18208            assign( dst, getIRegT(rM) );
18209            putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
18210                          IRTemp_INVALID );
18211            llPutIReg(15, mkexpr(dst));
18212            dres.jk_StopHere = Ijk_Call;
18213            dres.whatNext    = Dis_StopHere;
18214            DIP("blx r%u (possibly switch to ARM mode)\n", rM);
18215            goto decode_success;
18216         }
18217         /* else unpredictable, fall through */
18218      }
18219      break;
18220   }
18221
18222   default:
18223      break; /* examine the next shortest prefix */
18224
18225   }
18226
18227
18228   /* ================ 16-bit 15:8 cases ================ */
18229
18230   switch (INSN0(15,8)) {
18231
18232   case BITS8(1,1,0,1,1,1,1,1): {
18233      /* ---------------- SVC ---------------- */
18234      UInt imm8 = INSN0(7,0);
18235      if (imm8 == 0) {
18236         /* A syscall.  We can't do this conditionally, hence: */
18237         mk_skip_over_T16_if_cond_is_false( condT );
18238         // FIXME: what if we have to back up and restart this insn?
18239         // then ITSTATE will be wrong (we'll have it as "used")
18240         // when it isn't.  Correct is to save ITSTATE in a
18241         // stash pseudo-reg, and back up from that if we have to
18242         // restart.
18243         // uncond after here
18244         llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ));
18245         dres.jk_StopHere = Ijk_Sys_syscall;
18246         dres.whatNext    = Dis_StopHere;
18247         DIP("svc #0x%08x\n", imm8);
18248         goto decode_success;
18249      }
18250      /* else fall through */
18251      break;
18252   }
18253
18254   case BITS8(0,1,0,0,0,1,0,0): {
18255      /* ---------------- ADD(HI) Rd, Rm ---------------- */
18256      UInt h1 = INSN0(7,7);
18257      UInt h2 = INSN0(6,6);
18258      UInt rM = (h2 << 3) | INSN0(5,3);
18259      UInt rD = (h1 << 3) | INSN0(2,0);
18260      //if (h1 == 0 && h2 == 0) { // Original T1 was more restrictive
18261      if (rD == 15 && rM == 15) {
18262         // then it's invalid
18263      } else {
18264         IRTemp res = newTemp(Ity_I32);
18265         assign( res, binop(Iop_Add32, getIRegT(rD), getIRegT(rM) ));
18266         if (rD != 15) {
18267            putIRegT( rD, mkexpr(res), condT );
18268         } else {
18269            /* Only allowed outside or last-in IT block; SIGILL if not so. */
18270            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
18271            /* jump over insn if not selected */
18272            mk_skip_over_T16_if_cond_is_false(condT);
18273            condT = IRTemp_INVALID;
18274            // now uncond
18275            /* non-interworking branch */
18276            llPutIReg(15, binop(Iop_Or32, mkexpr(res), mkU32(1)));
18277            dres.jk_StopHere = Ijk_Boring;
18278            dres.whatNext    = Dis_StopHere;
18279         }
18280         DIP("add(hi) r%u, r%u\n", rD, rM);
18281         goto decode_success;
18282      }
18283      break;
18284   }
18285
18286   case BITS8(0,1,0,0,0,1,0,1): {
18287      /* ---------------- CMP(HI) Rd, Rm ---------------- */
18288      UInt h1 = INSN0(7,7);
18289      UInt h2 = INSN0(6,6);
18290      UInt rM = (h2 << 3) | INSN0(5,3);
18291      UInt rN = (h1 << 3) | INSN0(2,0);
18292      if (h1 != 0 || h2 != 0) {
18293         IRTemp argL  = newTemp(Ity_I32);
18294         IRTemp argR  = newTemp(Ity_I32);
18295         assign( argL, getIRegT(rN) );
18296         assign( argR, getIRegT(rM) );
18297         /* Update flags regardless of whether in an IT block or not. */
18298         setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
18299         DIP("cmphi r%u, r%u\n", rN, rM);
18300         goto decode_success;
18301      }
18302      break;
18303   }
18304
18305   case BITS8(0,1,0,0,0,1,1,0): {
18306      /* ---------------- MOV(HI) Rd, Rm ---------------- */
18307      UInt h1 = INSN0(7,7);
18308      UInt h2 = INSN0(6,6);
18309      UInt rM = (h2 << 3) | INSN0(5,3);
18310      UInt rD = (h1 << 3) | INSN0(2,0);
18311      /* The old ARM ARM seems to disallow the case where both Rd and
18312         Rm are "low" registers, but newer versions allow it. */
18313      if (1 /*h1 != 0 || h2 != 0*/) {
18314         IRTemp val = newTemp(Ity_I32);
18315         assign( val, getIRegT(rM) );
18316         if (rD != 15) {
18317            putIRegT( rD, mkexpr(val), condT );
18318         } else {
18319            /* Only allowed outside or last-in IT block; SIGILL if not so. */
18320            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
18321            /* jump over insn if not selected */
18322            mk_skip_over_T16_if_cond_is_false(condT);
18323            condT = IRTemp_INVALID;
18324            // now uncond
18325            /* non-interworking branch */
18326            llPutIReg(15, binop(Iop_Or32, mkexpr(val), mkU32(1)));
18327            dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
18328            dres.whatNext    = Dis_StopHere;
18329         }
18330         DIP("mov r%u, r%u\n", rD, rM);
18331         goto decode_success;
18332      }
18333      break;
18334   }
18335
18336   case BITS8(1,0,1,1,1,1,1,1): {
18337      /* ---------------- IT (if-then) ---------------- */
18338      UInt firstcond = INSN0(7,4);
18339      UInt mask = INSN0(3,0);
18340      UInt newITSTATE = 0;
18341      /* This is the ITSTATE represented as described in
18342         libvex_guest_arm.h.  It is not the ARM ARM representation. */
18343      HChar c1 = '.';
18344      HChar c2 = '.';
18345      HChar c3 = '.';
18346      Bool valid = compute_ITSTATE( &newITSTATE, &c1, &c2, &c3,
18347                                    firstcond, mask );
18348      if (valid && firstcond != 0xF/*NV*/) {
18349         /* Not allowed in an IT block; SIGILL if so. */
18350         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
18351
18352         IRTemp t = newTemp(Ity_I32);
18353         assign(t, mkU32(newITSTATE));
18354         put_ITSTATE(t);
18355
18356         DIP("it%c%c%c %s\n", c1, c2, c3, nCC(firstcond));
18357         goto decode_success;
18358      }
18359      break;
18360   }
18361
18362   case BITS8(1,0,1,1,0,0,0,1):
18363   case BITS8(1,0,1,1,0,0,1,1):
18364   case BITS8(1,0,1,1,1,0,0,1):
18365   case BITS8(1,0,1,1,1,0,1,1): {
18366      /* ---------------- CB{N}Z ---------------- */
18367      UInt rN    = INSN0(2,0);
18368      UInt bOP   = INSN0(11,11);
18369      UInt imm32 = (INSN0(9,9) << 6) | (INSN0(7,3) << 1);
18370      gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
18371      /* It's a conditional branch forward. */
18372      IRTemp kond = newTemp(Ity_I1);
18373      assign( kond, binop(bOP ? Iop_CmpNE32 : Iop_CmpEQ32,
18374                          getIRegT(rN), mkU32(0)) );
18375
18376      vassert(0 == (guest_R15_curr_instr_notENC & 1));
18377      /* Looks like the nearest insn we can branch to is the one after
18378         next.  That makes sense, as there's no point in being able to
18379         encode a conditional branch to the next instruction. */
18380      UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
18381      stmt(IRStmt_Exit( mkexpr(kond),
18382                        Ijk_Boring,
18383                        IRConst_U32(toUInt(dst)),
18384                        OFFB_R15T ));
18385      DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1);
18386      goto decode_success;
18387   }
18388
18389   default:
18390      break; /* examine the next shortest prefix */
18391
18392   }
18393
18394
18395   /* ================ 16-bit 15:9 cases ================ */
18396
18397   switch (INSN0(15,9)) {
18398
18399   case BITS7(1,0,1,1,0,1,0): {
18400      /* ---------------- PUSH ---------------- */
18401      /* This is a bit like STMxx, but way simpler. Complications we
18402         don't have to deal with:
18403         * SP being one of the transferred registers
18404         * direction (increment vs decrement)
18405         * before-vs-after-ness
18406      */
18407      Int  i, nRegs;
18408      UInt bitR    = INSN0(8,8);
18409      UInt regList = INSN0(7,0);
18410      if (bitR) regList |= (1 << 14);
18411
18412      /* At least one register must be transferred, else result is
18413         UNPREDICTABLE. */
18414      if (regList != 0) {
18415         /* Since we can't generate a guaranteed non-trapping IR
18416            sequence, (1) jump over the insn if it is gated false, and
18417            (2) back out the ITSTATE update. */
18418         mk_skip_over_T16_if_cond_is_false(condT);
18419         condT = IRTemp_INVALID;
18420         put_ITSTATE(old_itstate);
18421         // now uncond
18422
18423         nRegs = 0;
18424         for (i = 0; i < 16; i++) {
18425            if ((regList & (1 << i)) != 0)
18426               nRegs++;
18427         }
18428         vassert(nRegs >= 1 && nRegs <= 9);
18429
18430         /* Move SP down first of all, so we're "covered".  And don't
18431            mess with its alignment. */
18432         IRTemp newSP = newTemp(Ity_I32);
18433         assign(newSP, binop(Iop_Sub32, getIRegT(13), mkU32(4 * nRegs)));
18434         putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
18435
18436         /* Generate a transfer base address as a forced-aligned
18437            version of the final SP value. */
18438         IRTemp base = newTemp(Ity_I32);
18439         assign(base, binop(Iop_And32, mkexpr(newSP), mkU32(~3)));
18440
18441         /* Now the transfers */
18442         nRegs = 0;
18443         for (i = 0; i < 16; i++) {
18444            if ((regList & (1 << i)) != 0) {
18445               storeLE( binop(Iop_Add32, mkexpr(base), mkU32(4 * nRegs)),
18446                        getIRegT(i) );
18447               nRegs++;
18448            }
18449         }
18450
18451         /* Reinstate the ITSTATE update. */
18452         put_ITSTATE(new_itstate);
18453
18454         DIP("push {%s0x%04x}\n", bitR ? "lr," : "", regList & 0xFF);
18455         goto decode_success;
18456      }
18457      break;
18458   }
18459
18460   case BITS7(1,0,1,1,1,1,0): {
18461      /* ---------------- POP ---------------- */
18462      Int  i, nRegs;
18463      UInt bitR    = INSN0(8,8);
18464      UInt regList = INSN0(7,0);
18465
18466      /* At least one register must be transferred, else result is
18467         UNPREDICTABLE. */
18468      if (regList != 0 || bitR) {
18469         /* Since we can't generate a guaranteed non-trapping IR
18470            sequence, (1) jump over the insn if it is gated false, and
18471            (2) back out the ITSTATE update. */
18472         mk_skip_over_T16_if_cond_is_false(condT);
18473         condT = IRTemp_INVALID;
18474         put_ITSTATE(old_itstate);
18475         // now uncond
18476
18477         nRegs = 0;
18478         for (i = 0; i < 8; i++) {
18479            if ((regList & (1 << i)) != 0)
18480               nRegs++;
18481         }
18482         vassert(nRegs >= 0 && nRegs <= 8);
18483         vassert(bitR == 0 || bitR == 1);
18484
18485         IRTemp oldSP = newTemp(Ity_I32);
18486         assign(oldSP, getIRegT(13));
18487
18488         /* Generate a transfer base address as a forced-aligned
18489            version of the original SP value. */
18490         IRTemp base = newTemp(Ity_I32);
18491         assign(base, binop(Iop_And32, mkexpr(oldSP), mkU32(~3)));
18492
18493         /* Compute a new value for SP, but don't install it yet, so
18494            that we're "covered" until all the transfers are done.
18495            And don't mess with its alignment. */
18496         IRTemp newSP = newTemp(Ity_I32);
18497         assign(newSP, binop(Iop_Add32, mkexpr(oldSP),
18498                                        mkU32(4 * (nRegs + bitR))));
18499
18500         /* Now the transfers, not including PC */
18501         nRegs = 0;
18502         for (i = 0; i < 8; i++) {
18503            if ((regList & (1 << i)) != 0) {
18504               putIRegT(i, loadLE( Ity_I32,
18505                                   binop(Iop_Add32, mkexpr(base),
18506                                                    mkU32(4 * nRegs))),
18507                           IRTemp_INVALID );
18508               nRegs++;
18509            }
18510         }
18511
18512         IRTemp newPC = IRTemp_INVALID;
18513         if (bitR) {
18514            newPC = newTemp(Ity_I32);
18515            assign( newPC, loadLE( Ity_I32,
18516                                   binop(Iop_Add32, mkexpr(base),
18517                                                    mkU32(4 * nRegs))));
18518         }
18519
18520         /* Now we can safely install the new SP value */
18521         putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
18522
18523         /* Reinstate the ITSTATE update. */
18524         put_ITSTATE(new_itstate);
18525
18526         /* now, do we also have to do a branch?  If so, it turns out
18527            that the new PC value is encoded exactly as we need it to
18528            be -- with CPSR.T in the bottom bit.  So we can simply use
18529            it as is, no need to mess with it.  Note, therefore, this
18530            is an interworking return. */
18531         if (bitR) {
18532            llPutIReg(15, mkexpr(newPC));
18533            dres.jk_StopHere = Ijk_Ret;
18534            dres.whatNext    = Dis_StopHere;
18535         }
18536
18537         DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
18538         goto decode_success;
18539      }
18540      break;
18541   }
18542
18543   case BITS7(0,0,0,1,1,1,0):   /* ADDS */
18544   case BITS7(0,0,0,1,1,1,1): { /* SUBS */
18545      /* ---------------- ADDS Rd, Rn, #uimm3 ---------------- */
18546      /* ---------------- SUBS Rd, Rn, #uimm3 ---------------- */
18547      UInt   uimm3 = INSN0(8,6);
18548      UInt   rN    = INSN0(5,3);
18549      UInt   rD    = INSN0(2,0);
18550      UInt   isSub = INSN0(9,9);
18551      IRTemp argL  = newTemp(Ity_I32);
18552      IRTemp argR  = newTemp(Ity_I32);
18553      assign( argL, getIRegT(rN) );
18554      assign( argR, mkU32(uimm3) );
18555      putIRegT(rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
18556                         mkexpr(argL), mkexpr(argR)),
18557                   condT);
18558      setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
18559                      argL, argR, cond_AND_notInIT_T );
18560      DIP("%s r%u, r%u, #%u\n", isSub ? "subs" : "adds", rD, rN, uimm3);
18561      goto decode_success;
18562   }
18563
18564   case BITS7(0,0,0,1,1,0,0):   /* ADDS */
18565   case BITS7(0,0,0,1,1,0,1): { /* SUBS */
18566      /* ---------------- ADDS Rd, Rn, Rm ---------------- */
18567      /* ---------------- SUBS Rd, Rn, Rm ---------------- */
18568      UInt   rM    = INSN0(8,6);
18569      UInt   rN    = INSN0(5,3);
18570      UInt   rD    = INSN0(2,0);
18571      UInt   isSub = INSN0(9,9);
18572      IRTemp argL  = newTemp(Ity_I32);
18573      IRTemp argR  = newTemp(Ity_I32);
18574      assign( argL, getIRegT(rN) );
18575      assign( argR, getIRegT(rM) );
18576      putIRegT( rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
18577                          mkexpr(argL), mkexpr(argR)),
18578                    condT );
18579      setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
18580                      argL, argR, cond_AND_notInIT_T );
18581      DIP("%s r%u, r%u, r%u\n", isSub ? "subs" : "adds", rD, rN, rM);
18582      goto decode_success;
18583   }
18584
18585   case BITS7(0,1,0,1,0,0,0):   /* STR */
18586   case BITS7(0,1,0,1,1,0,0): { /* LDR */
18587      /* ------------- LDR Rd, [Rn, Rm] ------------- */
18588      /* ------------- STR Rd, [Rn, Rm] ------------- */
18589      /* LDR/STR Rd, [Rn + Rm] */
18590      UInt    rD   = INSN0(2,0);
18591      UInt    rN   = INSN0(5,3);
18592      UInt    rM   = INSN0(8,6);
18593      UInt    isLD = INSN0(11,11);
18594
18595      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
18596      put_ITSTATE(old_itstate); // backout
18597      if (isLD) {
18598         IRTemp tD = newTemp(Ity_I32);
18599         loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
18600         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18601      } else {
18602         storeGuardedLE(ea, getIRegT(rD), condT);
18603      }
18604      put_ITSTATE(new_itstate); // restore
18605
18606      DIP("%s r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
18607      goto decode_success;
18608   }
18609
18610   case BITS7(0,1,0,1,0,0,1):
18611   case BITS7(0,1,0,1,1,0,1): {
18612      /* ------------- LDRH Rd, [Rn, Rm] ------------- */
18613      /* ------------- STRH Rd, [Rn, Rm] ------------- */
18614      /* LDRH/STRH Rd, [Rn + Rm] */
18615      UInt    rD   = INSN0(2,0);
18616      UInt    rN   = INSN0(5,3);
18617      UInt    rM   = INSN0(8,6);
18618      UInt    isLD = INSN0(11,11);
18619
18620      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
18621      put_ITSTATE(old_itstate); // backout
18622      if (isLD) {
18623         IRTemp tD = newTemp(Ity_I32);
18624         loadGuardedLE(tD, ILGop_16Uto32, ea, llGetIReg(rD), condT);
18625         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18626      } else {
18627         storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
18628      }
18629      put_ITSTATE(new_itstate); // restore
18630
18631      DIP("%sh r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
18632      goto decode_success;
18633   }
18634
18635   case BITS7(0,1,0,1,1,1,1): {
18636      /* ------------- LDRSH Rd, [Rn, Rm] ------------- */
18637      /* LDRSH Rd, [Rn + Rm] */
18638      UInt    rD = INSN0(2,0);
18639      UInt    rN = INSN0(5,3);
18640      UInt    rM = INSN0(8,6);
18641
18642      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
18643      put_ITSTATE(old_itstate); // backout
18644      IRTemp tD = newTemp(Ity_I32);
18645      loadGuardedLE(tD, ILGop_16Sto32, ea, llGetIReg(rD), condT);
18646      putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18647      put_ITSTATE(new_itstate); // restore
18648
18649      DIP("ldrsh r%u, [r%u, r%u]\n", rD, rN, rM);
18650      goto decode_success;
18651   }
18652
18653   case BITS7(0,1,0,1,0,1,1): {
18654      /* ------------- LDRSB Rd, [Rn, Rm] ------------- */
18655      /* LDRSB Rd, [Rn + Rm] */
18656      UInt    rD = INSN0(2,0);
18657      UInt    rN = INSN0(5,3);
18658      UInt    rM = INSN0(8,6);
18659
18660      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
18661      put_ITSTATE(old_itstate); // backout
18662      IRTemp tD = newTemp(Ity_I32);
18663      loadGuardedLE(tD, ILGop_8Sto32, ea, llGetIReg(rD), condT);
18664      putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18665      put_ITSTATE(new_itstate); // restore
18666
18667      DIP("ldrsb r%u, [r%u, r%u]\n", rD, rN, rM);
18668      goto decode_success;
18669   }
18670
18671   case BITS7(0,1,0,1,0,1,0):
18672   case BITS7(0,1,0,1,1,1,0): {
18673      /* ------------- LDRB Rd, [Rn, Rm] ------------- */
18674      /* ------------- STRB Rd, [Rn, Rm] ------------- */
18675      /* LDRB/STRB Rd, [Rn + Rm] */
18676      UInt    rD   = INSN0(2,0);
18677      UInt    rN   = INSN0(5,3);
18678      UInt    rM   = INSN0(8,6);
18679      UInt    isLD = INSN0(11,11);
18680
18681      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
18682      put_ITSTATE(old_itstate); // backout
18683      if (isLD) {
18684         IRTemp tD = newTemp(Ity_I32);
18685         loadGuardedLE(tD, ILGop_8Uto32, ea, llGetIReg(rD), condT);
18686         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18687      } else {
18688         storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
18689      }
18690      put_ITSTATE(new_itstate); // restore
18691
18692      DIP("%sb r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
18693      goto decode_success;
18694   }
18695
18696   default:
18697      break; /* examine the next shortest prefix */
18698
18699   }
18700
18701
18702   /* ================ 16-bit 15:11 cases ================ */
18703
18704   switch (INSN0(15,11)) {
18705
18706   case BITS5(0,0,1,1,0):
18707   case BITS5(0,0,1,1,1): {
18708      /* ---------------- ADDS Rn, #uimm8 ---------------- */
18709      /* ---------------- SUBS Rn, #uimm8 ---------------- */
18710      UInt   isSub = INSN0(11,11);
18711      UInt   rN    = INSN0(10,8);
18712      UInt   uimm8 = INSN0(7,0);
18713      IRTemp argL  = newTemp(Ity_I32);
18714      IRTemp argR  = newTemp(Ity_I32);
18715      assign( argL, getIRegT(rN) );
18716      assign( argR, mkU32(uimm8) );
18717      putIRegT( rN, binop(isSub ? Iop_Sub32 : Iop_Add32,
18718                          mkexpr(argL), mkexpr(argR)), condT );
18719      setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
18720                      argL, argR, cond_AND_notInIT_T );
18721      DIP("%s r%u, #%u\n", isSub ? "subs" : "adds", rN, uimm8);
18722      goto decode_success;
18723   }
18724
18725   case BITS5(1,0,1,0,0): {
18726      /* ---------------- ADD rD, PC, #imm8 * 4 ---------------- */
18727      /* a.k.a. ADR */
18728      /* rD = align4(PC) + imm8 * 4 */
18729      UInt rD   = INSN0(10,8);
18730      UInt imm8 = INSN0(7,0);
18731      putIRegT(rD, binop(Iop_Add32,
18732                         binop(Iop_And32, getIRegT(15), mkU32(~3U)),
18733                         mkU32(imm8 * 4)),
18734                   condT);
18735      DIP("add r%u, pc, #%u\n", rD, imm8 * 4);
18736      goto decode_success;
18737   }
18738
18739   case BITS5(1,0,1,0,1): {
18740      /* ---------------- ADD rD, SP, #imm8 * 4 ---------------- */
18741      UInt rD   = INSN0(10,8);
18742      UInt imm8 = INSN0(7,0);
18743      putIRegT(rD, binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4)),
18744                   condT);
18745      DIP("add r%u, r13, #%u\n", rD, imm8 * 4);
18746      goto decode_success;
18747   }
18748
18749   case BITS5(0,0,1,0,1): {
18750      /* ---------------- CMP Rn, #uimm8 ---------------- */
18751      UInt   rN    = INSN0(10,8);
18752      UInt   uimm8 = INSN0(7,0);
18753      IRTemp argL  = newTemp(Ity_I32);
18754      IRTemp argR  = newTemp(Ity_I32);
18755      assign( argL, getIRegT(rN) );
18756      assign( argR, mkU32(uimm8) );
18757      /* Update flags regardless of whether in an IT block or not. */
18758      setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
18759      DIP("cmp r%u, #%u\n", rN, uimm8);
18760      goto decode_success;
18761   }
18762
18763   case BITS5(0,0,1,0,0): {
18764      /* -------------- (T1) MOVS Rn, #uimm8 -------------- */
18765      UInt   rD    = INSN0(10,8);
18766      UInt   uimm8 = INSN0(7,0);
18767      IRTemp oldV  = newTemp(Ity_I32);
18768      IRTemp oldC  = newTemp(Ity_I32);
18769      IRTemp res   = newTemp(Ity_I32);
18770      assign( oldV, mk_armg_calculate_flag_v() );
18771      assign( oldC, mk_armg_calculate_flag_c() );
18772      assign( res, mkU32(uimm8) );
18773      putIRegT(rD, mkexpr(res), condT);
18774      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
18775                         cond_AND_notInIT_T );
18776      DIP("movs r%u, #%u\n", rD, uimm8);
18777      goto decode_success;
18778   }
18779
18780   case BITS5(0,1,0,0,1): {
18781      /* ------------- LDR Rd, [PC, #imm8 * 4] ------------- */
18782      /* LDR Rd, [align4(PC) + imm8 * 4] */
      /* Word load into the register in insn bits 10:8.  The address
         is the r15 value with its low two bits cleared, plus the
         8-bit immediate (bits 7:0) scaled by 4. */
18783      UInt   rD   = INSN0(10,8);
18784      UInt   imm8 = INSN0(7,0);
18785      IRTemp ea   = newTemp(Ity_I32);
18786
18787      assign(ea, binop(Iop_Add32,
18788                       binop(Iop_And32, getIRegT(15), mkU32(~3U)),
18789                       mkU32(imm8 * 4)));
      /* The memory access is bracketed by an ITSTATE backout and
         restore, as the notes at the top of this file require for
         Thumb loads and stores. */
18790      put_ITSTATE(old_itstate); // backout
18791      IRTemp tD = newTemp(Ity_I32);
      /* The load itself carries the condT guard, with the current
         value of rD supplied as its alternative operand, so the
         register write that follows is issued with no guard.
         NOTE(review): presumably tD takes that alternative value
         when the guard is false -- confirm in loadGuardedLE. */
18792      loadGuardedLE( tD, ILGop_Ident32, mkexpr(ea), llGetIReg(rD), condT );
18793      putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18794      put_ITSTATE(new_itstate); // restore
18795
18796      DIP("ldr r%u, [pc, #%u]\n", rD, imm8 * 4);
18797      goto decode_success;
18798   }
18799
18800   case BITS5(0,1,1,0,0):   /* STR */
18801   case BITS5(0,1,1,0,1): { /* LDR */
18802      /* ------------- LDR Rd, [Rn, #imm5 * 4] ------------- */
18803      /* ------------- STR Rd, [Rn, #imm5 * 4] ------------- */
18804      /* LDR/STR Rd, [Rn + imm5 * 4] */
      /* Word load/store sharing one decoder.  Insn bit 11 picks the
         direction (1 = load); the 5-bit immediate in bits 10:6 is
         scaled by 4 and added to Rn to form the address. */
18805      UInt    rD   = INSN0(2,0);
18806      UInt    rN   = INSN0(5,3);
18807      UInt    imm5 = INSN0(10,6);
18808      UInt    isLD = INSN0(11,11);
18809
18810      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 4));
      /* ITSTATE is backed out before the memory access and restored
         after it, as the notes at the top of this file require. */
18811      put_ITSTATE(old_itstate); // backout
18812      if (isLD) {
         /* Guarded load with the current rD as the alternative
            operand; the register write itself is then unguarded.
            NOTE(review): presumably tD gets that alternative when
            condT is false -- confirm in loadGuardedLE. */
18813         IRTemp tD = newTemp(Ity_I32);
18814         loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
18815         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18816      } else {
18817         storeGuardedLE( ea, getIRegT(rD), condT );
18818      }
18819      put_ITSTATE(new_itstate); // restore
18820
18821      DIP("%s r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 4);
18822      goto decode_success;
18823   }
18824
18825   case BITS5(1,0,0,0,0):   /* STRH */
18826   case BITS5(1,0,0,0,1): { /* LDRH */
18827      /* ------------- LDRH Rd, [Rn, #imm5 * 2] ------------- */
18828      /* ------------- STRH Rd, [Rn, #imm5 * 2] ------------- */
18829      /* LDRH/STRH Rd, [Rn + imm5 * 2] */
      /* Halfword load/store sharing one decoder.  Insn bit 11 picks
         the direction (1 = load); the 5-bit immediate in bits 10:6
         is scaled by 2 and added to Rn to form the address. */
18830      UInt    rD   = INSN0(2,0);
18831      UInt    rN   = INSN0(5,3);
18832      UInt    imm5 = INSN0(10,6);
18833      UInt    isLD = INSN0(11,11);
18834
18835      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 2));
      /* ITSTATE is backed out before the memory access and restored
         after it, as the notes at the top of this file require. */
18836      put_ITSTATE(old_itstate); // backout
18837      if (isLD) {
         /* Guarded 16-bit load widened with ILGop_16Uto32; the
            current rD is the alternative operand, so the register
            write that follows carries no guard of its own. */
18838         IRTemp tD = newTemp(Ity_I32);
18839         loadGuardedLE( tD, ILGop_16Uto32, ea, llGetIReg(rD), condT );
18840         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18841      } else {
         /* Only the value narrowed by Iop_32to16 is stored. */
18842         storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
18843      }
18844      put_ITSTATE(new_itstate); // restore
18845
18846      DIP("%sh r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 2);
18847      goto decode_success;
18848   }
18849
18850   case BITS5(0,1,1,1,0):   /* STRB */
18851   case BITS5(0,1,1,1,1): { /* LDRB */
18852      /* ------------- LDRB Rd, [Rn, #imm5] ------------- */
18853      /* ------------- STRB Rd, [Rn, #imm5] ------------- */
18854      /* LDRB/STRB Rd, [Rn + imm5] */
      /* Byte load/store sharing one decoder.  Insn bit 11 picks the
         direction (1 = load); the 5-bit immediate in bits 10:6 is
         added unscaled to Rn to form the address. */
18855      UInt    rD   = INSN0(2,0);
18856      UInt    rN   = INSN0(5,3);
18857      UInt    imm5 = INSN0(10,6);
18858      UInt    isLD = INSN0(11,11);
18859
18860      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5));
      /* ITSTATE is backed out before the memory access and restored
         after it, as the notes at the top of this file require. */
18861      put_ITSTATE(old_itstate); // backout
18862      if (isLD) {
         /* Guarded 8-bit load widened with ILGop_8Uto32; the current
            rD is the alternative operand, so the register write that
            follows carries no guard of its own. */
18863         IRTemp tD = newTemp(Ity_I32);
18864         loadGuardedLE( tD, ILGop_8Uto32, ea, llGetIReg(rD), condT );
18865         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18866      } else {
         /* Only the value narrowed by Iop_32to8 is stored. */
18867         storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
18868      }
18869      put_ITSTATE(new_itstate); // restore
18870
18871      DIP("%sb r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5);
18872      goto decode_success;
18873   }
18874
18875   case BITS5(1,0,0,1,0):   /* STR */
18876   case BITS5(1,0,0,1,1): { /* LDR */
18877      /* ------------- LDR Rd, [SP, #imm8 * 4] ------------- */
18878      /* ------------- STR Rd, [SP, #imm8 * 4] ------------- */
18879      /* LDR/STR Rd, [SP + imm8 * 4] */
      /* Word load/store relative to r13.  Insn bit 11 picks the
         direction (1 = load), bits 10:8 name the data register and
         the 8-bit immediate in bits 7:0 is scaled by 4. */
18880      UInt rD    = INSN0(10,8);
18881      UInt imm8  = INSN0(7,0);
18882      UInt isLD  = INSN0(11,11);
18883
18884      IRExpr* ea = binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4));
      /* ITSTATE is backed out before the memory access and restored
         after it, as the notes at the top of this file require. */
18885      put_ITSTATE(old_itstate); // backout
18886      if (isLD) {
         /* Guarded load with the current rD as the alternative
            operand; the register write itself is then unguarded. */
18887         IRTemp tD = newTemp(Ity_I32);
18888         loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
18889         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18890      } else {
18891         storeGuardedLE(ea, getIRegT(rD), condT);
18892      }
18893      put_ITSTATE(new_itstate); // restore
18894
18895      DIP("%s r%u, [sp, #%u]\n", isLD ? "ldr" : "str", rD, imm8 * 4);
18896      goto decode_success;
18897   }
18898
18899   case BITS5(1,1,0,0,1): {
18900      /* ------------- LDMIA Rn!, {reglist} ------------- */
      /* Loads the registers selected by the 8-bit list in insn bits
         7:0 (bit i selects r<i>) from consecutive words starting at
         the base register named by bits 10:8. */
18901      Int i, nRegs = 0;
18902      UInt rN   = INSN0(10,8);
18903      UInt list = INSN0(7,0);
18904      /* Empty lists aren't allowed. */
18905      if (list != 0) {
         /* Rather than guarding every load, emit a skip over the
            whole instruction when the condition fails and treat the
            remainder as unconditional.  ITSTATE is backed out for
            the duration of the memory accesses. */
18906         mk_skip_over_T16_if_cond_is_false(condT);
18907         condT = IRTemp_INVALID;
18908         put_ITSTATE(old_itstate);
18909         // now uncond
18910
         /* oldRn is the base register as read; base is that value
            with its low two bits cleared and is what the loads
            address from. */
18911         IRTemp oldRn = newTemp(Ity_I32);
18912         IRTemp base  = newTemp(Ity_I32);
18913         assign(oldRn, getIRegT(rN));
18914         assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
         /* Lowest-numbered listed register first; the k-th listed
            register is loaded from base + 4*(k-1). */
18915         for (i = 0; i < 8; i++) {
18916            if (0 == (list & (1 << i)))
18917               continue;
18918            nRegs++;
18919            putIRegT(
18920               i, loadLE(Ity_I32,
18921                         binop(Iop_Add32, mkexpr(base),
18922                                          mkU32(nRegs * 4 - 4))),
18923               IRTemp_INVALID
18924            );
18925         }
18926         /* Only do the writeback for rN if it isn't in the list of
18927            registers to be transferred.  The written-back value is
            computed from oldRn (not the aligned base) plus 4 bytes
            per register loaded. */
18928         if (0 == (list & (1 << rN))) {
18929            putIRegT(rN,
18930                     binop(Iop_Add32, mkexpr(oldRn),
18931                                      mkU32(nRegs * 4)),
18932                     IRTemp_INVALID
18933            );
18934         }
18935
18936         /* Reinstate the ITSTATE update. */
18937         put_ITSTATE(new_itstate);
18938
18939         DIP("ldmia r%u!, {0x%04x}\n", rN, list);
18940         goto decode_success;
18941      }
      /* Empty list: not decoded here; leave the switch so the later
         decode stages get a chance at it. */
18942      break;
18943   }
18944
18945   case BITS5(1,1,0,0,0): {
18946      /* ------------- STMIA Rn!, {reglist} ------------- */
      /* Stores the registers selected by the 8-bit list in insn bits
         7:0 (bit i selects r<i>) to consecutive words starting at
         the base register named by bits 10:8, then writes the base
         register back. */
18947      Int i, nRegs = 0;
18948      UInt rN   = INSN0(10,8);
18949      UInt list = INSN0(7,0);
18950      /* Empty lists aren't allowed.  Also, if rN is in the list then
18951         it must be the lowest numbered register in the list. */
18952      Bool valid = list != 0;
18953      if (valid && 0 != (list & (1 << rN))) {
         /* Any listed register numbered below rN invalidates the
            encoding. */
18954         for (i = 0; i < rN; i++) {
18955            if (0 != (list & (1 << i)))
18956               valid = False;
18957         }
18958      }
18959      if (valid) {
         /* Rather than guarding every store, emit a skip over the
            whole instruction when the condition fails and treat the
            remainder as unconditional.  ITSTATE is backed out for
            the duration of the memory accesses. */
18960         mk_skip_over_T16_if_cond_is_false(condT);
18961         condT = IRTemp_INVALID;
18962         put_ITSTATE(old_itstate);
18963         // now uncond
18964
         /* oldRn is the base register as read; base is that value
            with its low two bits cleared and is what the stores
            address from. */
18965         IRTemp oldRn = newTemp(Ity_I32);
18966         IRTemp base = newTemp(Ity_I32);
18967         assign(oldRn, getIRegT(rN));
18968         assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
         /* Lowest-numbered listed register first; the k-th listed
            register is stored at base + 4*(k-1). */
18969         for (i = 0; i < 8; i++) {
18970            if (0 == (list & (1 << i)))
18971               continue;
18972            nRegs++;
18973            storeLE( binop(Iop_Add32, mkexpr(base), mkU32(nRegs * 4 - 4)),
18974                     getIRegT(i) );
18975         }
18976         /* Always do the writeback.  The new value is oldRn (not the
            aligned base) plus 4 bytes per register stored. */
18977         putIRegT(rN,
18978                  binop(Iop_Add32, mkexpr(oldRn),
18979                                   mkU32(nRegs * 4)),
18980                  IRTemp_INVALID);
18981
18982         /* Reinstate the ITSTATE update. */
18983         put_ITSTATE(new_itstate);
18984
18985         DIP("stmia r%u!, {0x%04x}\n", rN, list);
18986         goto decode_success;
18987      }
      /* Invalid encoding: not decoded here; leave the switch so the
         later decode stages get a chance at it. */
18988      break;
18989   }
18990
18991   case BITS5(0,0,0,0,0):   /* LSLS */
18992   case BITS5(0,0,0,0,1):   /* LSRS */
18993   case BITS5(0,0,0,1,0): { /* ASRS */
18994      /* ---------------- LSLS Rd, Rm, #imm5 ---------------- */
18995      /* ---------------- LSRS Rd, Rm, #imm5 ---------------- */
18996      /* ---------------- ASRS Rd, Rm, #imm5 ---------------- */
      /* Shift-by-immediate, one decoder for all three kinds.  rD is
         insn bits 2:0, rM bits 5:3 and the shift amount bits 10:6.
         The per-kind helper produces both the result and the
         carry-out; V is carried over from the current flags. */
18997      UInt   rD   = INSN0(2,0);
18998      UInt   rM   = INSN0(5,3);
18999      UInt   imm5 = INSN0(10,6);
19000      IRTemp res  = newTemp(Ity_I32);
19001      IRTemp resC = newTemp(Ity_I32);
19002      IRTemp rMt  = newTemp(Ity_I32);
19003      IRTemp oldV = newTemp(Ity_I32);
19004      const HChar* wot  = "???";
19005      assign(rMt, getIRegT(rM));
19006      assign(oldV, mk_armg_calculate_flag_v());
19007      /* Looks like INSN0(12,11) are the standard 'how' encoding.
19008         Could compactify if the ROR case later appears. */
      /* The helpers receive &res and &resC, so the temps allocated
         above are handed over by address; dis_buf is passed too but
         its text is not used for the trace output below. */
19009      switch (INSN0(15,11)) {
19010         case BITS5(0,0,0,0,0):
19011            compute_result_and_C_after_LSL_by_imm5(
19012               dis_buf, &res, &resC, rMt, imm5, rM
19013            );
19014            wot = "lsl";
19015            break;
19016         case BITS5(0,0,0,0,1):
19017            compute_result_and_C_after_LSR_by_imm5(
19018               dis_buf, &res, &resC, rMt, imm5, rM
19019            );
19020            wot = "lsr";
19021            break;
19022         case BITS5(0,0,0,1,0):
19023            compute_result_and_C_after_ASR_by_imm5(
19024               dis_buf, &res, &resC, rMt, imm5, rM
19025            );
19026            wot = "asr";
19027            break;
19028         default:
            /* The enclosing case labels admit only the three
               encodings handled above. */
19029            /*NOTREACHED*/vassert(0);
19030      }
19031      // not safe to read guest state after this point
      /* Register write guarded by condT; flag update guarded by
         cond_AND_notInIT_T. */
19032      putIRegT(rD, mkexpr(res), condT);
19033      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
19034                         cond_AND_notInIT_T );
19035      /* ignore buf and roll our own output */
19036      DIP("%ss r%u, r%u, #%u\n", wot, rD, rM, imm5);
19037      goto decode_success;
19038   }
19039
19040   case BITS5(1,1,1,0,0): {
19041      /* ---------------- B #simm11 ---------------- */
      /* Unconditional-form branch with an 11-bit offset field in
         insn bits 10:0.  The shift pair below moves the field's top
         bit up to bit 31 and comes back one place fewer, giving the
         sign-extended field times two (this relies on >> of a
         negative Int being an arithmetic shift).  The target is that
         offset plus the address of this instruction plus 4. */
19042      Int  simm11 = INSN0(10,0);
19043           simm11 = (simm11 << 21) >> 20;
19044      UInt dst    = simm11 + guest_R15_curr_instr_notENC + 4;
19045      /* Only allowed outside or last-in IT block; SIGILL if not so. */
19046      gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19047      // and skip this insn if not selected; being cleverer is too
19048      // difficult
19049      mk_skip_over_T16_if_cond_is_false(condT);
19050      condT = IRTemp_INVALID;
19051      // now uncond
      /* Write the target with bit 0 set (Thumb state) and end the
         block here as an ordinary jump. */
19052      llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
19053      dres.jk_StopHere = Ijk_Boring;
19054      dres.whatNext    = Dis_StopHere;
19055      DIP("b 0x%x\n", dst);
19056      goto decode_success;
19057   }
19058
19059   default:
19060      break; /* examine the next shortest prefix */
19061
19062   }
19063
19064
19065   /* ================ 16-bit 15:12 cases ================ */
19066
   /* Decode stage keyed on the top four bits of the halfword; the
      only instruction handled here is the conditional branch. */
19067   switch (INSN0(15,12)) {
19068
19069   case BITS4(1,1,0,1): {
19070      /* ---------------- Bcond #simm8 ---------------- */
      /* The condition is insn bits 11:8 and the offset field bits
         7:0.  The shift pair sign-extends the 8-bit field and
         doubles it (top bit up to bit 31, back one place fewer).
         The target is that offset plus the address of this
         instruction plus 4. */
19071      UInt cond  = INSN0(11,8);
19072      Int  simm8 = INSN0(7,0);
19073           simm8 = (simm8 << 24) >> 23;
19074      UInt dst   = simm8 + guest_R15_curr_instr_notENC + 4;
      /* Condition values AL and NV are not decoded as Bcond; they
         drop out of this switch via the break below. */
19075      if (cond != ARMCondAL && cond != ARMCondNV) {
19076         /* Not allowed in an IT block; SIGILL if so. */
19077         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
19078
         /* Side exit to the target when the instruction's own
            condition evaluates true; otherwise r15 is set to the
            following instruction (this one + 2).  Both addresses
            carry bit 0 set for Thumb state.  The block ends here
            either way. */
19079         IRTemp kondT = newTemp(Ity_I32);
19080         assign( kondT, mk_armg_calculate_condition(cond) );
19081         stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
19082                            Ijk_Boring,
19083                            IRConst_U32(dst | 1/*CPSR.T*/),
19084                            OFFB_R15T ));
19085         llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2)
19086                              | 1 /*CPSR.T*/ ));
19087         dres.jk_StopHere = Ijk_Boring;
19088         dres.whatNext    = Dis_StopHere;
19089         DIP("b%s 0x%x\n", nCC(cond), dst);
19090         goto decode_success;
19091      }
19092      break;
19093   }
19094
19095   default:
19096      break; /* hmm, nothing matched */
19097
19098   }
19099
19100   /* ================ 16-bit misc cases ================ */
19101
   /* Decode stage that matches the whole 16-bit halfword against a
      few fixed encodings.  NOP and SEV emit no IR at all; WFE emits
      a single conditional exit. */
19102   switch (INSN0(15,0)) {
19103      case 0xBF00:
19104         /* ------ NOP ------ */
19105         DIP("nop\n");
19106         goto decode_success;
19107      case 0xBF20:
19108         /* ------ WFE ------ */
19109         /* WFE gets used as a spin-loop hint.  Do the usual thing,
19110            which is to continue after yielding. */
         /* The exit is guarded by condT, has kind Ijk_Yield, and
            targets the following instruction (this one + 2) with
            bit 0 set for Thumb state. */
19111         stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
19112                            Ijk_Yield,
19113                            IRConst_U32((guest_R15_curr_instr_notENC + 2)
19114                                        | 1 /*CPSR.T*/),
19115                            OFFB_R15T ));
19116         DIP("wfe\n");
19117         goto decode_success;
19118      case 0xBF40:
19119         /* ------ SEV ------ */
19120         /* Treat this as a no-op.  Any matching WFEs won't really
19121            cause the host CPU to snooze; they just cause V to try to
19122            run some other thread for a while.  So there's no point in
19123            really doing anything for SEV. */
19124         DIP("sev\n");
19125         goto decode_success;
19126      default:
19127         break; /* fall through */
19128   }
19129
19130   /* ----------------------------------------------------------- */
19131   /* --                                                       -- */
19132   /* -- Thumb 32-bit integer instructions                     -- */
19133   /* --                                                       -- */
19134   /* ----------------------------------------------------------- */
19135
   /* INSN1(hi,lo) extracts bits hi..lo of insn1, the second halfword
      of a 32-bit Thumb instruction. */
19136#  define INSN1(_bMax,_bMin)  SLICE_UInt(((UInt)insn1), (_bMax), (_bMin))
19137
19138   /* second 16 bits of the instruction, if any */
   /* insn1 must still be zero at this point; it is now fetched from
      the two bytes following the first halfword. */
19139   vassert(insn1 == 0);
19140   insn1 = getUShortLittleEndianly( guest_instr+2 );
19141
19142   anOp   = Iop_INVALID; /* paranoia */
19143   anOpNm = NULL;        /* paranoia */
19144
19145   /* Change result defaults to suit 32-bit insns. */
   /* The asserts check that dres still holds the 16-bit defaults
      (continue, length 2, no continuation address); only the length
      is then changed, to 4. */
19146   vassert(dres.whatNext   == Dis_Continue);
19147   vassert(dres.len        == 2);
19148   vassert(dres.continueAt == 0);
19149   dres.len = 4;
19150
19151   /* ---------------- BL/BLX simm26 ---------------- */
   /* Matched on insn0 bits 15:11 == 11110 and insn1 bits 15:14 == 11.
      insn1 bit 12 distinguishes BL (1) from BLX (0). */
19152   if (BITS5(1,1,1,1,0) == INSN0(15,11) && BITS2(1,1) == INSN1(15,14)) {
19153      UInt isBL = INSN1(12,12);
19154      UInt bS   = INSN0(10,10);
19155      UInt bJ1  = INSN1(13,13);
19156      UInt bJ2  = INSN1(11,11);
      /* I1 and I2 are the J bits XORed with S and then inverted. */
19157      UInt bI1  = 1 ^ (bJ1 ^ bS);
19158      UInt bI2  = 1 ^ (bJ2 ^ bS);
      /* Assemble S:I1:I2:insn0[9:0]:insn1[10:0]:0, a 25-bit value
         with S at bit 24, then sign-extend it from bit 24 with the
         shift pair (which relies on >> of a negative Int being an
         arithmetic shift). */
19159      Int simm25
19160         =   (bS          << (1 + 1 + 10 + 11 + 1))
19161           | (bI1         << (1 + 10 + 11 + 1))
19162           | (bI2         << (10 + 11 + 1))
19163           | (INSN0(9,0)  << (11 + 1))
19164           | (INSN1(10,0) << 1);
19165      simm25 = (simm25 << 7) >> 7;
19166
      /* Target = offset + address of this instruction + 4. */
19167      vassert(0 == (guest_R15_curr_instr_notENC & 1));
19168      UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
19169
19170      /* One further validity case to check: in the case of BLX
19171         (not-BL), that insn1[0] must be zero. */
19172      Bool valid = True;
19173      if (isBL == 0 && INSN1(0,0) == 1) valid = False;
      /* If invalid, nothing is emitted and decoding carries on with
         the stanzas that follow. */
19174      if (valid) {
19175         /* Only allowed outside or last-in IT block; SIGILL if not so. */
19176         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19177         // and skip this insn if not selected; being cleverer is too
19178         // difficult
19179         mk_skip_over_T32_if_cond_is_false(condT);
19180         condT = IRTemp_INVALID;
19181         // now uncond
19182
19183         /* We're returning to Thumb code, hence "| 1" */
         /* r14 gets the address of the instruction after this 4-byte
            one, with bit 0 set. */
19184         putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 4) | 1 ),
19185                   IRTemp_INVALID);
19186         if (isBL) {
19187            /* BL: unconditional T -> T call */
19188            /* we're calling Thumb code, hence "| 1" */
19189            llPutIReg(15, mkU32( dst | 1 ));
19190            DIP("bl 0x%x (stay in Thumb mode)\n", dst);
19191         } else {
19192            /* BLX: unconditional T -> A call */
19193            /* we're calling ARM code, hence "& ~3" to align to a
19194               valid ARM insn address */
19195            llPutIReg(15, mkU32( dst & ~3 ));
19196            DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
19197         }
         /* End the block here and mark the transfer as a call. */
19198         dres.whatNext    = Dis_StopHere;
19199         dres.jk_StopHere = Ijk_Call;
19200         goto decode_success;
19201      }
19202   }
19203
19204   /* ---------------- {LD,ST}M{IA,DB} ---------------- */
   /* 32-bit load/store multiple.  insn0 bits 15:6 select the
      addressing form; insn0 bit 4 is load (1) vs store (0), bit 5
      requests writeback, bits 3:0 name the base register, and insn1
      is the register list. */
19205   if (0x3a2 == INSN0(15,6) // {LD,ST}MIA
19206       || 0x3a4 == INSN0(15,6)) { // {LD,ST}MDB
19207      UInt bW      = INSN0(5,5); /* writeback Rn ? */
19208      UInt bL      = INSN0(4,4);
19209      UInt rN      = INSN0(3,0);
19210      UInt bP      = INSN1(15,15); /* reglist entry for r15 */
19211      UInt bM      = INSN1(14,14); /* reglist entry for r14 */
19212      UInt rLmost  = INSN1(12,0);  /* reglist entry for r0 .. 12 */
19213      UInt rL13    = INSN1(13,13); /* must be zero */
19214      UInt regList = 0;
19215      Bool valid   = True;
19216
      /* Default is increment-after (IA); the 0x3a4 form switches to
         decrement-before (DB). */
19217      UInt bINC    = 1;
19218      UInt bBEFORE = 0;
19219      if (INSN0(15,6) == 0x3a4) {
19220         bINC    = 0;
19221         bBEFORE = 1;
19222      }
19223
19224      /* detect statically invalid cases, and construct the final
19225         reglist */
19226      if (rL13 == 1)
19227         valid = False;
19228
      /* Loads: r15 may be listed, but not together with r14.
         Stores: r15 may not be listed at all.
         Both: base must not be r15, at least two registers must be
         listed, and with writeback the base must not be listed. */
19229      if (bL == 1) {
19230         regList = (bP << 15) | (bM << 14) | rLmost;
19231         if (rN == 15)                       valid = False;
19232         if (popcount32(regList) < 2)        valid = False;
19233         if (bP == 1 && bM == 1)             valid = False;
19234         if (bW == 1 && (regList & (1<<rN))) valid = False;
19235      } else {
19236         regList = (bM << 14) | rLmost;
19237         if (bP == 1)                        valid = False;
19238         if (rN == 15)                       valid = False;
19239         if (popcount32(regList) < 2)        valid = False;
19240         if (bW == 1 && (regList & (1<<rN))) valid = False;
19241      }
19242
      /* If invalid, nothing is emitted and decoding carries on with
         the stanzas that follow. */
19243      if (valid) {
19244         if (bL == 1 && bP == 1) {
19245            // We'll be writing the PC.  Hence:
19246            /* Only allowed outside or last-in IT block; SIGILL if not so. */
19247            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19248         }
19249
19250         /* Go uncond: */
19251         mk_skip_over_T32_if_cond_is_false(condT);
19252         condT = IRTemp_INVALID;
19253         // now uncond
19254
         /* NOTE(review): unlike the 16-bit LDMIA/STMIA cases, there
            is no put_ITSTATE backout/restore around the memory
            accesses here; check this against the to-do notes at the
            top of the file. */
19255         /* Generate the IR.  This might generate a write to R15. */
19256         mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
19257
19258         if (bL == 1 && (regList & (1<<15))) {
19259            // If we wrote to R15, we have an interworking return to
19260            // deal with.
            /* r15 is read back and rewritten via the low-level
               accessors, and the block is ended as a return. */
19261            llPutIReg(15, llGetIReg(15));
19262            dres.jk_StopHere = Ijk_Ret;
19263            dres.whatNext    = Dis_StopHere;
19264         }
19265
19266         DIP("%sm%c%c r%u%s, {0x%04x}\n",
19267              bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
19268              rN, bW ? "!" : "", regList);
19269
19270         goto decode_success;
19271      }
19272   }
19273
19274   /* -------------- (T3) ADD{S}.W Rd, Rn, #constT -------------- */
   /* Rd = Rn + expanded immediate.  insn0 bit 4 is the S bit, insn0
      bits 3:0 name Rn and insn1 bits 11:8 name Rd. */
19275   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19276       && INSN0(9,5) == BITS5(0,1,0,0,0)
19277       && INSN1(15,15) == 0) {
19278      UInt bS = INSN0(4,4);
19279      UInt rN = INSN0(3,0);
19280      UInt rD = INSN1(11,8);
19281      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
19282      /* but allow "add.w reg, sp, #constT" for reg != PC */
19283      if (!valid && rD <= 14 && rN == 13)
19284         valid = True;
19285      if (valid) {
19286         IRTemp argL  = newTemp(Ity_I32);
19287         IRTemp argR  = newTemp(Ity_I32);
19288         IRTemp res   = newTemp(Ity_I32);
         /* NULL is passed for the expander's first argument here;
            the logical-operation stanzas pass &updC there instead. */
19289         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
19290         assign(argL, getIRegT(rN));
19291         assign(argR, mkU32(imm32));
19292         assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
         /* Register write, and the flag update when S is set, are
            both guarded by condT. */
19293         putIRegT(rD, mkexpr(res), condT);
19294         if (bS == 1)
19295            setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
19296         DIP("add%s.w r%u, r%u, #%u\n",
19297             bS == 1 ? "s" : "", rD, rN, imm32);
19298         goto decode_success;
19299      }
19300   }
19301
19302   /* ---------------- (T4) ADDW Rd, Rn, #uimm12 -------------- */
   /* Rd = Rn + a plain 12-bit immediate.  There is no S bit in this
      form and no flags are touched. */
19303   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19304       && INSN0(9,4) == BITS6(1,0,0,0,0,0)
19305       && INSN1(15,15) == 0) {
19306      UInt rN = INSN0(3,0);
19307      UInt rD = INSN1(11,8);
19308      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
19309      /* but allow "addw reg, sp, #uimm12" for reg != PC */
19310      if (!valid && rD <= 14 && rN == 13)
19311         valid = True;
19312      if (valid) {
19313         IRTemp argL = newTemp(Ity_I32);
19314         IRTemp argR = newTemp(Ity_I32);
19315         IRTemp res  = newTemp(Ity_I32);
         /* The immediate is insn0[10] : insn1[14:12] : insn1[7:0],
            used as-is (it does not go through
            thumbExpandImm_from_I0_I1). */
19316         UInt imm12  = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
19317         assign(argL, getIRegT(rN));
19318         assign(argR, mkU32(imm12));
19319         assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
19320         putIRegT(rD, mkexpr(res), condT);
19321         DIP("addw r%u, r%u, #%u\n", rD, rN, imm12);
19322         goto decode_success;
19323      }
19324   }
19325
19326   /* ---------------- (T2) CMP.W Rn, #constT ---------------- */
19327   /* ---------------- (T2) CMN.W Rn, #constT ---------------- */
   /* Flag-only comparisons against an expanded immediate.  Besides
      the opcode bits, the match requires insn1 bits 11:8 (the Rd
      field of the ADD/SUB forms) to be 1111.  No register is
      written. */
19328   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19329       && (   INSN0(9,4) == BITS6(0,1,1,0,1,1)  // CMP
19330           || INSN0(9,4) == BITS6(0,1,0,0,0,1)) // CMN
19331       && INSN1(15,15) == 0
19332       && INSN1(11,8) == BITS4(1,1,1,1)) {
19333      UInt rN = INSN0(3,0);
      /* Only r15 is rejected as Rn here; r13 is accepted. */
19334      if (rN != 15) {
19335         IRTemp argL  = newTemp(Ity_I32);
19336         IRTemp argR  = newTemp(Ity_I32);
19337         Bool   isCMN = INSN0(9,4) == BITS6(0,1,0,0,0,1);
19338         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
19339         assign(argL, getIRegT(rN));
19340         assign(argR, mkU32(imm32));
         /* CMN sets flags as for an ADD, CMP as for a SUB. */
19341         setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
19342                         argL, argR, condT );
19343         DIP("%s.w r%u, #%u\n", isCMN ? "cmn" : "cmp", rN, imm32);
19344         goto decode_success;
19345      }
19346   }
19347
19348   /* -------------- (T1) TST.W Rn, #constT -------------- */
19349   /* -------------- (T1) TEQ.W Rn, #constT -------------- */
   /* Flag-only logical tests against an expanded immediate: TST
      uses AND, TEQ uses XOR.  Besides the opcode bits, the match
      requires insn1 bits 11:8 to be 1111.  No register is
      written. */
19350   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19351       && (   INSN0(9,4) == BITS6(0,0,0,0,0,1)  // TST
19352           || INSN0(9,4) == BITS6(0,0,1,0,0,1)) // TEQ
19353       && INSN1(15,15) == 0
19354       && INSN1(11,8) == BITS4(1,1,1,1)) {
19355      UInt rN = INSN0(3,0);
19356      if (!isBadRegT(rN)) { // yes, really, it's inconsistent with CMP.W
19357         Bool  isTST  = INSN0(9,4) == BITS6(0,0,0,0,0,1);
19358         IRTemp argL  = newTemp(Ity_I32);
19359         IRTemp argR  = newTemp(Ity_I32);
19360         IRTemp res   = newTemp(Ity_I32);
19361         IRTemp oldV  = newTemp(Ity_I32);
19362         IRTemp oldC  = newTemp(Ity_I32);
         /* The expander reports through updC whether the immediate
            form supplies a new carry. */
19363         Bool   updC  = False;
19364         UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
19365         assign(argL, getIRegT(rN));
19366         assign(argR, mkU32(imm32));
19367         assign(res,  binop(isTST ? Iop_And32 : Iop_Xor32,
19368                            mkexpr(argL), mkexpr(argR)));
19369         assign( oldV, mk_armg_calculate_flag_v() );
         /* Carry handed to the flag setter: bit 31 of the expanded
            immediate when updC is set, otherwise the current C. */
19370         assign( oldC, updC
19371                       ? mkU32((imm32 >> 31) & 1)
19372                       : mk_armg_calculate_flag_c() );
19373         setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
19374         DIP("%s.w r%u, #%u\n", isTST ? "tst" : "teq", rN, imm32);
19375         goto decode_success;
19376      }
19377   }
19378
19379   /* -------------- (T3) SUB{S}.W Rd, Rn, #constT -------------- */
19380   /* -------------- (T3) RSB{S}.W Rd, Rn, #constT -------------- */
   /* SUB computes Rn - imm; RSB computes imm - Rn.  insn0 bit 4 is
      the S bit, insn0 bits 3:0 name Rn and insn1 bits 11:8 name
      Rd. */
19381   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19382       && (INSN0(9,5) == BITS5(0,1,1,0,1) // SUB
19383           || INSN0(9,5) == BITS5(0,1,1,1,0)) // RSB
19384       && INSN1(15,15) == 0) {
19385      Bool isRSB = INSN0(9,5) == BITS5(0,1,1,1,0);
19386      UInt bS    = INSN0(4,4);
19387      UInt rN    = INSN0(3,0);
19388      UInt rD    = INSN1(11,8);
19389      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
19390      /* but allow "sub{s}.w reg, sp, #constT
19391         this is (T2) of "SUB (SP minus immediate)" */
      /* The exception applies to SUB only, never to RSB. */
19392      if (!valid && !isRSB && rN == 13 && rD != 15)
19393         valid = True;
19394      if (valid) {
19395         IRTemp argL  = newTemp(Ity_I32);
19396         IRTemp argR  = newTemp(Ity_I32);
19397         IRTemp res   = newTemp(Ity_I32);
19398         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
19399         assign(argL, getIRegT(rN));
19400         assign(argR, mkU32(imm32));
19401         assign(res,  isRSB
19402                      ? binop(Iop_Sub32, mkexpr(argR), mkexpr(argL))
19403                      : binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
19404         putIRegT(rD, mkexpr(res), condT);
         /* With S set, the flag setter gets the operands in the same
            order as the subtraction above, i.e. swapped for RSB. */
19405         if (bS == 1) {
19406            if (isRSB)
19407               setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
19408            else
19409               setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
19410         }
19411         DIP("%s%s.w r%u, r%u, #%u\n",
19412             isRSB ? "rsb" : "sub", bS == 1 ? "s" : "", rD, rN, imm32);
19413         goto decode_success;
19414      }
19415   }
19416
19417   /* -------------- (T4) SUBW Rd, Rn, #uimm12 ------------------- */
   /* Rd = Rn - a plain 12-bit immediate.  There is no S bit in this
      form and no flags are touched. */
19418   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19419       && INSN0(9,4) == BITS6(1,0,1,0,1,0)
19420       && INSN1(15,15) == 0) {
19421      UInt rN = INSN0(3,0);
19422      UInt rD = INSN1(11,8);
19423      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
19424      /* but allow "subw sp, sp, #uimm12" */
      /* Note this exception is narrower than the one in the ADDW
         stanza: both registers must be r13. */
19425      if (!valid && rD == 13 && rN == 13)
19426         valid = True;
19427      if (valid) {
19428         IRTemp argL  = newTemp(Ity_I32);
19429         IRTemp argR  = newTemp(Ity_I32);
19430         IRTemp res   = newTemp(Ity_I32);
         /* The immediate is insn0[10] : insn1[14:12] : insn1[7:0],
            used as-is (it does not go through
            thumbExpandImm_from_I0_I1). */
19431         UInt imm12   = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
19432         assign(argL, getIRegT(rN));
19433         assign(argR, mkU32(imm12));
19434         assign(res,  binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
19435         putIRegT(rD, mkexpr(res), condT);
19436         DIP("subw r%u, r%u, #%u\n", rD, rN, imm12);
19437         goto decode_success;
19438      }
19439   }
19440
19441   /* -------------- (T1) ADC{S}.W Rd, Rn, #constT -------------- */
19442   /* -------------- (T1) SBC{S}.W Rd, Rn, #constT -------------- */
   /* Add/subtract with carry against an expanded immediate.  insn0
      bit 4 is the S bit, insn0 bits 3:0 name Rn and insn1 bits 11:8
      name Rd. */
19443   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19444       && (   INSN0(9,5) == BITS5(0,1,0,1,0)  // ADC
19445           || INSN0(9,5) == BITS5(0,1,0,1,1)) // SBC
19446       && INSN1(15,15) == 0) {
19447      /* ADC:  Rd = Rn + constT + oldC */
19448      /* SBC:  Rd = Rn - constT - (oldC ^ 1) */
19449      UInt bS    = INSN0(4,4);
19450      UInt rN    = INSN0(3,0);
19451      UInt rD    = INSN1(11,8);
19452      if (!isBadRegT(rN) && !isBadRegT(rD)) {
19453         IRTemp argL  = newTemp(Ity_I32);
19454         IRTemp argR  = newTemp(Ity_I32);
19455         IRTemp res   = newTemp(Ity_I32);
19456         IRTemp oldC  = newTemp(Ity_I32);
19457         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
19458         assign(argL, getIRegT(rN));
19459         assign(argR, mkU32(imm32));
         /* The incoming carry is read once, before any register or
            flag write, and is used both in the arithmetic and as the
            third operand for the flag setter. */
19460         assign(oldC, mk_armg_calculate_flag_c() );
19461         const HChar* nm  = "???";
19462         switch (INSN0(9,5)) {
19463            case BITS5(0,1,0,1,0): // ADC
19464               nm = "adc";
19465               assign(res,
19466                      binop(Iop_Add32,
19467                            binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
19468                            mkexpr(oldC) ));
19469               putIRegT(rD, mkexpr(res), condT);
19470               if (bS)
19471                  setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
19472                                     argL, argR, oldC, condT );
19473               break;
19474            case BITS5(0,1,0,1,1): // SBC
19475               nm = "sbc";
19476               assign(res,
19477                      binop(Iop_Sub32,
19478                            binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
19479                            binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
19480               putIRegT(rD, mkexpr(res), condT);
19481               if (bS)
19482                  setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
19483                                     argL, argR, oldC, condT );
19484               break;
19485            default:
              /* The enclosing if admits only the two encodings
                 handled above. */
19486              vassert(0);
19487         }
19488         DIP("%s%s.w r%u, r%u, #%u\n",
19489             nm, bS == 1 ? "s" : "", rD, rN, imm32);
19490         goto decode_success;
19491      }
19492   }
19493
19494   /* -------------- (T1) ORR{S}.W Rd, Rn, #constT -------------- */
19495   /* -------------- (T1) AND{S}.W Rd, Rn, #constT -------------- */
19496   /* -------------- (T1) BIC{S}.W Rd, Rn, #constT -------------- */
19497   /* -------------- (T1) EOR{S}.W Rd, Rn, #constT -------------- */
   /* ORN{S} Rd, Rn, #constT is decoded here as well (see the fifth
      opcode alternative below).  insn0 bit 4 is the S bit, insn0
      bits 3:0 name Rn and insn1 bits 11:8 name Rd. */
19498   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19499       && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // ORR
19500           || INSN0(9,5) == BITS5(0,0,0,0,0)  // AND
19501           || INSN0(9,5) == BITS5(0,0,0,0,1)  // BIC
19502           || INSN0(9,5) == BITS5(0,0,1,0,0)  // EOR
19503           || INSN0(9,5) == BITS5(0,0,0,1,1)) // ORN
19504       && INSN1(15,15) == 0) {
19505      UInt bS = INSN0(4,4);
19506      UInt rN = INSN0(3,0);
19507      UInt rD = INSN1(11,8);
19508      if (!isBadRegT(rN) && !isBadRegT(rD)) {
         /* BIC and ORN reuse the AND and OR operations with the
            immediate complemented (notArgR). */
19509         Bool   notArgR = False;
19510         IROp   op      = Iop_INVALID;
19511         const HChar* nm = "???";
19512         switch (INSN0(9,5)) {
19513            case BITS5(0,0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
19514            case BITS5(0,0,0,0,0): op = Iop_And32; nm = "and"; break;
19515            case BITS5(0,0,0,0,1): op = Iop_And32; nm = "bic";
19516                                   notArgR = True; break;
19517            case BITS5(0,0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
19518            case BITS5(0,0,0,1,1): op = Iop_Or32;  nm = "orn";
19519                                   notArgR = True; break;
19520            default: vassert(0);
19521         }
19522         IRTemp argL  = newTemp(Ity_I32);
19523         IRTemp argR  = newTemp(Ity_I32);
19524         IRTemp res   = newTemp(Ity_I32);
         /* The expander reports through updC whether the immediate
            form supplies a new carry. */
19525         Bool   updC  = False;
19526         UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
19527         assign(argL, getIRegT(rN));
19528         assign(argR, mkU32(notArgR ? ~imm32 : imm32));
19529         assign(res,  binop(op, mkexpr(argL), mkexpr(argR)));
19530         putIRegT(rD, mkexpr(res), condT);
19531         if (bS) {
19532            IRTemp oldV = newTemp(Ity_I32);
19533            IRTemp oldC = newTemp(Ity_I32);
19534            assign( oldV, mk_armg_calculate_flag_v() );
            /* Carry handed to the flag setter: bit 31 of the
               expanded immediate (before any complementing) when
               updC is set, otherwise the current C. */
19535            assign( oldC, updC
19536                          ? mkU32((imm32 >> 31) & 1)
19537                          : mk_armg_calculate_flag_c() );
19538            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19539                               condT );
19540         }
         /* The trace prints the immediate as expanded, also for the
            complementing BIC/ORN forms. */
19541         DIP("%s%s.w r%u, r%u, #%u\n",
19542             nm, bS == 1 ? "s" : "", rD, rN, imm32);
19543         goto decode_success;
19544      }
19545   }
19546
19547   /* ---------- (T3) ADD{S}.W Rd, Rn, Rm, {shift} ---------- */
19548   /* ---------- (T3) SUB{S}.W Rd, Rn, Rm, {shift} ---------- */
19549   /* ---------- (T3) RSB{S}.W Rd, Rn, Rm, {shift} ---------- */
19550   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
19551       && (   INSN0(8,5) == BITS4(1,0,0,0)  // add subopc
19552           || INSN0(8,5) == BITS4(1,1,0,1)  // sub subopc
19553           || INSN0(8,5) == BITS4(1,1,1,0)) // rsb subopc
19554       && INSN1(15,15) == 0) {
19555      UInt rN   = INSN0(3,0);
19556      UInt rD   = INSN1(11,8);
19557      UInt rM   = INSN1(3,0);
19558      UInt bS   = INSN0(4,4);
19559      UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
19560      UInt how  = INSN1(5,4);
19561
19562      Bool valid = !isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM);
19563      /* but allow "add.w reg, sp, reg, lsl #N for N=0,1,2 or 3
19564         (T3) "ADD (SP plus register) */
19565      if (!valid && INSN0(8,5) == BITS4(1,0,0,0) // add
19566          && rD != 15 && rN == 13 && imm5 <= 3 && how == 0) {
19567         valid = True;
19568      }
19569      /* also allow "sub.w reg, sp, reg   w/ no shift
19570         (T1) "SUB (SP minus register) */
19571      if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // sub
19572          && rD != 15 && rN == 13 && imm5 == 0 && how == 0) {
19573         valid = True;
19574      }
19575      if (valid) {
19576         Bool   swap = False;
19577         IROp   op   = Iop_INVALID;
19578         const HChar* nm = "???";
19579         switch (INSN0(8,5)) {
19580            case BITS4(1,0,0,0): op = Iop_Add32; nm = "add"; break;
19581            case BITS4(1,1,0,1): op = Iop_Sub32; nm = "sub"; break;
19582            case BITS4(1,1,1,0): op = Iop_Sub32; nm = "rsb";
19583                                 swap = True; break;
19584            default: vassert(0);
19585         }
19586
19587         IRTemp argL = newTemp(Ity_I32);
19588         assign(argL, getIRegT(rN));
19589
19590         IRTemp rMt = newTemp(Ity_I32);
19591         assign(rMt, getIRegT(rM));
19592
19593         IRTemp argR = newTemp(Ity_I32);
19594         compute_result_and_C_after_shift_by_imm5(
19595            dis_buf, &argR, NULL, rMt, how, imm5, rM
19596         );
19597
19598         IRTemp res = newTemp(Ity_I32);
19599         assign(res, swap
19600                     ? binop(op, mkexpr(argR), mkexpr(argL))
19601                     : binop(op, mkexpr(argL), mkexpr(argR)));
19602
19603         putIRegT(rD, mkexpr(res), condT);
19604         if (bS) {
19605            switch (op) {
19606               case Iop_Add32:
19607                  setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
19608                  break;
19609               case Iop_Sub32:
19610                  if (swap)
19611                     setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
19612                  else
19613                     setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
19614                  break;
19615               default:
19616                  vassert(0);
19617            }
19618         }
19619
19620         DIP("%s%s.w r%u, r%u, %s\n",
19621             nm, bS ? "s" : "", rD, rN, dis_buf);
19622         goto decode_success;
19623      }
19624   }
19625
19626   /* ---------- (T3) ADC{S}.W Rd, Rn, Rm, {shift} ---------- */
19627   /* ---------- (T2) SBC{S}.W Rd, Rn, Rm, {shift} ---------- */
19628   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
19629       && (   INSN0(8,5) == BITS4(1,0,1,0)   // adc subopc
19630           || INSN0(8,5) == BITS4(1,0,1,1))  // sbc subopc
19631       && INSN1(15,15) == 0) {
19632      /* ADC:  Rd = Rn + shifter_operand + oldC */
19633      /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
19634      UInt rN = INSN0(3,0);
19635      UInt rD = INSN1(11,8);
19636      UInt rM = INSN1(3,0);
19637      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
19638         UInt bS   = INSN0(4,4);
19639         UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
19640         UInt how  = INSN1(5,4);
19641
19642         IRTemp argL = newTemp(Ity_I32);
19643         assign(argL, getIRegT(rN));
19644
19645         IRTemp rMt = newTemp(Ity_I32);
19646         assign(rMt, getIRegT(rM));
19647
19648         IRTemp oldC = newTemp(Ity_I32);
19649         assign(oldC, mk_armg_calculate_flag_c());
19650
19651         IRTemp argR = newTemp(Ity_I32);
19652         compute_result_and_C_after_shift_by_imm5(
19653            dis_buf, &argR, NULL, rMt, how, imm5, rM
19654         );
19655
19656         const HChar* nm  = "???";
19657         IRTemp res = newTemp(Ity_I32);
19658         switch (INSN0(8,5)) {
19659            case BITS4(1,0,1,0): // ADC
19660               nm = "adc";
19661               assign(res,
19662                      binop(Iop_Add32,
19663                            binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
19664                            mkexpr(oldC) ));
19665               putIRegT(rD, mkexpr(res), condT);
19666               if (bS)
19667                  setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
19668                                     argL, argR, oldC, condT );
19669               break;
19670            case BITS4(1,0,1,1): // SBC
19671               nm = "sbc";
19672               assign(res,
19673                      binop(Iop_Sub32,
19674                            binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
19675                            binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
19676               putIRegT(rD, mkexpr(res), condT);
19677               if (bS)
19678                  setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
19679                                     argL, argR, oldC, condT );
19680               break;
19681            default:
19682               vassert(0);
19683         }
19684
19685         DIP("%s%s.w r%u, r%u, %s\n",
19686             nm, bS ? "s" : "", rD, rN, dis_buf);
19687         goto decode_success;
19688      }
19689   }
19690
19691   /* ---------- (T3) AND{S}.W Rd, Rn, Rm, {shift} ---------- */
19692   /* ---------- (T3) ORR{S}.W Rd, Rn, Rm, {shift} ---------- */
19693   /* ---------- (T3) EOR{S}.W Rd, Rn, Rm, {shift} ---------- */
19694   /* ---------- (T3) BIC{S}.W Rd, Rn, Rm, {shift} ---------- */
19695   /* ---------- (T1) ORN{S}.W Rd, Rn, Rm, {shift} ---------- */
19696   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
19697       && (   INSN0(8,5) == BITS4(0,0,0,0)  // and subopc
19698           || INSN0(8,5) == BITS4(0,0,1,0)  // orr subopc
19699           || INSN0(8,5) == BITS4(0,1,0,0)  // eor subopc
19700           || INSN0(8,5) == BITS4(0,0,0,1)  // bic subopc
19701           || INSN0(8,5) == BITS4(0,0,1,1)) // orn subopc
19702       && INSN1(15,15) == 0) {
19703      UInt rN = INSN0(3,0);
19704      UInt rD = INSN1(11,8);
19705      UInt rM = INSN1(3,0);
19706      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
19707         Bool notArgR = False;
19708         IROp op      = Iop_INVALID;
19709         const HChar* nm  = "???";
19710         switch (INSN0(8,5)) {
19711            case BITS4(0,0,0,0): op = Iop_And32; nm = "and"; break;
19712            case BITS4(0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
19713            case BITS4(0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
19714            case BITS4(0,0,0,1): op = Iop_And32; nm = "bic";
19715                                 notArgR = True; break;
19716            case BITS4(0,0,1,1): op = Iop_Or32; nm = "orn";
19717                                 notArgR = True; break;
19718            default: vassert(0);
19719         }
19720         UInt bS   = INSN0(4,4);
19721         UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
19722         UInt how  = INSN1(5,4);
19723
19724         IRTemp rNt = newTemp(Ity_I32);
19725         assign(rNt, getIRegT(rN));
19726
19727         IRTemp rMt = newTemp(Ity_I32);
19728         assign(rMt, getIRegT(rM));
19729
19730         IRTemp argR = newTemp(Ity_I32);
19731         IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
19732
19733         compute_result_and_C_after_shift_by_imm5(
19734            dis_buf, &argR, bS ? &oldC : NULL, rMt, how, imm5, rM
19735         );
19736
19737         IRTemp res = newTemp(Ity_I32);
19738         if (notArgR) {
19739            vassert(op == Iop_And32 || op == Iop_Or32);
19740            assign(res, binop(op, mkexpr(rNt),
19741                                  unop(Iop_Not32, mkexpr(argR))));
19742         } else {
19743            assign(res, binop(op, mkexpr(rNt), mkexpr(argR)));
19744         }
19745
19746         putIRegT(rD, mkexpr(res), condT);
19747         if (bS) {
19748            IRTemp oldV = newTemp(Ity_I32);
19749            assign( oldV, mk_armg_calculate_flag_v() );
19750            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19751                               condT );
19752         }
19753
19754         DIP("%s%s.w r%u, r%u, %s\n",
19755             nm, bS ? "s" : "", rD, rN, dis_buf);
19756         goto decode_success;
19757      }
19758   }
19759
19760   /* -------------- (T?) LSL{S}.W Rd, Rn, Rm -------------- */
19761   /* -------------- (T?) LSR{S}.W Rd, Rn, Rm -------------- */
19762   /* -------------- (T?) ASR{S}.W Rd, Rn, Rm -------------- */
19763   /* -------------- (T?) ROR{S}.W Rd, Rn, Rm -------------- */
19764   if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,0,0)
19765       && INSN1(15,12) == BITS4(1,1,1,1)
19766       && INSN1(7,4) == BITS4(0,0,0,0)) {
19767      UInt how = INSN0(6,5); // standard encoding
19768      UInt rN  = INSN0(3,0);
19769      UInt rD  = INSN1(11,8);
19770      UInt rM  = INSN1(3,0);
19771      UInt bS  = INSN0(4,4);
19772      Bool valid = !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rD);
19773      if (valid) {
19774         IRTemp rNt    = newTemp(Ity_I32);
19775         IRTemp rMt    = newTemp(Ity_I32);
19776         IRTemp res    = newTemp(Ity_I32);
19777         IRTemp oldC   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
19778         IRTemp oldV   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
19779         const HChar* nms[4] = { "lsl", "lsr", "asr", "ror" };
19780         const HChar* nm     = nms[how];
19781         assign(rNt, getIRegT(rN));
19782         assign(rMt, getIRegT(rM));
19783         compute_result_and_C_after_shift_by_reg(
19784            dis_buf, &res, bS ? &oldC : NULL,
19785            rNt, how, rMt, rN, rM
19786         );
19787         if (bS)
19788            assign(oldV, mk_armg_calculate_flag_v());
19789         putIRegT(rD, mkexpr(res), condT);
19790         if (bS) {
19791            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19792                               condT );
19793         }
19794         DIP("%s%s.w r%u, r%u, r%u\n",
19795             nm, bS ? "s" : "", rD, rN, rM);
19796         goto decode_success;
19797      }
19798   }
19799
19800   /* ------------ (T?) MOV{S}.W Rd, Rn, {shift} ------------ */
19801   /* ------------ (T?) MVN{S}.W Rd, Rn, {shift} ------------ */
19802   if ((INSN0(15,0) & 0xFFCF) == 0xEA4F
19803       && INSN1(15,15) == 0) {
19804      UInt rD = INSN1(11,8);
19805      UInt rN = INSN1(3,0);
19806      UInt bS = INSN0(4,4);
19807      int badRegs = bS ? (isBadRegT(rD) || isBadRegT(rN))
19808                       : (rD == 15 || rN == 15 || (rD == 15 && rN == 15));
19809
19810      if (!badRegs) {
19811         UInt isMVN = INSN0(5,5);
19812         UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
19813         UInt how   = INSN1(5,4);
19814
19815         IRTemp rNt = newTemp(Ity_I32);
19816         assign(rNt, getIRegT(rN));
19817
19818         IRTemp oldRn = newTemp(Ity_I32);
19819         IRTemp oldC  = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
19820         compute_result_and_C_after_shift_by_imm5(
19821            dis_buf, &oldRn, bS ? &oldC : NULL, rNt, how, imm5, rN
19822         );
19823
19824         IRTemp res = newTemp(Ity_I32);
19825         assign(res, isMVN ? unop(Iop_Not32, mkexpr(oldRn))
19826                           : mkexpr(oldRn));
19827
19828         putIRegT(rD, mkexpr(res), condT);
19829         if (bS) {
19830            IRTemp oldV = newTemp(Ity_I32);
19831            assign( oldV, mk_armg_calculate_flag_v() );
19832            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT);
19833         }
19834         DIP("%s%s.w r%u, %s\n",
19835             isMVN ? "mvn" : "mov", bS ? "s" : "", rD, dis_buf);
19836         goto decode_success;
19837      }
19838   }
19839
19840   /* -------------- (T?) TST.W Rn, Rm, {shift} -------------- */
19841   /* -------------- (T?) TEQ.W Rn, Rm, {shift} -------------- */
19842   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
19843       && (   INSN0(8,4) == BITS5(0,0,0,0,1)  // TST
19844           || INSN0(8,4) == BITS5(0,1,0,0,1)) // TEQ
19845       && INSN1(15,15) == 0
19846       && INSN1(11,8) == BITS4(1,1,1,1)) {
19847      UInt rN = INSN0(3,0);
19848      UInt rM = INSN1(3,0);
19849      if (!isBadRegT(rN) && !isBadRegT(rM)) {
19850         Bool isTST = INSN0(8,4) == BITS5(0,0,0,0,1);
19851
19852         UInt how  = INSN1(5,4);
19853         UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
19854
19855         IRTemp argL = newTemp(Ity_I32);
19856         assign(argL, getIRegT(rN));
19857
19858         IRTemp rMt = newTemp(Ity_I32);
19859         assign(rMt, getIRegT(rM));
19860
19861         IRTemp argR = newTemp(Ity_I32);
19862         IRTemp oldC = newTemp(Ity_I32);
19863         compute_result_and_C_after_shift_by_imm5(
19864            dis_buf, &argR, &oldC, rMt, how, imm5, rM
19865         );
19866
19867         IRTemp oldV = newTemp(Ity_I32);
19868         assign( oldV, mk_armg_calculate_flag_v() );
19869
19870         IRTemp res = newTemp(Ity_I32);
19871         assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
19872                           mkexpr(argL), mkexpr(argR)));
19873
19874         setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19875                            condT );
19876         DIP("%s.w r%u, %s\n", isTST ? "tst" : "teq", rN, dis_buf);
19877         goto decode_success;
19878      }
19879   }
19880
19881   /* -------------- (T3) CMP.W Rn, Rm, {shift} -------------- */
19882   /* -------------- (T2) CMN.W Rn, Rm, {shift} -------------- */
19883   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
19884       && (   INSN0(8,4) == BITS5(1,1,0,1,1)  // CMP
19885           || INSN0(8,4) == BITS5(1,0,0,0,1)) // CMN
19886       && INSN1(15,15) == 0
19887       && INSN1(11,8) == BITS4(1,1,1,1)) {
19888      UInt rN = INSN0(3,0);
19889      UInt rM = INSN1(3,0);
19890      if (!isBadRegT(rN) && !isBadRegT(rM)) {
19891         Bool isCMN = INSN0(8,4) == BITS5(1,0,0,0,1);
19892         UInt how   = INSN1(5,4);
19893         UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
19894
19895         IRTemp argL = newTemp(Ity_I32);
19896         assign(argL, getIRegT(rN));
19897
19898         IRTemp rMt = newTemp(Ity_I32);
19899         assign(rMt, getIRegT(rM));
19900
19901         IRTemp argR = newTemp(Ity_I32);
19902         compute_result_and_C_after_shift_by_imm5(
19903            dis_buf, &argR, NULL, rMt, how, imm5, rM
19904         );
19905
19906         setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
19907                         argL, argR, condT );
19908
19909         DIP("%s.w r%u, %s\n", isCMN ? "cmn" : "cmp", rN, dis_buf);
19910         goto decode_success;
19911      }
19912   }
19913
19914   /* -------------- (T2) MOV{S}.W Rd, #constT -------------- */
19915   /* -------------- (T2) MVN{S}.W Rd, #constT -------------- */
19916   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19917       && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // MOV
19918           || INSN0(9,5) == BITS5(0,0,0,1,1)) // MVN
19919       && INSN0(3,0) == BITS4(1,1,1,1)
19920       && INSN1(15,15) == 0) {
19921      UInt rD = INSN1(11,8);
19922      if (!isBadRegT(rD)) {
19923         Bool   updC  = False;
19924         UInt   bS    = INSN0(4,4);
19925         Bool   isMVN = INSN0(5,5) == 1;
19926         UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
19927         IRTemp res   = newTemp(Ity_I32);
19928         assign(res, mkU32(isMVN ? ~imm32 : imm32));
19929         putIRegT(rD, mkexpr(res), condT);
19930         if (bS) {
19931            IRTemp oldV = newTemp(Ity_I32);
19932            IRTemp oldC = newTemp(Ity_I32);
19933            assign( oldV, mk_armg_calculate_flag_v() );
19934            assign( oldC, updC
19935                          ? mkU32((imm32 >> 31) & 1)
19936                          : mk_armg_calculate_flag_c() );
19937            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19938                               condT );
19939         }
19940         DIP("%s%s.w r%u, #%u\n",
19941             isMVN ? "mvn" : "mov", bS ? "s" : "", rD, imm32);
19942         goto decode_success;
19943      }
19944   }
19945
19946   /* -------------- (T3) MOVW Rd, #imm16 -------------- */
19947   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19948       && INSN0(9,4) == BITS6(1,0,0,1,0,0)
19949       && INSN1(15,15) == 0) {
19950      UInt rD = INSN1(11,8);
19951      if (!isBadRegT(rD)) {
19952         UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
19953                      | (INSN1(14,12) << 8) | INSN1(7,0);
19954         putIRegT(rD, mkU32(imm16), condT);
19955         DIP("movw r%u, #%u\n", rD, imm16);
19956         goto decode_success;
19957      }
19958   }
19959
19960   /* ---------------- MOVT Rd, #imm16 ---------------- */
19961   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19962       && INSN0(9,4) == BITS6(1,0,1,1,0,0)
19963       && INSN1(15,15) == 0) {
19964      UInt rD = INSN1(11,8);
19965      if (!isBadRegT(rD)) {
19966         UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
19967                      | (INSN1(14,12) << 8) | INSN1(7,0);
19968         IRTemp res = newTemp(Ity_I32);
19969         assign(res,
19970                binop(Iop_Or32,
19971                      binop(Iop_And32, getIRegT(rD), mkU32(0xFFFF)),
19972                      mkU32(imm16 << 16)));
19973         putIRegT(rD, mkexpr(res), condT);
19974         DIP("movt r%u, #%u\n", rD, imm16);
19975         goto decode_success;
19976      }
19977   }
19978
19979   /* ---------------- LD/ST reg+/-#imm8 ---------------- */
19980   /* Loads and stores of the form:
19981         op  Rt, [Rn, #-imm8]      or
19982         op  Rt, [Rn], #+/-imm8    or
19983         op  Rt, [Rn, #+/-imm8]!
19984      where op is one of
19985         ldrb ldrh ldr  ldrsb ldrsh
19986         strb strh str
19987   */
19988   if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0) && INSN1(11,11) == 1) {
19989      Bool   valid  = True;
19990      Bool   syned  = False;
19991      Bool   isST   = False;
19992      IRType ty     = Ity_I8;
19993      const HChar* nm = "???";
19994
19995      switch (INSN0(8,4)) {
19996         case BITS5(0,0,0,0,0):   // strb
19997            nm = "strb"; isST = True; break;
19998         case BITS5(0,0,0,0,1):   // ldrb
19999            nm = "ldrb"; break;
20000         case BITS5(1,0,0,0,1):   // ldrsb
20001            nm = "ldrsb"; syned = True; break;
20002         case BITS5(0,0,0,1,0):   // strh
20003            nm = "strh"; ty = Ity_I16; isST = True; break;
20004         case BITS5(0,0,0,1,1):   // ldrh
20005            nm = "ldrh"; ty = Ity_I16; break;
20006         case BITS5(1,0,0,1,1):   // ldrsh
20007            nm = "ldrsh"; ty = Ity_I16; syned = True; break;
20008         case BITS5(0,0,1,0,0):   // str
20009            nm = "str"; ty = Ity_I32; isST = True; break;
20010         case BITS5(0,0,1,0,1):
20011            nm = "ldr"; ty = Ity_I32; break;  // ldr
20012         default:
20013            valid = False; break;
20014      }
20015
20016      UInt rN      = INSN0(3,0);
20017      UInt rT      = INSN1(15,12);
20018      UInt bP      = INSN1(10,10);
20019      UInt bU      = INSN1(9,9);
20020      UInt bW      = INSN1(8,8);
20021      UInt imm8    = INSN1(7,0);
20022      Bool loadsPC = False;
20023
20024      if (valid) {
20025         if (bP == 1 && bU == 1 && bW == 0)
20026            valid = False;
20027         if (bP == 0 && bW == 0)
20028            valid = False;
20029         if (rN == 15)
20030            valid = False;
20031         if (bW == 1 && rN == rT)
20032            valid = False;
20033         if (ty == Ity_I8 || ty == Ity_I16) {
20034            if (isBadRegT(rT))
20035               valid = False;
20036         } else {
20037            /* ty == Ity_I32 */
20038            if (isST && rT == 15)
20039               valid = False;
20040            if (!isST && rT == 15)
20041               loadsPC = True;
20042         }
20043      }
20044
20045      if (valid) {
20046         // if it's a branch, it can't happen in the middle of an IT block
20047         // Also, if it is a branch, make it unconditional at this point.
20048         // Doing conditional branches in-line is too complex (for now)
20049         if (loadsPC) {
20050            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20051            // go uncond
20052            mk_skip_over_T32_if_cond_is_false(condT);
20053            condT = IRTemp_INVALID;
20054            // now uncond
20055         }
20056
20057         IRTemp preAddr = newTemp(Ity_I32);
20058         assign(preAddr, getIRegT(rN));
20059
20060         IRTemp postAddr = newTemp(Ity_I32);
20061         assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
20062                                mkexpr(preAddr), mkU32(imm8)));
20063
20064         IRTemp transAddr = bP == 1 ? postAddr : preAddr;
20065
20066         if (isST) {
20067
20068            /* Store.  If necessary, update the base register before
20069               the store itself, so that the common idiom of "str rX,
20070               [sp, #-4]!" (store rX at sp-4, then do new sp = sp-4,
20071               a.k.a "push rX") doesn't cause Memcheck to complain
20072               that the access is below the stack pointer.  Also, not
20073               updating sp before the store confuses Valgrind's
20074               dynamic stack-extending logic.  So do it before the
20075               store.  Hence we need to snarf the store data before
20076               doing the basereg update. */
20077
20078            /* get hold of the data to be stored */
20079            IRTemp oldRt = newTemp(Ity_I32);
20080            assign(oldRt, getIRegT(rT));
20081
20082            /* Update Rn if necessary. */
20083            if (bW == 1) {
20084               vassert(rN != rT); // assured by validity check above
20085               putIRegT(rN, mkexpr(postAddr), condT);
20086            }
20087
20088            /* generate the transfer */
20089            IRExpr* data = NULL;
20090            switch (ty) {
20091               case Ity_I8:
20092                  data = unop(Iop_32to8, mkexpr(oldRt));
20093                  break;
20094               case Ity_I16:
20095                  data = unop(Iop_32to16, mkexpr(oldRt));
20096                  break;
20097               case Ity_I32:
20098                  data = mkexpr(oldRt);
20099                  break;
20100               default:
20101                  vassert(0);
20102            }
20103            storeGuardedLE(mkexpr(transAddr), data, condT);
20104
20105         } else {
20106
20107            /* Load. */
20108            IRTemp llOldRt = newTemp(Ity_I32);
20109            assign(llOldRt, llGetIReg(rT));
20110
20111            /* generate the transfer */
20112            IRTemp    newRt = newTemp(Ity_I32);
20113            IRLoadGOp widen = ILGop_INVALID;
20114            switch (ty) {
20115               case Ity_I8:
20116                  widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
20117               case Ity_I16:
20118                  widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
20119               case Ity_I32:
20120                  widen = ILGop_Ident32; break;
20121               default:
20122                  vassert(0);
20123            }
20124            loadGuardedLE(newRt, widen,
20125                          mkexpr(transAddr), mkexpr(llOldRt), condT);
20126            if (rT == 15) {
20127               vassert(loadsPC);
20128               /* We'll do the write to the PC just below */
20129            } else {
20130               vassert(!loadsPC);
20131               /* IRTemp_INVALID is OK here because in the case where
20132                  condT is false at run time, we're just putting the
20133                  old rT value back. */
20134               putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
20135            }
20136
20137            /* Update Rn if necessary. */
20138            if (bW == 1) {
20139               vassert(rN != rT); // assured by validity check above
20140               putIRegT(rN, mkexpr(postAddr), condT);
20141            }
20142
20143            if (loadsPC) {
20144               /* Presumably this is an interworking branch. */
20145               vassert(rN != 15); // assured by validity check above
20146               vassert(rT == 15);
20147               vassert(condT == IRTemp_INVALID); /* due to check above */
20148               llPutIReg(15, mkexpr(newRt));
20149               dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
20150               dres.whatNext    = Dis_StopHere;
20151            }
20152         }
20153
20154         if (bP == 1 && bW == 0) {
20155            DIP("%s.w r%u, [r%u, #%c%u]\n",
20156                nm, rT, rN, bU ? '+' : '-', imm8);
20157         }
20158         else if (bP == 1 && bW == 1) {
20159            DIP("%s.w r%u, [r%u, #%c%u]!\n",
20160                nm, rT, rN, bU ? '+' : '-', imm8);
20161         }
20162         else {
20163            vassert(bP == 0 && bW == 1);
20164            DIP("%s.w r%u, [r%u], #%c%u\n",
20165                nm, rT, rN, bU ? '+' : '-', imm8);
20166         }
20167
20168         goto decode_success;
20169      }
20170   }
20171
20172   /* ------------- LD/ST reg+(reg<<imm2) ------------- */
20173   /* Loads and stores of the form:
20174         op  Rt, [Rn, Rm, LSL #imm8]
20175      where op is one of
20176         ldrb ldrh ldr  ldrsb ldrsh
20177         strb strh str
20178   */
20179   if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)
20180       && INSN1(11,6) == BITS6(0,0,0,0,0,0)) {
20181      Bool   valid  = True;
20182      Bool   syned  = False;
20183      Bool   isST   = False;
20184      IRType ty     = Ity_I8;
20185      const HChar* nm = "???";
20186
20187      switch (INSN0(8,4)) {
20188         case BITS5(0,0,0,0,0):   // strb
20189            nm = "strb"; isST = True; break;
20190         case BITS5(0,0,0,0,1):   // ldrb
20191            nm = "ldrb"; break;
20192         case BITS5(1,0,0,0,1):   // ldrsb
20193            nm = "ldrsb"; syned = True; break;
20194         case BITS5(0,0,0,1,0):   // strh
20195            nm = "strh"; ty = Ity_I16; isST = True; break;
20196         case BITS5(0,0,0,1,1):   // ldrh
20197            nm = "ldrh"; ty = Ity_I16; break;
20198         case BITS5(1,0,0,1,1):   // ldrsh
20199            nm = "ldrsh"; ty = Ity_I16; syned = True; break;
20200         case BITS5(0,0,1,0,0):   // str
20201            nm = "str"; ty = Ity_I32; isST = True; break;
20202         case BITS5(0,0,1,0,1):
20203            nm = "ldr"; ty = Ity_I32; break;  // ldr
20204         default:
20205            valid = False; break;
20206      }
20207
20208      UInt rN      = INSN0(3,0);
20209      UInt rM      = INSN1(3,0);
20210      UInt rT      = INSN1(15,12);
20211      UInt imm2    = INSN1(5,4);
20212      Bool loadsPC = False;
20213
20214      if (ty == Ity_I8 || ty == Ity_I16) {
20215         /* all 8- and 16-bit load and store cases have the
20216            same exclusion set. */
20217         if (rN == 15 || isBadRegT(rT) || isBadRegT(rM))
20218            valid = False;
20219      } else {
20220         vassert(ty == Ity_I32);
20221         if (rN == 15 || isBadRegT(rM))
20222            valid = False;
20223         if (isST && rT == 15)
20224            valid = False;
20225         /* If it is a load and rT is 15, that's only allowable if we
20226            not in an IT block, or are the last in it.  Need to insert
20227            a dynamic check for that. */
20228         if (!isST && rT == 15)
20229            loadsPC = True;
20230      }
20231
20232      if (valid) {
20233         // if it's a branch, it can't happen in the middle of an IT block
20234         // Also, if it is a branch, make it unconditional at this point.
20235         // Doing conditional branches in-line is too complex (for now)
20236         if (loadsPC) {
20237            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20238            // go uncond
20239            mk_skip_over_T32_if_cond_is_false(condT);
20240            condT = IRTemp_INVALID;
20241            // now uncond
20242         }
20243
20244         IRTemp transAddr = newTemp(Ity_I32);
20245         assign(transAddr,
20246                binop( Iop_Add32,
20247                       getIRegT(rN),
20248                       binop(Iop_Shl32, getIRegT(rM), mkU8(imm2)) ));
20249
20250         if (isST) {
20251
20252            /* get hold of the data to be stored */
20253            IRTemp oldRt = newTemp(Ity_I32);
20254            assign(oldRt, getIRegT(rT));
20255
20256            /* generate the transfer */
20257            IRExpr* data = NULL;
20258            switch (ty) {
20259               case Ity_I8:
20260                  data = unop(Iop_32to8, mkexpr(oldRt));
20261                  break;
20262               case Ity_I16:
20263                  data = unop(Iop_32to16, mkexpr(oldRt));
20264                  break;
20265              case Ity_I32:
20266                  data = mkexpr(oldRt);
20267                  break;
20268              default:
20269                 vassert(0);
20270            }
20271            storeGuardedLE(mkexpr(transAddr), data, condT);
20272
20273         } else {
20274
20275            /* Load. */
20276            IRTemp llOldRt = newTemp(Ity_I32);
20277            assign(llOldRt, llGetIReg(rT));
20278
20279            /* generate the transfer */
20280            IRTemp    newRt = newTemp(Ity_I32);
20281            IRLoadGOp widen = ILGop_INVALID;
20282            switch (ty) {
20283               case Ity_I8:
20284                  widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
20285               case Ity_I16:
20286                  widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
20287               case Ity_I32:
20288                  widen = ILGop_Ident32; break;
20289               default:
20290                  vassert(0);
20291            }
20292            loadGuardedLE(newRt, widen,
20293                          mkexpr(transAddr), mkexpr(llOldRt), condT);
20294
20295            if (rT == 15) {
20296               vassert(loadsPC);
20297               /* We'll do the write to the PC just below */
20298            } else {
20299               vassert(!loadsPC);
20300               /* IRTemp_INVALID is OK here because in the case where
20301                  condT is false at run time, we're just putting the
20302                  old rT value back. */
20303               putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
20304            }
20305
20306            if (loadsPC) {
20307               /* Presumably this is an interworking branch. */
20308               vassert(rN != 15); // assured by validity check above
20309               vassert(rT == 15);
20310               vassert(condT == IRTemp_INVALID); /* due to check above */
20311               llPutIReg(15, mkexpr(newRt));
20312               dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
20313               dres.whatNext    = Dis_StopHere;
20314            }
20315         }
20316
20317         DIP("%s.w r%u, [r%u, r%u, LSL #%u]\n",
20318             nm, rT, rN, rM, imm2);
20319
20320         goto decode_success;
20321      }
20322   }
20323
20324   /* --------------- LD/ST reg+imm12 --------------- */
20325   /* Loads and stores of the form:
20326         op  Rt, [Rn, #+-imm12]
20327      where op is one of
20328         ldrb ldrh ldr  ldrsb ldrsh
20329         strb strh str
20330   */
20331   if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)) {
20332      Bool   valid  = True;
20333      Bool   syned  = INSN0(8,8) == 1;
20334      Bool   isST   = False;
20335      IRType ty     = Ity_I8;
20336      UInt   bU     = INSN0(7,7); // 1: +imm   0: -imm
20337                                  // -imm is only supported by literal versions
20338      const HChar* nm = "???";
20339
20340      switch (INSN0(6,4)) {
20341         case BITS3(0,0,0):   // strb
20342            nm = "strb"; isST = True; break;
20343         case BITS3(0,0,1):   // ldrb
20344            nm = syned ? "ldrsb" : "ldrb"; break;
20345         case BITS3(0,1,0):   // strh
20346            nm = "strh"; ty = Ity_I16; isST = True; break;
20347         case BITS3(0,1,1):   // ldrh
20348            nm = syned ? "ldrsh" : "ldrh"; ty = Ity_I16; break;
20349         case BITS3(1,0,0):   // str
20350            nm = "str"; ty = Ity_I32; isST = True; break;
20351         case BITS3(1,0,1):
20352            nm = "ldr"; ty = Ity_I32; break;  // ldr
20353         default:
20354            valid = False; break;
20355      }
20356
20357      UInt rN      = INSN0(3,0);
20358      UInt rT      = INSN1(15,12);
20359      UInt imm12   = INSN1(11,0);
20360      Bool loadsPC = False;
20361
20362      if (rN != 15 && bU == 0) {
20363         // only pc supports #-imm12
20364         valid = False;
20365      }
20366
20367      if (isST) {
20368         if (syned) valid = False;
20369         if (rN == 15 || rT == 15)
20370            valid = False;
20371      } else {
20372         /* For a 32-bit load, rT == 15 is only allowable if we are not
20373            in an IT block, or are the last in it.  Need to insert
20374            a dynamic check for that.  Also, in this particular
20375            case, rN == 15 is allowable.  In this case however, the
20376            value obtained for rN is (apparently)
20377            "word-align(address of current insn + 4)". */
20378         if (rT == 15) {
20379            if (ty == Ity_I32)
20380               loadsPC = True;
20381            else // Can't do it for B/H loads
20382               valid = False;
20383         }
20384      }
20385
20386      if (valid) {
20387         // if it's a branch, it can't happen in the middle of an IT block
20388         // Also, if it is a branch, make it unconditional at this point.
20389         // Doing conditional branches in-line is too complex (for now)
20390         if (loadsPC) {
20391            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20392            // go uncond
20393            mk_skip_over_T32_if_cond_is_false(condT);
20394            condT = IRTemp_INVALID;
20395            // now uncond
20396         }
20397
20398         IRTemp rNt = newTemp(Ity_I32);
20399         if (rN == 15) {
20400            vassert(!isST);
20401            assign(rNt, binop(Iop_And32, getIRegT(15), mkU32(~3)));
20402         } else {
20403            assign(rNt, getIRegT(rN));
20404         }
20405
20406         IRTemp transAddr = newTemp(Ity_I32);
20407         assign(transAddr,
20408                binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
20409                      mkexpr(rNt), mkU32(imm12)));
20410
20411         IRTemp oldRt = newTemp(Ity_I32);
20412         assign(oldRt, getIRegT(rT));
20413
20414         IRTemp llOldRt = newTemp(Ity_I32);
20415         assign(llOldRt, llGetIReg(rT));
20416
20417         if (isST) {
20418            IRExpr* data = NULL;
20419            switch (ty) {
20420               case Ity_I8:
20421                  data = unop(Iop_32to8, mkexpr(oldRt));
20422                  break;
20423               case Ity_I16:
20424                  data = unop(Iop_32to16, mkexpr(oldRt));
20425                  break;
20426              case Ity_I32:
20427                  data = mkexpr(oldRt);
20428                  break;
20429              default:
20430                 vassert(0);
20431            }
20432            storeGuardedLE(mkexpr(transAddr), data, condT);
20433         } else {
20434            IRTemp    newRt = newTemp(Ity_I32);
20435            IRLoadGOp widen = ILGop_INVALID;
20436            switch (ty) {
20437               case Ity_I8:
20438                  widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
20439               case Ity_I16:
20440                  widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
20441               case Ity_I32:
20442                  widen = ILGop_Ident32; break;
20443               default:
20444                  vassert(0);
20445            }
20446            loadGuardedLE(newRt, widen,
20447                          mkexpr(transAddr), mkexpr(llOldRt), condT);
20448            if (rT == 15) {
20449               vassert(loadsPC);
20450               /* We'll do the write to the PC just below */
20451            } else {
20452               vassert(!loadsPC);
20453               /* IRTemp_INVALID is OK here because in the case where
20454                  condT is false at run time, we're just putting the
20455                  old rT value back. */
20456               putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
20457            }
20458
20459            if (loadsPC) {
20460               /* Presumably this is an interworking branch. */
20461               vassert(rT == 15);
20462               vassert(condT == IRTemp_INVALID); /* due to check above */
20463               llPutIReg(15, mkexpr(newRt));
20464               dres.jk_StopHere = Ijk_Boring;
20465               dres.whatNext    = Dis_StopHere;
20466            }
20467         }
20468
20469         DIP("%s.w r%u, [r%u, +#%u]\n", nm, rT, rN, imm12);
20470
20471         goto decode_success;
20472      }
20473   }
20474
20475   /* -------------- LDRD/STRD reg+/-#imm8 -------------- */
20476   /* Doubleword loads and stores of the form:
20477         ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]    or
20478         ldrd/strd  Rt, Rt2, [Rn], #+/-imm8    or
20479         ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]!
20480   */
20481   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,0) && INSN0(6,6) == 1) {
20482      UInt bP   = INSN0(8,8);
20483      UInt bU   = INSN0(7,7);
20484      UInt bW   = INSN0(5,5);
20485      UInt bL   = INSN0(4,4);  // 1: load  0: store
20486      UInt rN   = INSN0(3,0);
20487      UInt rT   = INSN1(15,12);
20488      UInt rT2  = INSN1(11,8);
20489      UInt imm8 = INSN1(7,0);
20490
20491      Bool valid = True;
20492      if (bP == 0 && bW == 0)                 valid = False;
20493      if (bW == 1 && (rN == rT || rN == rT2)) valid = False;
20494      if (isBadRegT(rT) || isBadRegT(rT2))    valid = False;
20495      if (bL == 1 && rT == rT2)               valid = False;
20496      /* It's OK to use PC as the base register only in the
20497         following case: ldrd Rt, Rt2, [PC, #+/-imm8] */
20498      if (rN == 15 && (bL == 0/*store*/
20499                       || bW == 1/*wb*/))     valid = False;
20500
20501      if (valid) {
20502         IRTemp preAddr = newTemp(Ity_I32);
20503         assign(preAddr, 15 == rN
20504                           ? binop(Iop_And32, getIRegT(15), mkU32(~3U))
20505                           : getIRegT(rN));
20506
20507         IRTemp postAddr = newTemp(Ity_I32);
20508         assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
20509                                mkexpr(preAddr), mkU32(imm8 << 2)));
20510
20511         IRTemp transAddr = bP == 1 ? postAddr : preAddr;
20512
20513         /* For almost all cases, we do the writeback after the transfers.
20514            However, that leaves the stack "uncovered" in this case:
20515               strd    rD, [sp, #-8]
20516            In which case, do the writeback to SP now, instead of later.
20517            This is bad in that it makes the insn non-restartable if the
20518            accesses fault, but at least keeps Memcheck happy. */
20519         Bool writeback_already_done = False;
20520         if (bL == 0/*store*/ && bW == 1/*wb*/
20521             && rN == 13 && rN != rT && rN != rT2
20522             && bU == 0/*minus*/ && (imm8 << 2) == 8) {
20523            putIRegT(rN, mkexpr(postAddr), condT);
20524            writeback_already_done = True;
20525         }
20526
20527         if (bL == 0) {
20528            IRTemp oldRt  = newTemp(Ity_I32);
20529            IRTemp oldRt2 = newTemp(Ity_I32);
20530            assign(oldRt,  getIRegT(rT));
20531            assign(oldRt2, getIRegT(rT2));
20532            storeGuardedLE( mkexpr(transAddr),
20533                            mkexpr(oldRt), condT );
20534            storeGuardedLE( binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
20535                            mkexpr(oldRt2), condT );
20536         } else {
20537            IRTemp oldRt  = newTemp(Ity_I32);
20538            IRTemp oldRt2 = newTemp(Ity_I32);
20539            IRTemp newRt  = newTemp(Ity_I32);
20540            IRTemp newRt2 = newTemp(Ity_I32);
20541            assign(oldRt,  llGetIReg(rT));
20542            assign(oldRt2, llGetIReg(rT2));
20543            loadGuardedLE( newRt, ILGop_Ident32,
20544                           mkexpr(transAddr),
20545                           mkexpr(oldRt), condT );
20546            loadGuardedLE( newRt2, ILGop_Ident32,
20547                           binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
20548                           mkexpr(oldRt2), condT );
20549            /* Put unconditionally, since we already switched on the condT
20550               in the guarded loads. */
20551            putIRegT(rT,  mkexpr(newRt),  IRTemp_INVALID);
20552            putIRegT(rT2, mkexpr(newRt2), IRTemp_INVALID);
20553         }
20554
20555         if (bW == 1 && !writeback_already_done) {
20556            putIRegT(rN, mkexpr(postAddr), condT);
20557         }
20558
20559         const HChar* nm = bL ? "ldrd" : "strd";
20560
20561         if (bP == 1 && bW == 0) {
20562            DIP("%s.w r%u, r%u, [r%u, #%c%u]\n",
20563                nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
20564         }
20565         else if (bP == 1 && bW == 1) {
20566            DIP("%s.w r%u, r%u, [r%u, #%c%u]!\n",
20567                nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
20568         }
20569         else {
20570            vassert(bP == 0 && bW == 1);
20571            DIP("%s.w r%u, r%u, [r%u], #%c%u\n",
20572                nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
20573         }
20574
20575         goto decode_success;
20576      }
20577   }
20578
20579   /* -------------- (T3) Bcond.W label -------------- */
20580   /* This variant carries its own condition, so can't be part of an
20581      IT block ... */
20582   if (INSN0(15,11) == BITS5(1,1,1,1,0)
20583       && INSN1(15,14) == BITS2(1,0)
20584       && INSN1(12,12) == 0) {
20585      UInt cond = INSN0(9,6);
20586      if (cond != ARMCondAL && cond != ARMCondNV) {
20587         Int simm21
20588            =   (INSN0(10,10) << (1 + 1 + 6 + 11 + 1))
20589              | (INSN1(11,11) << (1 + 6 + 11 + 1))
20590              | (INSN1(13,13) << (6 + 11 + 1))
20591              | (INSN0(5,0)   << (11 + 1))
20592              | (INSN1(10,0)  << 1);
20593         simm21 = (simm21 << 11) >> 11;
20594
20595         vassert(0 == (guest_R15_curr_instr_notENC & 1));
20596         UInt dst = simm21 + guest_R15_curr_instr_notENC + 4;
20597
20598         /* Not allowed in an IT block; SIGILL if so. */
20599         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
20600
20601         IRTemp kondT = newTemp(Ity_I32);
20602         assign( kondT, mk_armg_calculate_condition(cond) );
20603         stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
20604                            Ijk_Boring,
20605                            IRConst_U32(dst | 1/*CPSR.T*/),
20606                            OFFB_R15T ));
20607         llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 4)
20608                              | 1 /*CPSR.T*/ ));
20609         dres.jk_StopHere = Ijk_Boring;
20610         dres.whatNext    = Dis_StopHere;
20611         DIP("b%s.w 0x%x\n", nCC(cond), dst);
20612         goto decode_success;
20613      }
20614   }
20615
20616   /* ---------------- (T4) B.W label ---------------- */
20617   /* ... whereas this variant doesn't carry its own condition, so it
20618      has to be either unconditional or the conditional by virtue of
20619      being the last in an IT block.  The upside is that there's 4
20620      more bits available for the jump offset, so it has a 16-times
20621      greater branch range than the T3 variant. */
20622   if (INSN0(15,11) == BITS5(1,1,1,1,0)
20623       && INSN1(15,14) == BITS2(1,0)
20624       && INSN1(12,12) == 1) {
20625      if (1) {
20626         UInt bS  = INSN0(10,10);
20627         UInt bJ1 = INSN1(13,13);
20628         UInt bJ2 = INSN1(11,11);
20629         UInt bI1 = 1 ^ (bJ1 ^ bS);
20630         UInt bI2 = 1 ^ (bJ2 ^ bS);
20631         Int simm25
20632            =   (bS          << (1 + 1 + 10 + 11 + 1))
20633              | (bI1         << (1 + 10 + 11 + 1))
20634              | (bI2         << (10 + 11 + 1))
20635              | (INSN0(9,0)  << (11 + 1))
20636              | (INSN1(10,0) << 1);
20637         simm25 = (simm25 << 7) >> 7;
20638
20639         vassert(0 == (guest_R15_curr_instr_notENC & 1));
20640         UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
20641
20642         /* If in an IT block, must be the last insn. */
20643         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20644
20645         // go uncond
20646         mk_skip_over_T32_if_cond_is_false(condT);
20647         condT = IRTemp_INVALID;
20648         // now uncond
20649
20650         // branch to dst
20651         llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
20652         dres.jk_StopHere = Ijk_Boring;
20653         dres.whatNext    = Dis_StopHere;
20654         DIP("b.w 0x%x\n", dst);
20655         goto decode_success;
20656      }
20657   }
20658
20659   /* ------------------ TBB, TBH ------------------ */
20660   if (INSN0(15,4) == 0xE8D && INSN1(15,5) == 0x780) {
20661      UInt rN = INSN0(3,0);
20662      UInt rM = INSN1(3,0);
20663      UInt bH = INSN1(4,4);
20664      if (bH/*ATC*/ || (rN != 13 && !isBadRegT(rM))) {
20665         /* Must be last or not-in IT block */
20666         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20667         /* Go uncond */
20668         mk_skip_over_T32_if_cond_is_false(condT);
20669         condT = IRTemp_INVALID;
20670
20671         IRExpr* ea
20672             = binop(Iop_Add32,
20673                     getIRegT(rN),
20674                     bH ? binop(Iop_Shl32, getIRegT(rM), mkU8(1))
20675                        : getIRegT(rM));
20676
20677         IRTemp delta = newTemp(Ity_I32);
20678         if (bH) {
20679            assign(delta, unop(Iop_16Uto32, loadLE(Ity_I16, ea)));
20680         } else {
20681            assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
20682         }
20683
20684         llPutIReg(
20685            15,
20686            binop(Iop_Or32,
20687                  binop(Iop_Add32,
20688                        getIRegT(15),
20689                        binop(Iop_Shl32, mkexpr(delta), mkU8(1))
20690                  ),
20691                  mkU32(1)
20692         ));
20693         dres.jk_StopHere = Ijk_Boring;
20694         dres.whatNext    = Dis_StopHere;
20695         DIP("tb%c [r%u, r%u%s]\n",
20696             bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
20697         goto decode_success;
20698      }
20699   }
20700
20701   /* ------------------ UBFX ------------------ */
20702   /* ------------------ SBFX ------------------ */
20703   /* There's also ARM versions of same, but it doesn't seem worth the
20704      hassle to common up the handling (it's only a couple of C
20705      statements). */
20706   if ((INSN0(15,4) == 0xF3C // UBFX
20707        || INSN0(15,4) == 0xF34) // SBFX
20708       && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
20709      UInt rN  = INSN0(3,0);
20710      UInt rD  = INSN1(11,8);
20711      UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
20712      UInt wm1 = INSN1(4,0);
20713      UInt msb =  lsb + wm1;
20714      if (!isBadRegT(rD) && !isBadRegT(rN) && msb <= 31) {
20715         Bool   isU  = INSN0(15,4) == 0xF3C;
20716         IRTemp src  = newTemp(Ity_I32);
20717         IRTemp tmp  = newTemp(Ity_I32);
20718         IRTemp res  = newTemp(Ity_I32);
20719         UInt   mask = ((1 << wm1) - 1) + (1 << wm1);
20720         vassert(msb >= 0 && msb <= 31);
20721         vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
20722
20723         assign(src, getIRegT(rN));
20724         assign(tmp, binop(Iop_And32,
20725                           binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
20726                           mkU32(mask)));
20727         assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
20728                           binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
20729                           mkU8(31-wm1)));
20730
20731         putIRegT(rD, mkexpr(res), condT);
20732
20733         DIP("%s r%u, r%u, #%u, #%u\n",
20734             isU ? "ubfx" : "sbfx", rD, rN, lsb, wm1 + 1);
20735         goto decode_success;
20736      }
20737   }
20738
20739   /* ------------------ UXTB ------------------ */
20740   /* ------------------ UXTH ------------------ */
20741   /* ------------------ SXTB ------------------ */
20742   /* ------------------ SXTH ------------------ */
20743   /* ----------------- UXTB16 ----------------- */
20744   /* ----------------- SXTB16 ----------------- */
20745   /* FIXME: this is an exact duplicate of the ARM version.  They
20746      should be commoned up. */
20747   if ((INSN0(15,0) == 0xFA5F     // UXTB
20748        || INSN0(15,0) == 0xFA1F  // UXTH
20749        || INSN0(15,0) == 0xFA4F  // SXTB
20750        || INSN0(15,0) == 0xFA0F  // SXTH
20751        || INSN0(15,0) == 0xFA3F  // UXTB16
20752        || INSN0(15,0) == 0xFA2F) // SXTB16
20753       && INSN1(15,12) == BITS4(1,1,1,1)
20754       && INSN1(7,6) == BITS2(1,0)) {
20755      UInt rD = INSN1(11,8);
20756      UInt rM = INSN1(3,0);
20757      UInt rot = INSN1(5,4);
20758      if (!isBadRegT(rD) && !isBadRegT(rM)) {
20759         const HChar* nm = "???";
20760         IRTemp srcT = newTemp(Ity_I32);
20761         IRTemp rotT = newTemp(Ity_I32);
20762         IRTemp dstT = newTemp(Ity_I32);
20763         assign(srcT, getIRegT(rM));
20764         assign(rotT, genROR32(srcT, 8 * rot));
20765         switch (INSN0(15,0)) {
20766            case 0xFA5F: // UXTB
20767               nm = "uxtb";
20768               assign(dstT, unop(Iop_8Uto32,
20769                                 unop(Iop_32to8, mkexpr(rotT))));
20770               break;
20771            case 0xFA1F: // UXTH
20772               nm = "uxth";
20773               assign(dstT, unop(Iop_16Uto32,
20774                                 unop(Iop_32to16, mkexpr(rotT))));
20775               break;
20776            case 0xFA4F: // SXTB
20777               nm = "sxtb";
20778               assign(dstT, unop(Iop_8Sto32,
20779                                 unop(Iop_32to8, mkexpr(rotT))));
20780               break;
20781            case 0xFA0F: // SXTH
20782               nm = "sxth";
20783               assign(dstT, unop(Iop_16Sto32,
20784                                 unop(Iop_32to16, mkexpr(rotT))));
20785               break;
20786            case 0xFA3F: // UXTB16
20787               nm = "uxtb16";
20788               assign(dstT, binop(Iop_And32, mkexpr(rotT),
20789                                             mkU32(0x00FF00FF)));
20790               break;
20791            case 0xFA2F: { // SXTB16
20792               nm = "sxtb16";
20793               IRTemp lo32 = newTemp(Ity_I32);
20794               IRTemp hi32 = newTemp(Ity_I32);
20795               assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
20796               assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
20797               assign(
20798                  dstT,
20799                  binop(Iop_Or32,
20800                        binop(Iop_And32,
20801                              unop(Iop_8Sto32,
20802                                   unop(Iop_32to8, mkexpr(lo32))),
20803                              mkU32(0xFFFF)),
20804                        binop(Iop_Shl32,
20805                              unop(Iop_8Sto32,
20806                                   unop(Iop_32to8, mkexpr(hi32))),
20807                              mkU8(16))
20808               ));
20809               break;
20810            }
20811            default:
20812               vassert(0);
20813         }
20814         putIRegT(rD, mkexpr(dstT), condT);
20815         DIP("%s r%u, r%u, ror #%u\n", nm, rD, rM, 8 * rot);
20816         goto decode_success;
20817      }
20818   }
20819
20820   /* -------------- MUL.W Rd, Rn, Rm -------------- */
20821   if (INSN0(15,4) == 0xFB0
20822       && (INSN1(15,0) & 0xF0F0) == 0xF000) {
20823      UInt rN = INSN0(3,0);
20824      UInt rD = INSN1(11,8);
20825      UInt rM = INSN1(3,0);
20826      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
20827         IRTemp res = newTemp(Ity_I32);
20828         assign(res, binop(Iop_Mul32, getIRegT(rN), getIRegT(rM)));
20829         putIRegT(rD, mkexpr(res), condT);
20830         DIP("mul.w r%u, r%u, r%u\n", rD, rN, rM);
20831         goto decode_success;
20832      }
20833   }
20834
20835   /* -------------- SDIV.W Rd, Rn, Rm -------------- */
20836   if (INSN0(15,4) == 0xFB9
20837       && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
20838      UInt rN = INSN0(3,0);
20839      UInt rD = INSN1(11,8);
20840      UInt rM = INSN1(3,0);
20841      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
20842         IRTemp res  = newTemp(Ity_I32);
20843         IRTemp argL = newTemp(Ity_I32);
20844         IRTemp argR = newTemp(Ity_I32);
20845         assign(argL, getIRegT(rN));
20846         assign(argR, getIRegT(rM));
20847         assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
20848         putIRegT(rD, mkexpr(res), condT);
20849         DIP("sdiv.w r%u, r%u, r%u\n", rD, rN, rM);
20850         goto decode_success;
20851      }
20852   }
20853
20854   /* -------------- UDIV.W Rd, Rn, Rm -------------- */
20855   if (INSN0(15,4) == 0xFBB
20856       && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
20857      UInt rN = INSN0(3,0);
20858      UInt rD = INSN1(11,8);
20859      UInt rM = INSN1(3,0);
20860      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
20861         IRTemp res  = newTemp(Ity_I32);
20862         IRTemp argL = newTemp(Ity_I32);
20863         IRTemp argR = newTemp(Ity_I32);
20864         assign(argL, getIRegT(rN));
20865         assign(argR, getIRegT(rM));
20866         assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
20867         putIRegT(rD, mkexpr(res), condT);
20868         DIP("udiv.w r%u, r%u, r%u\n", rD, rN, rM);
20869         goto decode_success;
20870      }
20871   }
20872
20873   /* ------------------ {U,S}MULL ------------------ */
20874   if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
20875       && INSN1(7,4) == BITS4(0,0,0,0)) {
20876      UInt isU  = INSN0(5,5);
20877      UInt rN   = INSN0(3,0);
20878      UInt rDlo = INSN1(15,12);
20879      UInt rDhi = INSN1(11,8);
20880      UInt rM   = INSN1(3,0);
20881      if (!isBadRegT(rDhi) && !isBadRegT(rDlo)
20882          && !isBadRegT(rN) && !isBadRegT(rM) && rDlo != rDhi) {
20883         IRTemp res   = newTemp(Ity_I64);
20884         assign(res, binop(isU ? Iop_MullU32 : Iop_MullS32,
20885                           getIRegT(rN), getIRegT(rM)));
20886         putIRegT( rDhi, unop(Iop_64HIto32, mkexpr(res)), condT );
20887         putIRegT( rDlo, unop(Iop_64to32, mkexpr(res)), condT );
20888         DIP("%cmull r%u, r%u, r%u, r%u\n",
20889             isU ? 'u' : 's', rDlo, rDhi, rN, rM);
20890         goto decode_success;
20891      }
20892   }
20893
20894   /* ------------------ ML{A,S} ------------------ */
20895   if (INSN0(15,4) == 0xFB0
20896       && (   INSN1(7,4) == BITS4(0,0,0,0)    // MLA
20897           || INSN1(7,4) == BITS4(0,0,0,1))) { // MLS
20898      UInt rN = INSN0(3,0);
20899      UInt rA = INSN1(15,12);
20900      UInt rD = INSN1(11,8);
20901      UInt rM = INSN1(3,0);
20902      if (!isBadRegT(rD) && !isBadRegT(rN)
20903          && !isBadRegT(rM) && !isBadRegT(rA)) {
20904         Bool   isMLA = INSN1(7,4) == BITS4(0,0,0,0);
20905         IRTemp res   = newTemp(Ity_I32);
20906         assign(res,
20907                binop(isMLA ? Iop_Add32 : Iop_Sub32,
20908                      getIRegT(rA),
20909                      binop(Iop_Mul32, getIRegT(rN), getIRegT(rM))));
20910         putIRegT(rD, mkexpr(res), condT);
20911         DIP("%s r%u, r%u, r%u, r%u\n",
20912             isMLA ? "mla" : "mls", rD, rN, rM, rA);
20913         goto decode_success;
20914      }
20915   }
20916
20917   /* ------------------ (T3) ADR ------------------ */
20918   if ((INSN0(15,0) == 0xF20F || INSN0(15,0) == 0xF60F)
20919       && INSN1(15,15) == 0) {
20920      /* rD = align4(PC) + imm32 */
20921      UInt rD = INSN1(11,8);
20922      if (!isBadRegT(rD)) {
20923         UInt imm32 = (INSN0(10,10) << 11)
20924                      | (INSN1(14,12) << 8) | INSN1(7,0);
20925         putIRegT(rD, binop(Iop_Add32,
20926                            binop(Iop_And32, getIRegT(15), mkU32(~3U)),
20927                            mkU32(imm32)),
20928                      condT);
20929         DIP("add r%u, pc, #%u\n", rD, imm32);
20930         goto decode_success;
20931      }
20932   }
20933
20934   /* ----------------- (T1) UMLAL ----------------- */
20935   /* ----------------- (T1) SMLAL ----------------- */
20936   if ((INSN0(15,4) == 0xFBE // UMLAL
20937        || INSN0(15,4) == 0xFBC) // SMLAL
20938       && INSN1(7,4) == BITS4(0,0,0,0)) {
20939      UInt rN   = INSN0(3,0);
20940      UInt rDlo = INSN1(15,12);
20941      UInt rDhi = INSN1(11,8);
20942      UInt rM   = INSN1(3,0);
20943      if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
20944          && !isBadRegT(rM) && rDhi != rDlo) {
20945         Bool   isS   = INSN0(15,4) == 0xFBC;
20946         IRTemp argL  = newTemp(Ity_I32);
20947         IRTemp argR  = newTemp(Ity_I32);
20948         IRTemp old   = newTemp(Ity_I64);
20949         IRTemp res   = newTemp(Ity_I64);
20950         IRTemp resHi = newTemp(Ity_I32);
20951         IRTemp resLo = newTemp(Ity_I32);
20952         IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
20953         assign( argL, getIRegT(rM));
20954         assign( argR, getIRegT(rN));
20955         assign( old, binop(Iop_32HLto64, getIRegT(rDhi), getIRegT(rDlo)) );
20956         assign( res, binop(Iop_Add64,
20957                            mkexpr(old),
20958                            binop(mulOp, mkexpr(argL), mkexpr(argR))) );
20959         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
20960         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
20961         putIRegT( rDhi, mkexpr(resHi), condT );
20962         putIRegT( rDlo, mkexpr(resLo), condT );
20963         DIP("%cmlal r%u, r%u, r%u, r%u\n",
20964             isS ? 's' : 'u', rDlo, rDhi, rN, rM);
20965         goto decode_success;
20966      }
20967   }
20968
20969   /* ------------------ (T1) UMAAL ------------------ */
20970   if (INSN0(15,4) == 0xFBE && INSN1(7,4) == BITS4(0,1,1,0)) {
20971      UInt rN   = INSN0(3,0);
20972      UInt rDlo = INSN1(15,12);
20973      UInt rDhi = INSN1(11,8);
20974      UInt rM   = INSN1(3,0);
20975      if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
20976          && !isBadRegT(rM) && rDhi != rDlo) {
20977         IRTemp argN   = newTemp(Ity_I32);
20978         IRTemp argM   = newTemp(Ity_I32);
20979         IRTemp argDhi = newTemp(Ity_I32);
20980         IRTemp argDlo = newTemp(Ity_I32);
20981         IRTemp res    = newTemp(Ity_I64);
20982         IRTemp resHi  = newTemp(Ity_I32);
20983         IRTemp resLo  = newTemp(Ity_I32);
20984         assign( argN,   getIRegT(rN) );
20985         assign( argM,   getIRegT(rM) );
20986         assign( argDhi, getIRegT(rDhi) );
20987         assign( argDlo, getIRegT(rDlo) );
20988         assign( res,
20989                 binop(Iop_Add64,
20990                       binop(Iop_Add64,
20991                             binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
20992                             unop(Iop_32Uto64, mkexpr(argDhi))),
20993                       unop(Iop_32Uto64, mkexpr(argDlo))) );
20994         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
20995         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
20996         putIRegT( rDhi, mkexpr(resHi), condT );
20997         putIRegT( rDlo, mkexpr(resLo), condT );
20998         DIP("umaal r%u, r%u, r%u, r%u\n", rDlo, rDhi, rN, rM);
20999         goto decode_success;
21000      }
21001   }
21002
21003   /* ------------------- (T1) SMMUL{R} ------------------ */
21004   if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
21005       && INSN0(6,4) == BITS3(1,0,1)
21006       && INSN1(15,12) == BITS4(1,1,1,1)
21007       && INSN1(7,5) == BITS3(0,0,0)) {
21008      UInt bitR = INSN1(4,4);
21009      UInt rD = INSN1(11,8);
21010      UInt rM = INSN1(3,0);
21011      UInt rN = INSN0(3,0);
21012      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21013         IRExpr* res
21014         = unop(Iop_64HIto32,
21015                binop(Iop_Add64,
21016                      binop(Iop_MullS32, getIRegT(rN), getIRegT(rM)),
21017                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
21018         putIRegT(rD, res, condT);
21019         DIP("smmul%s r%u, r%u, r%u\n",
21020             bitR ? "r" : "", rD, rN, rM);
21021         goto decode_success;
21022      }
21023   }
21024
21025   /* ------------------- (T1) SMMLA{R} ------------------ */
21026   if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
21027       && INSN0(6,4) == BITS3(1,0,1)
21028       && INSN1(7,5) == BITS3(0,0,0)) {
21029      UInt bitR = INSN1(4,4);
21030      UInt rA = INSN1(15,12);
21031      UInt rD = INSN1(11,8);
21032      UInt rM = INSN1(3,0);
21033      UInt rN = INSN0(3,0);
21034      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && (rA != 13)) {
21035         IRExpr* res
21036         = unop(Iop_64HIto32,
21037                binop(Iop_Add64,
21038                      binop(Iop_Add64,
21039                            binop(Iop_32HLto64, getIRegT(rA), mkU32(0)),
21040                            binop(Iop_MullS32, getIRegT(rN), getIRegT(rM))),
21041                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
21042         putIRegT(rD, res, condT);
21043         DIP("smmla%s r%u, r%u, r%u, r%u\n",
21044             bitR ? "r" : "", rD, rN, rM, rA);
21045         goto decode_success;
21046      }
21047   }
21048
21049   /* ------------------ (T2) ADR ------------------ */
21050   if ((INSN0(15,0) == 0xF2AF || INSN0(15,0) == 0xF6AF)
21051       && INSN1(15,15) == 0) {
21052      /* rD = align4(PC) - imm32 */
21053      UInt rD = INSN1(11,8);
21054      if (!isBadRegT(rD)) {
21055         UInt imm32 = (INSN0(10,10) << 11)
21056                      | (INSN1(14,12) << 8) | INSN1(7,0);
21057         putIRegT(rD, binop(Iop_Sub32,
21058                            binop(Iop_And32, getIRegT(15), mkU32(~3U)),
21059                            mkU32(imm32)),
21060                      condT);
21061         DIP("sub r%u, pc, #%u\n", rD, imm32);
21062         goto decode_success;
21063      }
21064   }
21065
21066   /* ------------------- (T1) BFI ------------------- */
21067   /* ------------------- (T1) BFC ------------------- */
21068   if (INSN0(15,4) == 0xF36 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
21069      UInt rD  = INSN1(11,8);
21070      UInt rN  = INSN0(3,0);
21071      UInt msb = INSN1(4,0);
21072      UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
21073      if (isBadRegT(rD) || rN == 13 || msb < lsb) {
21074         /* undecodable; fall through */
21075      } else {
21076         IRTemp src    = newTemp(Ity_I32);
21077         IRTemp olddst = newTemp(Ity_I32);
21078         IRTemp newdst = newTemp(Ity_I32);
21079         UInt   mask = 1 << (msb - lsb);
21080         mask = (mask - 1) + mask;
21081         vassert(mask != 0); // guaranteed by "msb < lsb" check above
21082         mask <<= lsb;
21083
21084         assign(src, rN == 15 ? mkU32(0) : getIRegT(rN));
21085         assign(olddst, getIRegT(rD));
21086         assign(newdst,
21087                binop(Iop_Or32,
21088                   binop(Iop_And32,
21089                         binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
21090                         mkU32(mask)),
21091                   binop(Iop_And32,
21092                         mkexpr(olddst),
21093                         mkU32(~mask)))
21094               );
21095
21096         putIRegT(rD, mkexpr(newdst), condT);
21097
21098         if (rN == 15) {
21099            DIP("bfc r%u, #%u, #%u\n",
21100                rD, lsb, msb-lsb+1);
21101         } else {
21102            DIP("bfi r%u, r%u, #%u, #%u\n",
21103                rD, rN, lsb, msb-lsb+1);
21104         }
21105         goto decode_success;
21106      }
21107   }
21108
21109   /* ------------------- (T1) SXTAH ------------------- */
21110   /* ------------------- (T1) UXTAH ------------------- */
21111   if ((INSN0(15,4) == 0xFA1      // UXTAH
21112        || INSN0(15,4) == 0xFA0)  // SXTAH
21113       && INSN1(15,12) == BITS4(1,1,1,1)
21114       && INSN1(7,6) == BITS2(1,0)) {
21115      Bool isU = INSN0(15,4) == 0xFA1;
21116      UInt rN  = INSN0(3,0);
21117      UInt rD  = INSN1(11,8);
21118      UInt rM  = INSN1(3,0);
21119      UInt rot = INSN1(5,4);
21120      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21121         IRTemp srcL = newTemp(Ity_I32);
21122         IRTemp srcR = newTemp(Ity_I32);
21123         IRTemp res  = newTemp(Ity_I32);
21124         assign(srcR, getIRegT(rM));
21125         assign(srcL, getIRegT(rN));
21126         assign(res,  binop(Iop_Add32,
21127                            mkexpr(srcL),
21128                            unop(isU ? Iop_16Uto32 : Iop_16Sto32,
21129                                 unop(Iop_32to16,
21130                                      genROR32(srcR, 8 * rot)))));
21131         putIRegT(rD, mkexpr(res), condT);
21132         DIP("%cxtah r%u, r%u, r%u, ror #%u\n",
21133             isU ? 'u' : 's', rD, rN, rM, rot);
21134         goto decode_success;
21135      }
21136   }
21137
21138   /* ------------------- (T1) SXTAB ------------------- */
21139   /* ------------------- (T1) UXTAB ------------------- */
21140   if ((INSN0(15,4) == 0xFA5      // UXTAB
21141        || INSN0(15,4) == 0xFA4)  // SXTAB
21142       && INSN1(15,12) == BITS4(1,1,1,1)
21143       && INSN1(7,6) == BITS2(1,0)) {
21144      Bool isU = INSN0(15,4) == 0xFA5;
21145      UInt rN  = INSN0(3,0);
21146      UInt rD  = INSN1(11,8);
21147      UInt rM  = INSN1(3,0);
21148      UInt rot = INSN1(5,4);
21149      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21150         IRTemp srcL = newTemp(Ity_I32);
21151         IRTemp srcR = newTemp(Ity_I32);
21152         IRTemp res  = newTemp(Ity_I32);
21153         assign(srcR, getIRegT(rM));
21154         assign(srcL, getIRegT(rN));
21155         assign(res,  binop(Iop_Add32,
21156                            mkexpr(srcL),
21157                            unop(isU ? Iop_8Uto32 : Iop_8Sto32,
21158                                 unop(Iop_32to8,
21159                                      genROR32(srcR, 8 * rot)))));
21160         putIRegT(rD, mkexpr(res), condT);
21161         DIP("%cxtab r%u, r%u, r%u, ror #%u\n",
21162             isU ? 'u' : 's', rD, rN, rM, rot);
21163         goto decode_success;
21164      }
21165   }
21166
21167   /* ------------------- (T1) CLZ ------------------- */
21168   if (INSN0(15,4) == 0xFAB
21169       && INSN1(15,12) == BITS4(1,1,1,1)
21170       && INSN1(7,4) == BITS4(1,0,0,0)) {
21171      UInt rM1 = INSN0(3,0);
21172      UInt rD  = INSN1(11,8);
21173      UInt rM2 = INSN1(3,0);
21174      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
21175         IRTemp arg = newTemp(Ity_I32);
21176         IRTemp res = newTemp(Ity_I32);
21177         assign(arg, getIRegT(rM1));
21178         assign(res, IRExpr_ITE(
21179                        binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
21180                        mkU32(32),
21181                        unop(Iop_Clz32, mkexpr(arg))
21182         ));
21183         putIRegT(rD, mkexpr(res), condT);
21184         DIP("clz r%u, r%u\n", rD, rM1);
21185         goto decode_success;
21186      }
21187   }
21188
21189   /* ------------------- (T1) RBIT ------------------- */
21190   if (INSN0(15,4) == 0xFA9
21191       && INSN1(15,12) == BITS4(1,1,1,1)
21192       && INSN1(7,4) == BITS4(1,0,1,0)) {
21193      UInt rM1 = INSN0(3,0);
21194      UInt rD  = INSN1(11,8);
21195      UInt rM2 = INSN1(3,0);
21196      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
21197         IRTemp arg = newTemp(Ity_I32);
21198         assign(arg, getIRegT(rM1));
21199         IRTemp res = gen_BITREV(arg);
21200         putIRegT(rD, mkexpr(res), condT);
21201         DIP("rbit r%u, r%u\n", rD, rM1);
21202         goto decode_success;
21203      }
21204   }
21205
21206   /* ------------------- (T2) REV   ------------------- */
21207   /* ------------------- (T2) REV16 ------------------- */
21208   if (INSN0(15,4) == 0xFA9
21209       && INSN1(15,12) == BITS4(1,1,1,1)
21210       && (   INSN1(7,4) == BITS4(1,0,0,0)     // REV
21211           || INSN1(7,4) == BITS4(1,0,0,1))) { // REV16
21212      UInt rM1   = INSN0(3,0);
21213      UInt rD    = INSN1(11,8);
21214      UInt rM2   = INSN1(3,0);
21215      Bool isREV = INSN1(7,4) == BITS4(1,0,0,0);
21216      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
21217         IRTemp arg = newTemp(Ity_I32);
21218         assign(arg, getIRegT(rM1));
21219         IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
21220         putIRegT(rD, mkexpr(res), condT);
21221         DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM1);
21222         goto decode_success;
21223      }
21224   }
21225
21226   /* ------------------- (T2) REVSH ------------------ */
21227   if (INSN0(15,4) == 0xFA9
21228       && INSN1(15,12) == BITS4(1,1,1,1)
21229       && INSN1(7,4) == BITS4(1,0,1,1)) {
21230      UInt rM1 = INSN0(3,0);
21231      UInt rM2 = INSN1(3,0);
21232      UInt rD  = INSN1(11,8);
21233      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
21234         IRTemp irt_rM  = newTemp(Ity_I32);
21235         IRTemp irt_hi  = newTemp(Ity_I32);
21236         IRTemp irt_low = newTemp(Ity_I32);
21237         IRTemp irt_res = newTemp(Ity_I32);
21238         assign(irt_rM, getIRegT(rM1));
21239         assign(irt_hi,
21240                binop(Iop_Sar32,
21241                      binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
21242                      mkU8(16)
21243                )
21244         );
21245         assign(irt_low,
21246                binop(Iop_And32,
21247                      binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
21248                      mkU32(0xFF)
21249                )
21250         );
21251         assign(irt_res,
21252                binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
21253         );
21254         putIRegT(rD, mkexpr(irt_res), condT);
21255         DIP("revsh r%u, r%u\n", rD, rM1);
21256         goto decode_success;
21257      }
21258   }
21259
21260   /* -------------- (T1) MSR apsr, reg -------------- */
21261   if (INSN0(15,4) == 0xF38
21262       && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(9,0) == 0x000) {
21263      UInt rN          = INSN0(3,0);
21264      UInt write_ge    = INSN1(10,10);
21265      UInt write_nzcvq = INSN1(11,11);
21266      if (!isBadRegT(rN) && (write_nzcvq || write_ge)) {
21267         IRTemp rNt = newTemp(Ity_I32);
21268         assign(rNt, getIRegT(rN));
21269         desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
21270         DIP("msr cpsr_%s%s, r%u\n",
21271             write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
21272         goto decode_success;
21273      }
21274   }
21275
21276   /* -------------- (T1) MRS reg, apsr -------------- */
21277   if (INSN0(15,0) == 0xF3EF
21278       && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(7,0) == 0x00) {
21279      UInt rD = INSN1(11,8);
21280      if (!isBadRegT(rD)) {
21281         IRTemp apsr = synthesise_APSR();
21282         putIRegT( rD, mkexpr(apsr), condT );
21283         DIP("mrs r%u, cpsr\n", rD);
21284         goto decode_success;
21285      }
21286   }
21287
21288   /* ----------------- (T1) LDREX ----------------- */
21289   if (INSN0(15,4) == 0xE85 && INSN1(11,8) == BITS4(1,1,1,1)) {
21290      UInt rN   = INSN0(3,0);
21291      UInt rT   = INSN1(15,12);
21292      UInt imm8 = INSN1(7,0);
21293      if (!isBadRegT(rT) && rN != 15) {
21294         IRTemp res;
21295         // go uncond
21296         mk_skip_over_T32_if_cond_is_false( condT );
21297         // now uncond
21298         res = newTemp(Ity_I32);
21299         stmt( IRStmt_LLSC(Iend_LE,
21300                           res,
21301                           binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
21302                           NULL/*this is a load*/ ));
21303         putIRegT(rT, mkexpr(res), IRTemp_INVALID);
21304         DIP("ldrex r%u, [r%u, #+%u]\n", rT, rN, imm8 * 4);
21305         goto decode_success;
21306      }
21307   }
21308
21309   /* --------------- (T1) LDREX{B,H} --------------- */
21310   if (INSN0(15,4) == 0xE8D
21311       && (INSN1(11,0) == 0xF4F || INSN1(11,0) == 0xF5F)) {
21312      UInt rN  = INSN0(3,0);
21313      UInt rT  = INSN1(15,12);
21314      Bool isH = INSN1(11,0) == 0xF5F;
21315      if (!isBadRegT(rT) && rN != 15) {
21316         IRTemp res;
21317         // go uncond
21318         mk_skip_over_T32_if_cond_is_false( condT );
21319         // now uncond
21320         res = newTemp(isH ? Ity_I16 : Ity_I8);
21321         stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
21322                           NULL/*this is a load*/ ));
21323         putIRegT(rT, unop(isH ? Iop_16Uto32 : Iop_8Uto32, mkexpr(res)),
21324                      IRTemp_INVALID);
21325         DIP("ldrex%c r%u, [r%u]\n", isH ? 'h' : 'b', rT, rN);
21326         goto decode_success;
21327      }
21328   }
21329
21330   /* --------------- (T1) LDREXD --------------- */
21331   if (INSN0(15,4) == 0xE8D && INSN1(7,0) == 0x7F) {
21332      UInt rN  = INSN0(3,0);
21333      UInt rT  = INSN1(15,12);
21334      UInt rT2 = INSN1(11,8);
21335      if (!isBadRegT(rT) && !isBadRegT(rT2) && rT != rT2 && rN != 15) {
21336         IRTemp res;
21337         // go uncond
21338         mk_skip_over_T32_if_cond_is_false( condT );
21339         // now uncond
21340         res = newTemp(Ity_I64);
21341         // FIXME: assumes little-endian guest
21342         stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
21343                           NULL/*this is a load*/ ));
21344         // FIXME: assumes little-endian guest
21345         putIRegT(rT,  unop(Iop_64to32,   mkexpr(res)), IRTemp_INVALID);
21346         putIRegT(rT2, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID);
21347         DIP("ldrexd r%u, r%u, [r%u]\n", rT, rT2, rN);
21348         goto decode_success;
21349      }
21350   }
21351
21352   /* ----------------- (T1) STREX ----------------- */
21353   if (INSN0(15,4) == 0xE84) {
21354      UInt rN   = INSN0(3,0);
21355      UInt rT   = INSN1(15,12);
21356      UInt rD   = INSN1(11,8);
21357      UInt imm8 = INSN1(7,0);
21358      if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
21359          && rD != rN && rD != rT) {
21360         IRTemp resSC1, resSC32;
21361         // go uncond
21362         mk_skip_over_T32_if_cond_is_false( condT );
21363         // now uncond
21364         /* Ok, now we're unconditional.  Do the store. */
21365         resSC1 = newTemp(Ity_I1);
21366         stmt( IRStmt_LLSC(Iend_LE,
21367                           resSC1,
21368                           binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
21369                           getIRegT(rT)) );
21370         /* Set rD to 1 on failure, 0 on success.  Currently we have
21371            resSC1 == 0 on failure, 1 on success. */
21372         resSC32 = newTemp(Ity_I32);
21373         assign(resSC32,
21374                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
21375         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
21376         DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
21377         goto decode_success;
21378      }
21379   }
21380
21381   /* --------------- (T1) STREX{B,H} --------------- */
21382   if (INSN0(15,4) == 0xE8C
21383       && (INSN1(11,4) == 0xF4 || INSN1(11,4) == 0xF5)) {
21384      UInt rN  = INSN0(3,0);
21385      UInt rT  = INSN1(15,12);
21386      UInt rD  = INSN1(3,0);
21387      Bool isH = INSN1(11,4) == 0xF5;
21388      if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
21389          && rD != rN && rD != rT) {
21390         IRTemp resSC1, resSC32;
21391         // go uncond
21392         mk_skip_over_T32_if_cond_is_false( condT );
21393         // now uncond
21394         /* Ok, now we're unconditional.  Do the store. */
21395         resSC1 = newTemp(Ity_I1);
21396         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN),
21397                           unop(isH ? Iop_32to16 : Iop_32to8,
21398                                getIRegT(rT))) );
21399         /* Set rD to 1 on failure, 0 on success.  Currently we have
21400            resSC1 == 0 on failure, 1 on success. */
21401         resSC32 = newTemp(Ity_I32);
21402         assign(resSC32,
21403                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
21404         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
21405         DIP("strex%c r%u, r%u, [r%u]\n", isH ? 'h' : 'b', rD, rT, rN);
21406         goto decode_success;
21407      }
21408   }
21409
21410   /* ---------------- (T1) STREXD ---------------- */
21411   if (INSN0(15,4) == 0xE8C && INSN1(7,4) == BITS4(0,1,1,1)) {
21412      UInt rN  = INSN0(3,0);
21413      UInt rT  = INSN1(15,12);
21414      UInt rT2 = INSN1(11,8);
21415      UInt rD  = INSN1(3,0);
21416      if (!isBadRegT(rD) && !isBadRegT(rT) && !isBadRegT(rT2)
21417          && rN != 15 && rD != rN && rD != rT && rD != rT) {
21418         IRTemp resSC1, resSC32, data;
21419         // go uncond
21420         mk_skip_over_T32_if_cond_is_false( condT );
21421         // now uncond
21422         /* Ok, now we're unconditional.  Do the store. */
21423         resSC1 = newTemp(Ity_I1);
21424         data = newTemp(Ity_I64);
21425         // FIXME: assumes little-endian guest
21426         assign(data, binop(Iop_32HLto64, getIRegT(rT2), getIRegT(rT)));
21427         // FIXME: assumes little-endian guest
21428         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN), mkexpr(data)));
21429         /* Set rD to 1 on failure, 0 on success.  Currently we have
21430            resSC1 == 0 on failure, 1 on success. */
21431         resSC32 = newTemp(Ity_I32);
21432         assign(resSC32,
21433                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
21434         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
21435         DIP("strexd r%u, r%u, r%u, [r%u]\n", rD, rT, rT2, rN);
21436         goto decode_success;
21437      }
21438   }
21439
21440   /* -------------- v7 barrier insns -------------- */
21441   if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF00) == 0x8F00) {
21442      /* FIXME: should this be unconditional? */
21443      /* XXX this isn't really right, is it?  The generated IR does
21444         them unconditionally.  I guess it doesn't matter since it
21445         doesn't do any harm to do them even when the guarding
21446         condition is false -- it's just a performance loss. */
21447      switch (INSN1(7,0)) {
21448         case 0x4F: /* DSB sy */
21449         case 0x4E: /* DSB st */
21450         case 0x4B: /* DSB ish */
21451         case 0x4A: /* DSB ishst */
21452         case 0x47: /* DSB nsh */
21453         case 0x46: /* DSB nshst */
21454         case 0x43: /* DSB osh */
21455         case 0x42: /* DSB oshst */
21456            stmt( IRStmt_MBE(Imbe_Fence) );
21457            DIP("DSB\n");
21458            goto decode_success;
21459         case 0x5F: /* DMB sy */
21460         case 0x5E: /* DMB st */
21461         case 0x5B: /* DMB ish */
21462         case 0x5A: /* DMB ishst */
21463         case 0x57: /* DMB nsh */
21464         case 0x56: /* DMB nshst */
21465         case 0x53: /* DMB osh */
21466         case 0x52: /* DMB oshst */
21467            stmt( IRStmt_MBE(Imbe_Fence) );
21468            DIP("DMB\n");
21469            goto decode_success;
21470         case 0x6F: /* ISB */
21471            stmt( IRStmt_MBE(Imbe_Fence) );
21472            DIP("ISB\n");
21473            goto decode_success;
21474         default:
21475            break;
21476      }
21477   }
21478
21479   /* ---------------------- PLD{,W} ---------------------- */
21480   if ((INSN0(15,4) & 0xFFD) == 0xF89 && INSN1(15,12) == 0xF) {
21481      /* FIXME: should this be unconditional? */
21482      /* PLD/PLDW immediate, encoding T1 */
21483      UInt rN    = INSN0(3,0);
21484      UInt bW    = INSN0(5,5);
21485      UInt imm12 = INSN1(11,0);
21486      DIP("pld%s [r%u, #%u]\n", bW ? "w" : "",  rN, imm12);
21487      goto decode_success;
21488   }
21489
21490   if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,8) == 0xFC) {
21491      /* FIXME: should this be unconditional? */
21492      /* PLD/PLDW immediate, encoding T2 */
21493      UInt rN    = INSN0(3,0);
21494      UInt bW    = INSN0(5,5);
21495      UInt imm8  = INSN1(7,0);
21496      DIP("pld%s [r%u, #-%u]\n", bW ? "w" : "",  rN, imm8);
21497      goto decode_success;
21498   }
21499
21500   if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,6) == 0x3C0) {
21501      /* FIXME: should this be unconditional? */
21502      /* PLD/PLDW register, encoding T1 */
21503      UInt rN   = INSN0(3,0);
21504      UInt rM   = INSN1(3,0);
21505      UInt bW   = INSN0(5,5);
21506      UInt imm2 = INSN1(5,4);
21507      if (!isBadRegT(rM)) {
21508         DIP("pld%s [r%u, r%u, lsl %d]\n", bW ? "w" : "", rN, rM, imm2);
21509         goto decode_success;
21510      }
21511      /* fall through */
21512   }
21513
21514   /* -------------- read CP15 TPIDRURO register ------------- */
21515   /* mrc     p15, 0,  r0, c13, c0, 3  up to
21516      mrc     p15, 0, r14, c13, c0, 3
21517   */
21518   /* I don't know whether this is really v7-only.  But anyway, we
21519      have to support it since arm-linux uses TPIDRURO as a thread
21520      state register. */
21521   if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F70)) {
21522      /* FIXME: should this be unconditional? */
21523      UInt rD = INSN1(15,12);
21524      if (!isBadRegT(rD)) {
21525         putIRegT(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32), IRTemp_INVALID);
21526         DIP("mrc p15,0, r%u, c13, c0, 3\n", rD);
21527         goto decode_success;
21528      }
21529      /* fall through */
21530   }
21531
21532   /* ------------------- CLREX ------------------ */
21533   if (INSN0(15,0) == 0xF3BF && INSN1(15,0) == 0x8F2F) {
21534      /* AFAICS, this simply cancels a (all?) reservations made by a
21535         (any?) preceding LDREX(es).  Arrange to hand it through to
21536         the back end. */
21537      mk_skip_over_T32_if_cond_is_false( condT );
21538      stmt( IRStmt_MBE(Imbe_CancelReservation) );
21539      DIP("clrex\n");
21540      goto decode_success;
21541   }
21542
21543   /* ------------------- NOP ------------------ */
21544   if (INSN0(15,0) == 0xF3AF && INSN1(15,0) == 0x8000) {
21545      DIP("nop\n");
21546      goto decode_success;
21547   }
21548
21549   /* -------------- (T1) LDRT reg+#imm8 -------------- */
21550   /* Load Register Unprivileged:
21551      ldrt Rt, [Rn, #imm8]
21552   */
21553   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,1)
21554       && INSN1(11,8) == BITS4(1,1,1,0)) {
21555      UInt rT    = INSN1(15,12);
21556      UInt rN    = INSN0(3,0);
21557      UInt imm8  = INSN1(7,0);
21558      Bool valid = True;
21559      if (rN == 15 || isBadRegT(rT)) valid = False;
21560      if (valid) {
21561         put_ITSTATE(old_itstate);
21562         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21563         IRTemp newRt = newTemp(Ity_I32);
21564         loadGuardedLE( newRt, ILGop_Ident32, ea, llGetIReg(rT), condT );
21565         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21566         put_ITSTATE(new_itstate);
21567         DIP("ldrt r%u, [r%u, #%u]\n", rT, rN, imm8);
21568         goto decode_success;
21569      }
21570   }
21571
21572   /* -------------- (T1) STRT reg+#imm8 -------------- */
21573   /* Store Register Unprivileged:
21574      strt Rt, [Rn, #imm8]
21575   */
21576   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,0)
21577       && INSN1(11,8) == BITS4(1,1,1,0)) {
21578      UInt rT    = INSN1(15,12);
21579      UInt rN    = INSN0(3,0);
21580      UInt imm8  = INSN1(7,0);
21581      Bool valid = True;
21582      if (rN == 15 || isBadRegT(rT)) valid = False;
21583      if (valid) {
21584         put_ITSTATE(old_itstate);
21585         IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21586         storeGuardedLE( address, llGetIReg(rT), condT );
21587         put_ITSTATE(new_itstate);
21588         DIP("strt r%u, [r%u, #%u]\n", rT, rN, imm8);
21589         goto decode_success;
21590      }
21591   }
21592
21593   /* -------------- (T1) STRBT reg+#imm8 -------------- */
21594   /* Store Register Byte Unprivileged:
21595      strbt Rt, [Rn, #imm8]
21596   */
21597   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,0)
21598       && INSN1(11,8) == BITS4(1,1,1,0)) {
21599      UInt rT    = INSN1(15,12);
21600      UInt rN    = INSN0(3,0);
21601      UInt imm8  = INSN1(7,0);
21602      Bool valid = True;
21603      if (rN == 15 || isBadRegT(rT)) valid = False;
21604      if (valid) {
21605         put_ITSTATE(old_itstate);
21606         IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21607         IRExpr* data = unop(Iop_32to8, llGetIReg(rT));
21608         storeGuardedLE( address, data, condT );
21609         put_ITSTATE(new_itstate);
21610         DIP("strbt r%u, [r%u, #%u]\n", rT, rN, imm8);
21611         goto decode_success;
21612      }
21613   }
21614
21615   /* -------------- (T1) LDRHT reg+#imm8 -------------- */
21616   /* Load Register Halfword Unprivileged:
21617      ldrht Rt, [Rn, #imm8]
21618   */
21619   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,1)
21620       && INSN1(11,8) == BITS4(1,1,1,0)) {
21621      UInt rN    = INSN0(3,0);
21622      Bool valid = True;
21623      if (rN == 15) {
         /* With rN == 15 this encoding is LDRH (literal), which has
            already been handled earlier, so don't decode it a second
            time here. */
21627         valid = False;
21628      }
21629      UInt rT    = INSN1(15,12);
21630      UInt imm8  = INSN1(7,0);
21631      if (isBadRegT(rT)) valid = False;
21632      if (valid) {
21633         put_ITSTATE(old_itstate);
21634         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21635         IRTemp newRt = newTemp(Ity_I32);
21636         loadGuardedLE( newRt, ILGop_16Uto32, ea, llGetIReg(rT), condT );
21637         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21638         put_ITSTATE(new_itstate);
21639         DIP("ldrht r%u, [r%u, #%u]\n", rT, rN, imm8);
21640         goto decode_success;
21641      }
21642   }
21643
21644   /* -------------- (T1) LDRSHT reg+#imm8 -------------- */
21645   /* Load Register Signed Halfword Unprivileged:
21646      ldrsht Rt, [Rn, #imm8]
21647   */
21648   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(1,1)
21649       && INSN1(11,8) == BITS4(1,1,1,0)) {
21650      UInt rN    = INSN0(3,0);
21651      Bool valid = True;
21652      if (rN == 15) {
         /* With rN == 15 this encoding is LDRSH (literal), which has
            already been handled earlier, so don't decode it a second
            time here. */
21656         valid = False;
21657      }
21658      UInt rT    = INSN1(15,12);
21659      UInt imm8  = INSN1(7,0);
21660      if (isBadRegT(rT)) valid = False;
21661      if (valid) {
21662         put_ITSTATE(old_itstate);
21663         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21664         IRTemp newRt = newTemp(Ity_I32);
21665         loadGuardedLE( newRt, ILGop_16Sto32, ea, llGetIReg(rT), condT );
21666         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21667         put_ITSTATE(new_itstate);
21668         DIP("ldrsht r%u, [r%u, #%u]\n", rT, rN, imm8);
21669         goto decode_success;
21670      }
21671   }
21672
21673   /* -------------- (T1) STRHT reg+#imm8 -------------- */
21674   /* Store Register Halfword Unprivileged:
21675      strht Rt, [Rn, #imm8]
21676   */
21677   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,0)
21678       && INSN1(11,8) == BITS4(1,1,1,0)) {
21679      UInt rT    = INSN1(15,12);
21680      UInt rN    = INSN0(3,0);
21681      UInt imm8  = INSN1(7,0);
21682      Bool valid = True;
21683      if (rN == 15 || isBadRegT(rT)) valid = False;
21684      if (valid) {
21685         put_ITSTATE(old_itstate);
21686         IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21687         IRExpr* data = unop(Iop_32to16, llGetIReg(rT));
21688         storeGuardedLE( address, data, condT );
21689         put_ITSTATE(new_itstate);
21690         DIP("strht r%u, [r%u, #%u]\n", rT, rN, imm8);
21691         goto decode_success;
21692      }
21693   }
21694
21695   /* -------------- (T1) LDRBT reg+#imm8 -------------- */
21696   /* Load Register Byte Unprivileged:
21697      ldrbt Rt, [Rn, #imm8]
21698   */
21699   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,1)
21700       && INSN1(11,8) == BITS4(1,1,1,0)) {
21701      UInt rN    = INSN0(3,0);
21702      UInt rT    = INSN1(15,12);
21703      UInt imm8  = INSN1(7,0);
21704      Bool valid = True;
21705      if (rN == 15 /* insn is LDRB (literal) */) valid = False;
21706      if (isBadRegT(rT)) valid = False;
21707      if (valid) {
21708         put_ITSTATE(old_itstate);
21709         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21710         IRTemp newRt = newTemp(Ity_I32);
21711         loadGuardedLE( newRt, ILGop_8Uto32, ea, llGetIReg(rT), condT );
21712         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21713         put_ITSTATE(new_itstate);
21714         DIP("ldrbt r%u, [r%u, #%u]\n", rT, rN, imm8);
21715         goto decode_success;
21716      }
21717   }
21718
21719   /* -------------- (T1) LDRSBT reg+#imm8 -------------- */
21720   /* Load Register Signed Byte Unprivileged:
21721      ldrsbt Rt, [Rn, #imm8]
21722   */
21723   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
21724       && INSN1(11,8) == BITS4(1,1,1,0)) {
21725      UInt rN    = INSN0(3,0);
21726      Bool valid = True;
21727      UInt rT    = INSN1(15,12);
21728      UInt imm8  = INSN1(7,0);
21729      if (rN == 15 /* insn is LDRSB (literal) */) valid = False;
21730      if (isBadRegT(rT)) valid = False;
21731      if (valid) {
21732         put_ITSTATE(old_itstate);
21733         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21734         IRTemp newRt = newTemp(Ity_I32);
21735         loadGuardedLE( newRt, ILGop_8Sto32, ea, llGetIReg(rT), condT );
21736         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21737         put_ITSTATE(new_itstate);
21738         DIP("ldrsbt r%u, [r%u, #%u]\n", rT, rN, imm8);
21739         goto decode_success;
21740      }
21741   }
21742
21743   /* -------------- (T1) PLI reg+#imm12 -------------- */
21744   /* Preload Instruction:
21745      pli [Rn, #imm12]
21746   */
21747   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,1,0) && INSN0(5,4) == BITS2(0,1)
21748       && INSN1(15,12) == BITS4(1,1,1,1)) {
21749      UInt rN    = INSN0(3,0);
21750      UInt imm12 = INSN1(11,0);
21751      if (rN != 15) {
21752         DIP("pli [r%u, #%u]\n", rN, imm12);
21753         goto decode_success;
21754      }
21755   }
21756
21757   /* -------------- (T2) PLI reg-#imm8 -------------- */
21758   /* Preload Instruction:
21759      pli [Rn, #-imm8]
21760   */
21761   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
21762       && INSN1(15,8) == BITS8(1,1,1,1,1,1,0,0)) {
21763      UInt rN   = INSN0(3,0);
21764      UInt imm8 = INSN1(7,0);
21765      if (rN != 15) {
21766         DIP("pli [r%u, #-%u]\n", rN, imm8);
21767         goto decode_success;
21768      }
21769   }
21770
21771   /* -------------- (T3) PLI PC+/-#imm12 -------------- */
21772   /* Preload Instruction:
21773      pli [PC, #+/-imm12]
21774   */
21775   if (INSN0(15,8) == BITS8(1,1,1,1,1,0,0,1)
21776       && INSN0(6,0) == BITS7(0,0,1,1,1,1,1)
21777       && INSN1(15,12) == BITS4(1,1,1,1)) {
21778      UInt imm12 = INSN1(11,0);
21779      UInt bU    = INSN0(7,7);
21780      DIP("pli [pc, #%c%u]\n", bU == 1 ? '+' : '-', imm12);
21781      goto decode_success;
21782   }
21783
21784   /* ----------------------------------------------------------- */
21785   /* -- VFP (CP 10, CP 11) instructions (in Thumb mode)       -- */
21786   /* ----------------------------------------------------------- */
21787
21788   if (INSN0(15,12) == BITS4(1,1,1,0)) {
21789      UInt insn28 = (INSN0(11,0) << 16) | INSN1(15,0);
21790      Bool ok_vfp = decode_CP10_CP11_instruction (
21791                       &dres, insn28, condT, ARMCondAL/*bogus*/,
21792                       True/*isT*/
21793                    );
21794      if (ok_vfp)
21795         goto decode_success;
21796   }
21797
21798   /* ----------------------------------------------------------- */
21799   /* -- NEON instructions (in Thumb mode)                     -- */
21800   /* ----------------------------------------------------------- */
21801
21802   if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
21803      UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
21804      Bool ok_neon = decode_NEON_instruction(
21805                        &dres, insn32, condT, True/*isT*/
21806                     );
21807      if (ok_neon)
21808         goto decode_success;
21809   }
21810
21811   /* ----------------------------------------------------------- */
21812   /* -- v6 media instructions (in Thumb mode)                 -- */
21813   /* ----------------------------------------------------------- */
21814
21815   { UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
21816     Bool ok_v6m = decode_V6MEDIA_instruction(
21817                      &dres, insn32, condT, ARMCondAL/*bogus*/,
21818                      True/*isT*/
21819                   );
21820     if (ok_v6m)
21821        goto decode_success;
21822   }
21823
21824   /* ----------------------------------------------------------- */
21825   /* -- Undecodable                                           -- */
21826   /* ----------------------------------------------------------- */
21827
21828   goto decode_failure;
21829   /*NOTREACHED*/
21830
21831  decode_failure:
21832   /* All decode failures end up here. */
21833   if (sigill_diag)
21834      vex_printf("disInstr(thumb): unhandled instruction: "
21835                 "0x%04x 0x%04x\n", (UInt)insn0, (UInt)insn1);
21836
21837   /* Back up ITSTATE to the initial value for this instruction.
21838      If we don't do that, any subsequent restart of the instruction
21839      will restart with the wrong value. */
21840   if (old_itstate != IRTemp_INVALID)
21841      put_ITSTATE(old_itstate);
21842
   /* Tell the dispatcher that this insn cannot be decoded, and so has
      not been executed, and (is currently) the next to be executed.
      R15 should be up-to-date since it is made so at the start of
      each insn, but nevertheless be paranoid and update it again
      right now. */
21848   vassert(0 == (guest_R15_curr_instr_notENC & 1));
21849   llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
21850   dres.whatNext    = Dis_StopHere;
21851   dres.jk_StopHere = Ijk_NoDecode;
21852   dres.len         = 0;
21853   return dres;
21854
21855  decode_success:
21856   /* All decode successes end up here. */
21857   vassert(dres.len == 4 || dres.len == 2 || dres.len == 20);
21858   switch (dres.whatNext) {
21859      case Dis_Continue:
21860         llPutIReg(15, mkU32(dres.len + (guest_R15_curr_instr_notENC | 1)));
21861         break;
21862      case Dis_ResteerU:
21863      case Dis_ResteerC:
21864         llPutIReg(15, mkU32(dres.continueAt));
21865         break;
21866      case Dis_StopHere:
21867         break;
21868      default:
21869         vassert(0);
21870   }
21871
21872   DIP("\n");
21873
21874   return dres;
21875
21876#  undef INSN0
21877#  undef INSN1
21878}
21879
21880#undef DIP
21881#undef DIS
21882
21883
21884/* Helper table for figuring out how many insns an IT insn
21885   conditionalises.
21886
21887   An ITxyz instruction of the format "1011 1111 firstcond mask"
21888   conditionalises some number of instructions, as indicated by the
21889   following table.  A value of zero indicates the instruction is
21890   invalid in some way.
21891
21892   mask = 0 means this isn't an IT instruction
21893   fc = 15 (NV) means unpredictable
21894
21895   The line fc = 14 (AL) is different from the others; there are
21896   additional constraints in this case.
21897
21898          mask(0 ..                   15)
21899        +--------------------------------
21900   fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21901   ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21902        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21903        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21904        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21905        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21906        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21907        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21908        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21909        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21910        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21911        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21912        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21913        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21914        | 0 4 3 0 2 0 0 0 1 0 0 0 0 0 0 0
21915   15)  | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
21916
21917   To be conservative with the analysis, let's rule out the mask = 0
21918   case, since that isn't an IT insn at all.  But for all the other
21919   cases where the table contains zero, that means unpredictable, so
21920   let's say 4 to be conservative.  Hence we have a safe value for any
21921   IT (mask,fc) pair that the CPU would actually identify as an IT
21922   instruction.  The final table is
21923
21924          mask(0 ..                   15)
21925        +--------------------------------
21926   fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21927   ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21928        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21929        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21930        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21931        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21932        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21933        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21934        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21935        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21936        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21937        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21938        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21939        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21940        | 0 4 3 4 2 4 4 4 1 4 4 4 4 4 4 4
21941   15)  | 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
21942*/
21943static const UChar it_length_table[256]
21944   = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21945       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21946       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21947       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21948       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21949       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21950       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21951       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21952       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21953       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21954       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21955       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21956       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21957       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21958       0, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
21959       0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
21960     };
21961
21962
21963/*------------------------------------------------------------*/
21964/*--- Top-level fn                                         ---*/
21965/*------------------------------------------------------------*/
21966
21967/* Disassemble a single instruction into IR.  The instruction
21968   is located in host memory at &guest_code[delta]. */
21969
21970DisResult disInstr_ARM ( IRSB*        irsb_IN,
21971                         Bool         (*resteerOkFn) ( void*, Addr64 ),
21972                         Bool         resteerCisOk,
21973                         void*        callback_opaque,
21974                         UChar*       guest_code_IN,
21975                         Long         delta_ENCODED,
21976                         Addr64       guest_IP_ENCODED,
21977                         VexArch      guest_arch,
21978                         VexArchInfo* archinfo,
21979                         VexAbiInfo*  abiinfo,
21980                         Bool         host_bigendian_IN,
21981                         Bool         sigill_diag_IN )
21982{
21983   DisResult dres;
21984   Bool isThumb = (Bool)(guest_IP_ENCODED & 1);
21985
21986   /* Set globals (see top of this file) */
21987   vassert(guest_arch == VexArchARM);
21988
21989   irsb              = irsb_IN;
21990   host_is_bigendian = host_bigendian_IN;
21991   __curr_is_Thumb   = isThumb;
21992
21993   if (isThumb) {
21994      guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED - 1;
21995   } else {
21996      guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED;
21997   }
21998
21999   if (isThumb) {
22000      dres = disInstr_THUMB_WRK ( resteerOkFn,
22001                                  resteerCisOk, callback_opaque,
22002                                  &guest_code_IN[delta_ENCODED - 1],
22003                                  archinfo, abiinfo, sigill_diag_IN );
22004   } else {
22005      dres = disInstr_ARM_WRK ( resteerOkFn,
22006                                resteerCisOk, callback_opaque,
22007                                &guest_code_IN[delta_ENCODED],
22008                                archinfo, abiinfo, sigill_diag_IN );
22009   }
22010
22011   return dres;
22012}
22013
22014/* Test program for the conversion of IRCmpF64Result values to VFP
22015   nzcv values.  See handling of FCMPD et al above. */
22016/*
22017UInt foo ( UInt x )
22018{
22019   UInt ix    = ((x >> 5) & 3) | (x & 1);
22020   UInt termL = (((((ix ^ 1) << 30) - 1) >> 29) + 1);
22021   UInt termR = (ix & (ix >> 1) & 1);
22022   return termL  -  termR;
22023}
22024
22025void try ( char* s, UInt ir, UInt req )
22026{
22027   UInt act = foo(ir);
22028   printf("%s 0x%02x -> req %d%d%d%d act %d%d%d%d (0x%x)\n",
22029          s, ir, (req >> 3) & 1, (req >> 2) & 1,
22030                 (req >> 1) & 1, (req >> 0) & 1,
22031                 (act >> 3) & 1, (act >> 2) & 1,
22032                 (act >> 1) & 1, (act >> 0) & 1, act);
22033
22034}
22035
22036int main ( void )
22037{
22038   printf("\n");
22039   try("UN", 0x45, 0b0011);
22040   try("LT", 0x01, 0b1000);
22041   try("GT", 0x00, 0b0010);
22042   try("EQ", 0x40, 0b0110);
22043   printf("\n");
22044   return 0;
22045}
22046*/
22047
22048/* Spare code for doing reference implementations of various 64-bit
22049   SIMD interleaves/deinterleaves/concatenation ops. */
22050/*
22051// Split a 64 bit value into 4 16 bit ones, in 32-bit IRTemps with
22052// the top halves guaranteed to be zero.
22053static void break64to16s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
22054                           IRTemp* out0, IRTemp v64 )
22055{
22056  if (out3) *out3 = newTemp(Ity_I32);
22057  if (out2) *out2 = newTemp(Ity_I32);
22058  if (out1) *out1 = newTemp(Ity_I32);
22059  if (out0) *out0 = newTemp(Ity_I32);
22060  IRTemp hi32 = newTemp(Ity_I32);
22061  IRTemp lo32 = newTemp(Ity_I32);
22062  assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
22063  assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
22064  if (out3) assign(*out3, binop(Iop_Shr32, mkexpr(hi32), mkU8(16)));
22065  if (out2) assign(*out2, binop(Iop_And32, mkexpr(hi32), mkU32(0xFFFF)));
22066  if (out1) assign(*out1, binop(Iop_Shr32, mkexpr(lo32), mkU8(16)));
22067  if (out0) assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFFFF)));
22068}
22069
22070// Make a 64 bit value from 4 16 bit ones, each of which is in a 32 bit
22071// IRTemp.
22072static IRTemp mk64from16s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
22073{
22074  IRTemp hi32 = newTemp(Ity_I32);
22075  IRTemp lo32 = newTemp(Ity_I32);
22076  assign(hi32,
22077         binop(Iop_Or32,
22078               binop(Iop_Shl32, mkexpr(in3), mkU8(16)),
22079               binop(Iop_And32, mkexpr(in2), mkU32(0xFFFF))));
22080  assign(lo32,
22081         binop(Iop_Or32,
22082               binop(Iop_Shl32, mkexpr(in1), mkU8(16)),
22083               binop(Iop_And32, mkexpr(in0), mkU32(0xFFFF))));
22084  IRTemp res = newTemp(Ity_I64);
22085  assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
22086  return res;
22087}
22088
22089static IRExpr* mk_InterleaveLO16x4 ( IRTemp a3210, IRTemp b3210 )
22090{
22091  // returns a1 b1 a0 b0
22092  IRTemp a1, a0, b1, b0;
22093  break64to16s(NULL, NULL, &a1, &a0, a3210);
22094  break64to16s(NULL, NULL, &b1, &b0, b3210);
22095  return mkexpr(mk64from16s(a1, b1, a0, b0));
22096}
22097
22098static IRExpr* mk_InterleaveHI16x4 ( IRTemp a3210, IRTemp b3210 )
22099{
22100  // returns a3 b3 a2 b2
22101  IRTemp a3, a2, b3, b2;
22102  break64to16s(&a3, &a2, NULL, NULL, a3210);
22103  break64to16s(&b3, &b2, NULL, NULL, b3210);
22104  return mkexpr(mk64from16s(a3, b3, a2, b2));
22105}
22106
22107static IRExpr* mk_CatEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
22108{
22109  // returns a2 a0 b2 b0
22110  IRTemp a2, a0, b2, b0;
22111  break64to16s(NULL, &a2, NULL, &a0, a3210);
22112  break64to16s(NULL, &b2, NULL, &b0, b3210);
22113  return mkexpr(mk64from16s(a2, a0, b2, b0));
22114}
22115
22116static IRExpr* mk_CatOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
22117{
22118  // returns a3 a1 b3 b1
22119  IRTemp a3, a1, b3, b1;
22120  break64to16s(&a3, NULL, &a1, NULL, a3210);
22121  break64to16s(&b3, NULL, &b1, NULL, b3210);
22122  return mkexpr(mk64from16s(a3, a1, b3, b1));
22123}
22124
22125static IRExpr* mk_InterleaveOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
22126{
22127  // returns a3 b3 a1 b1
22128  IRTemp a3, b3, a1, b1;
22129  break64to16s(&a3, NULL, &a1, NULL, a3210);
22130  break64to16s(&b3, NULL, &b1, NULL, b3210);
22131  return mkexpr(mk64from16s(a3, b3, a1, b1));
22132}
22133
22134static IRExpr* mk_InterleaveEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
22135{
22136  // returns a2 b2 a0 b0
22137  IRTemp a2, b2, a0, b0;
22138  break64to16s(NULL, &a2, NULL, &a0, a3210);
22139  break64to16s(NULL, &b2, NULL, &b0, b3210);
22140  return mkexpr(mk64from16s(a2, b2, a0, b0));
22141}
22142
22143static void break64to8s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
22144                          IRTemp* out4, IRTemp* out3, IRTemp* out2,
22145                          IRTemp* out1,IRTemp* out0, IRTemp v64 )
22146{
22147  if (out7) *out7 = newTemp(Ity_I32);
22148  if (out6) *out6 = newTemp(Ity_I32);
22149  if (out5) *out5 = newTemp(Ity_I32);
22150  if (out4) *out4 = newTemp(Ity_I32);
22151  if (out3) *out3 = newTemp(Ity_I32);
22152  if (out2) *out2 = newTemp(Ity_I32);
22153  if (out1) *out1 = newTemp(Ity_I32);
22154  if (out0) *out0 = newTemp(Ity_I32);
22155  IRTemp hi32 = newTemp(Ity_I32);
22156  IRTemp lo32 = newTemp(Ity_I32);
22157  assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
22158  assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
22159  if (out7)
22160    assign(*out7, binop(Iop_And32,
22161                        binop(Iop_Shr32, mkexpr(hi32), mkU8(24)),
22162                        mkU32(0xFF)));
22163  if (out6)
22164    assign(*out6, binop(Iop_And32,
22165                        binop(Iop_Shr32, mkexpr(hi32), mkU8(16)),
22166                        mkU32(0xFF)));
22167  if (out5)
22168    assign(*out5, binop(Iop_And32,
22169                        binop(Iop_Shr32, mkexpr(hi32), mkU8(8)),
22170                        mkU32(0xFF)));
22171  if (out4)
22172    assign(*out4, binop(Iop_And32, mkexpr(hi32), mkU32(0xFF)));
22173  if (out3)
22174    assign(*out3, binop(Iop_And32,
22175                        binop(Iop_Shr32, mkexpr(lo32), mkU8(24)),
22176                        mkU32(0xFF)));
22177  if (out2)
22178    assign(*out2, binop(Iop_And32,
22179                        binop(Iop_Shr32, mkexpr(lo32), mkU8(16)),
22180                        mkU32(0xFF)));
22181  if (out1)
22182    assign(*out1, binop(Iop_And32,
22183                        binop(Iop_Shr32, mkexpr(lo32), mkU8(8)),
22184                        mkU32(0xFF)));
22185  if (out0)
22186    assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFF)));
22187}
22188
22189static IRTemp mk64from8s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
22190                           IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
22191{
22192  IRTemp hi32 = newTemp(Ity_I32);
22193  IRTemp lo32 = newTemp(Ity_I32);
22194  assign(hi32,
22195         binop(Iop_Or32,
22196               binop(Iop_Or32,
22197                     binop(Iop_Shl32,
22198                           binop(Iop_And32, mkexpr(in7), mkU32(0xFF)),
22199                           mkU8(24)),
22200                     binop(Iop_Shl32,
22201                           binop(Iop_And32, mkexpr(in6), mkU32(0xFF)),
22202                           mkU8(16))),
22203               binop(Iop_Or32,
22204                     binop(Iop_Shl32,
22205                           binop(Iop_And32, mkexpr(in5), mkU32(0xFF)), mkU8(8)),
22206                     binop(Iop_And32,
22207                           mkexpr(in4), mkU32(0xFF)))));
22208  assign(lo32,
22209         binop(Iop_Or32,
22210               binop(Iop_Or32,
22211                     binop(Iop_Shl32,
22212                           binop(Iop_And32, mkexpr(in3), mkU32(0xFF)),
22213                           mkU8(24)),
22214                     binop(Iop_Shl32,
22215                           binop(Iop_And32, mkexpr(in2), mkU32(0xFF)),
22216                           mkU8(16))),
22217               binop(Iop_Or32,
22218                     binop(Iop_Shl32,
22219                           binop(Iop_And32, mkexpr(in1), mkU32(0xFF)), mkU8(8)),
22220                     binop(Iop_And32,
22221                           mkexpr(in0), mkU32(0xFF)))));
22222  IRTemp res = newTemp(Ity_I64);
22223  assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
22224  return res;
22225}
22226
22227static IRExpr* mk_InterleaveLO8x8 ( IRTemp a76543210, IRTemp b76543210 )
22228{
22229  // returns a3 b3 a2 b2 a1 b1 a0 b0
22230  IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
22231  break64to8s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
22232  break64to8s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
22233  return mkexpr(mk64from8s(a3, b3, a2, b2, a1, b1, a0, b0));
22234}
22235
22236static IRExpr* mk_InterleaveHI8x8 ( IRTemp a76543210, IRTemp b76543210 )
22237{
22238  // returns a7 b7 a6 b6 a5 b5 a4 b4
22239  IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
22240  break64to8s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
22241  break64to8s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
22242  return mkexpr(mk64from8s(a7, b7, a6, b6, a5, b5, a4, b4));
22243}
22244
22245static IRExpr* mk_CatEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
22246{
22247  // returns a6 a4 a2 a0 b6 b4 b2 b0
22248  IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
22249  break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
22250  break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
22251  return mkexpr(mk64from8s(a6, a4, a2, a0, b6, b4, b2, b0));
22252}
22253
22254static IRExpr* mk_CatOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
22255{
22256  // returns a7 a5 a3 a1 b7 b5 b3 b1
22257  IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
22258  break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
22259  break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
22260  return mkexpr(mk64from8s(a7, a5, a3, a1, b7, b5, b3, b1));
22261}
22262
22263static IRExpr* mk_InterleaveEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
22264{
22265  // returns a6 b6 a4 b4 a2 b2 a0 b0
22266  IRTemp a6, b6, a4, b4, a2, b2, a0, b0;
22267  break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
22268  break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
22269  return mkexpr(mk64from8s(a6, b6, a4, b4, a2, b2, a0, b0));
22270}
22271
22272static IRExpr* mk_InterleaveOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
22273{
22274  // returns a7 b7 a5 b5 a3 b3 a1 b1
22275  IRTemp a7, b7, a5, b5, a3, b3, a1, b1;
22276  break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
22277  break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
22278  return mkexpr(mk64from8s(a7, b7, a5, b5, a3, b3, a1, b1));
22279}
22280
22281static IRExpr* mk_InterleaveLO32x2 ( IRTemp a10, IRTemp b10 )
22282{
22283  // returns a0 b0
22284  return binop(Iop_32HLto64, unop(Iop_64to32, mkexpr(a10)),
22285                             unop(Iop_64to32, mkexpr(b10)));
22286}
22287
22288static IRExpr* mk_InterleaveHI32x2 ( IRTemp a10, IRTemp b10 )
22289{
22290  // returns a1 b1
22291  return binop(Iop_32HLto64, unop(Iop_64HIto32, mkexpr(a10)),
22292                             unop(Iop_64HIto32, mkexpr(b10)));
22293}
22294*/
22295
22296/*--------------------------------------------------------------------*/
22297/*--- end                                         guest_arm_toIR.c ---*/
22298/*--------------------------------------------------------------------*/
22299